diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 7ea64e1..6570c85 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -16,28 +16,26 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest]
+ python-version: ['3.9', '3.11']
env:
OS: ${{ matrix.os }}
- PYTHON: '3.9'
+ PYTHON: ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v1
- name: Set up Python
- uses: actions/setup-python@master
+ uses: actions/setup-python@v5
with:
- python-version: 3.9
+ python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- pip install click-default-group
- pip install .
+ pip install -e .[dev]
- name: Test and generate coverage report on Linux
run: |
- pip install pytest
- pip install pytest-cov
pytest --cov=./ --cov-report=xml
diff --git a/.github/workflows/paper.yml b/.github/workflows/paper.yml
index 80800ce..6f2e8a8 100644
--- a/.github/workflows/paper.yml
+++ b/.github/workflows/paper.yml
@@ -1,5 +1,13 @@
---
-on: [push]
+name: paper
+
+on:
+ push:
+ branches:
+ - main
+ pull_request:
+ branches:
+ - main
jobs:
paper:
@@ -15,7 +23,7 @@ jobs:
# This should be the path to the paper within your repo.
paper-path: paper/paper.md
- name: Upload
- uses: actions/upload-artifact@v1
+ uses: actions/upload-artifact@v4
with:
name: paper
# This is the output path where Pandoc will write the compiled
diff --git a/README.md b/README.md
index b110684..6f5fc5b 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ Details on methodology and advanced usage are available on the [User Guide](http
Install `tethys` using pip:
-`pip install tethys-downscale`
+`pip install tethys-downscaling`
## Contributing to Tethys
diff --git a/docs/source/conf.py b/docs/source/conf.py
index b84a011..4051ccf 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -26,7 +26,7 @@
project = 'tethys'
copyright = '2021, Battelle Memorial Institute'
-author = 'Chris R. Vernon'
+author = 'Isaac Thompson, Chris R. Vernon, Hassan Niazi'
# The full version, including alpha/beta/rc tags
release = version
diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst
index 4d234e0..64a7032 100644
--- a/docs/source/getting_started.rst
+++ b/docs/source/getting_started.rst
@@ -21,7 +21,7 @@ As a prerequisite, you'll need to have `Python `_ configuration file. The options in this file correspond to the arguments passed to the :ref:`Tethys class `. Options not present in the config file will use the default. An overview is provided in the following table, with more details and examples below.
-======================== =========================================================
-Option Description
-======================== =========================================================
-:ref:`years` list of years to be included spatial downscaling
-:ref:`resolution` resolution in degrees for spatial downscaling
-:ref:`demand_type` choice between "withdrawals" (default) or "consumption"
-:ref:`perform_temporal` choice between "false" (default) or "true"
-:ref:`gcam_db` relative path to a GCAM database
-:ref:`csv` relative path to csv file containing inputs
-:ref:`output_file` name of file to write outputs to
-:ref:`downscaling_rules` mapping from water demand sectors to proxy variables
-:ref:`proxy_files` mapping of spatial proxy files to their years/variables
-:ref:`map_files` list of files containing region maps
-:ref:`temporal_methods` mapping of sector to temporal downscaling method
-:ref:`temporal_files` files that will be accessible during temporal downscaling
-======================== =========================================================
+======================================= =============================================================================
+Option Description
+======================================= =============================================================================
+:ref:`years` list of years to be included spatial downscaling
+:ref:`resolution` resolution in degrees for spatial downscaling
+:ref:`demand_type` choice between "withdrawals" (default) or "consumption"
+:ref:`perform_temporal` choice between "false" (default) or "true"
+:ref:`gcam_db` relative path to a GCAM database
+:ref:`csv` relative path to csv file containing inputs
+:ref:`output_file` name of file to write outputs to
+:ref:`downscaling_rules` mapping from water demand sectors to proxy variables
+:ref:`proxy_files` mapping of spatial proxy files to their years/variables
+:ref:`map_files` list of files containing region maps
+:ref:`temporal_methods` mapping of sector to temporal downscaling method
+:ref:`temporal_files` files that will be accessible during temporal downscaling
+:ref:`source_disaggregation` if true, share of surface water vs groundwater withdrawal will be written out
+:ref:`irrigation_conveyance_efficiency` if set, irrigation withdrawal will be divided by this number
+======================================= =============================================================================
years
@@ -272,6 +274,23 @@ Mapping of files that will be accessible to temporal downscaling methods.
CDD: data/temporal/CDD_monthly.nc
+source_disaggregation
+^^^^^^^^^^^^^^^^^^^^
+If true, share of surface water vs groundwater withdrawal will be written out to a file called gridded_runoff_shares.nc
+.. code-block:: yaml
+
+ source_disaggregation: true
+
+
+irrigation_conveyance_efficiency
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+If set, irrigation withdrawal will be divided by this number, representing GCAM's irrigation conveyance efficiency
+
+.. code-block:: yaml
+
+ irrigation_conveyance_efficiency: 0.829937
+
+
Generalization
--------------
**tethys** was developed with consideration for GCAM's breakdown of water demand, but was designed to be as flexible as possible with support for user-specified downscaling configurations.
diff --git a/requirements.txt b/requirements.txt
index 57c8033..dd9e8c9 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
PyYAML>=6.0
-gcamreader>=1.2.5
+gcamreader>=1.4.0
numpy>=1.22
pandas>=1.2.4
netCDF4>=1.6
@@ -7,8 +7,6 @@ xarray>=2022.09.0
rioxarray>=0.12.4
tqdm>=4.66.2
matplotlib>=3.8.3
-dask[complete]>=2022.12.1
-bokeh!=3.0.*,>=2.4.2
-geopandas>=0.14.3
-dask_geopandas>=0.3.1
-hvplot>=0.9.2
+dask>=2022.12.1
+distributed>=2022.12.1
+setuptools<81
diff --git a/setup.py b/setup.py
index 2ea637e..63f95ee 100644
--- a/setup.py
+++ b/setup.py
@@ -23,35 +23,37 @@ def get_requirements():
url='https://github.com/JGCRI/tethys',
packages=find_packages(),
license='BSD-2-Clause',
- author='Isaac Thompson, Chris Vernon',
+ author='Isaac Thompson, Chris Vernon, Hassan Niazi',
python_requires='>=3.9, <4',
include_package_data=True,
install_requires=[
'PyYAML>=6.0',
- 'gcamreader>=1.2.5',
+ 'gcamreader==1.4.0',
'numpy>=1.22',
'pandas>=1.2.4',
'netCDF4>=1.6',
- 'dask[complete]>=2022.12.1',
- 'dask_geopandas>=0.3.1',
+ 'dask==2024.8.0',
+ 'distributed==2024.8.0',
'xarray>=2022.09.0',
'rioxarray>=0.12.4',
'tqdm>=4.66.2',
'matplotlib>=3.8.3',
+ 'setuptools<81',
'bokeh!=3.0.*,>=2.4.2',
'geopandas>=0.14.3',
+ 'dask_geopandas>=0.3.1',
'hvplot>=0.9.2',
],
extras_require={
'dev': [
- 'build~=0.5.1',
- 'nbsphinx~=0.8.6',
- 'setuptools~=57.0.0',
- 'sphinx~=4.0.2',
- 'sphinx-panels~=0.6.0',
- 'sphinx-rtd-theme~=0.5.2',
- 'sphinx-mathjax-offline~=0.0.1',
- 'twine>=3.4.1',
+ 'build==0.10.0',
+ 'nbsphinx==0.8.12',
+ 'sphinx==4.5.0',
+ 'sphinx-panels==0.6.0',
+ 'sphinx-rtd-theme==0.5.2',
+ 'sphinx-mathjax-offline==0.0.2',
+ 'pytest>=8.3.0',
+ 'pytest-cov>=5.0.0',
'click-default-group>=1.2.4',
]
}
diff --git a/tests/test_dynamic_population.py b/tests/test_dynamic_population.py
new file mode 100644
index 0000000..c775453
--- /dev/null
+++ b/tests/test_dynamic_population.py
@@ -0,0 +1,474 @@
+"""
+Test script to verify dynamic population functionality in Tethys.
+
+This script compares outputs from two scenarios:
+1. Static population: Uses only base year (2010) population for all years
+2. Dynamic population: Uses multiple years (2010, 2020) with interpolation
+
+The comparison demonstrates whether dynamic population is working correctly.
+"""
+
+import os
+import yaml
+import xarray as xr
+import numpy as np
+import pandas as pd
+from pathlib import Path
+
+import tethys
+from tethys.model import Tethys
+
+
+class DynamicPopulationTest:
+ """Test framework for comparing static vs dynamic population scenarios."""
+
+ def __init__(self, test_data_dir=None):
+ """
+ Initialize the test framework.
+
+ :param test_data_dir: Path to test data directory. If None, uses default test data location.
+ """
+ if test_data_dir is None:
+ self.test_data_dir = Path(__file__).parent / 'data'
+ else:
+ self.test_data_dir = Path(test_data_dir)
+
+ self.base_config = None
+ self.static_config_path = None
+ self.dynamic_config_path = None
+ self.static_csv_path = None
+ self.dynamic_csv_path = None
+ self.static_results = None
+ self.dynamic_results = None
+
+ def _load_base_config(self):
+ """Load the base test configuration."""
+ config_file = self.test_data_dir / 'config_test.yml'
+ with open(config_file, 'r') as f:
+ self.base_config = yaml.safe_load(f)
+
+ def _create_extended_csv(self, years):
+ """Create an extended CSV file with data for all specified years."""
+ # Load original CSV
+ original_csv = self.test_data_dir / 'data.csv'
+ df = pd.read_csv(original_csv)
+
+ # Get unique regions and sectors
+ regions = df['region'].unique()
+ sectors = df['sector'].unique()
+
+ # Create extended data with all years
+ extended_rows = []
+ for region in regions:
+ for sector in sectors:
+ region_sector_data = df[(df['region'] == region) & (df['sector'] == sector)].sort_values('year')
+
+ if len(region_sector_data) > 0:
+ # Interpolate/extrapolate for missing years
+ existing_years = region_sector_data['year'].values
+ existing_values = region_sector_data['value'].values
+
+ for year in years:
+ if year in existing_years:
+ # Use existing value
+ value = float(region_sector_data[region_sector_data['year'] == year]['value'].iloc[0])
+ else:
+ # Linear interpolation/extrapolation
+ if year < existing_years[0]:
+ # Extrapolate backward
+ value = existing_values[0]
+ elif year > existing_years[-1]:
+ # Extrapolate forward (linear trend)
+ if len(existing_years) >= 2:
+ # Use last two points for trend
+ slope = (existing_values[-1] - existing_values[-2]) / (existing_years[-1] - existing_years[-2])
+ value = existing_values[-1] + slope * (year - existing_years[-1])
+ else:
+ value = existing_values[-1]
+ else:
+ # Interpolate between existing years
+ idx = np.searchsorted(existing_years, year)
+ y1, y2 = existing_years[idx-1], existing_years[idx]
+ v1, v2 = existing_values[idx-1], existing_values[idx]
+ if y2 != y1:
+ value = v1 + (v2 - v1) * (year - y1) / (y2 - y1)
+ else:
+ value = v1
+
+ extended_rows.append({
+ 'region': region,
+ 'sector': sector,
+ 'year': year,
+ 'value': value
+ })
+
+ extended_df = pd.DataFrame(extended_rows)
+ return extended_df
+
+ def _create_static_config(self):
+ """Create a configuration file for static population scenario."""
+ # Create config file in test data directory so relative paths work
+ self.static_config_path = self.test_data_dir / 'config_static_temp.yml'
+ self.static_csv_path = self.test_data_dir / 'data_static_temp.csv'
+
+ # Create extended CSV with all years
+ extended_df = self._create_extended_csv([2010, 2015, 2020, 2025, 2030])
+ extended_df.to_csv(self.static_csv_path, index=False)
+
+ # Copy base config and modify for static population
+ config = self.base_config.copy()
+ config['years'] = [2010, 2015, 2020, 2025, 2030] # Multiple years to test
+ config['csv'] = str(self.static_csv_path.name) # Use extended CSV
+ config['proxy_files'] = {
+ 'population_{year}.tif': {
+ 'variables': 'Population',
+ 'years': [2010] # Only base year
+ }
+ }
+
+ with open(self.static_config_path, 'w') as f:
+ yaml.dump(config, f)
+
+ return str(self.static_config_path)
+
+ def _create_dynamic_config(self):
+ """Create a configuration file for dynamic population scenario."""
+ self.dynamic_config_path = self.test_data_dir / 'config_dynamic_temp.yml'
+ self.dynamic_csv_path = self.test_data_dir / 'data_dynamic_temp.csv'
+
+ # Create extended CSV with all years
+ extended_df = self._create_extended_csv([2010, 2015, 2020, 2025, 2030])
+ extended_df.to_csv(self.dynamic_csv_path, index=False)
+
+ # Copy base config and modify for dynamic population
+ config = self.base_config.copy()
+ config['years'] = [2010, 2015, 2020, 2025, 2030] # Multiple years to test
+ config['csv'] = str(self.dynamic_csv_path.name) # Use extended CSV
+ config['proxy_files'] = {
+ 'population_{year}.tif': {
+ 'variables': 'Population',
+ 'years': [2010, 2020] # Multiple years for interpolation (2030 will use 2020 as nearest endpoint)
+ }
+ }
+
+ with open(self.dynamic_config_path, 'w') as f:
+ yaml.dump(config, f)
+
+ return str(self.dynamic_config_path)
+
+ def run_static_scenario(self):
+ """Run Tethys with static population configuration."""
+ print("=" * 70)
+ print("Running STATIC population scenario (only 2010 population)")
+ print("=" * 70)
+
+ if self.static_config_path is None:
+ self._create_static_config()
+
+ # Use absolute path for config file
+ config_path = os.path.abspath(self.static_config_path)
+ model = Tethys(config_file=config_path)
+ model.run_model()
+ self.static_results = model.outputs
+
+ print("Static scenario completed.\n")
+ return self.static_results
+
+ def run_dynamic_scenario(self):
+ """Run Tethys with dynamic population configuration."""
+ print("=" * 70)
+ print("Running DYNAMIC population scenario (2010 and 2020 population)")
+ print("=" * 70)
+
+ if self.dynamic_config_path is None:
+ self._create_dynamic_config()
+
+ # Use absolute path for config file
+ config_path = os.path.abspath(self.dynamic_config_path)
+ model = Tethys(config_file=config_path)
+ model.run_model()
+ self.dynamic_results = model.outputs
+
+ print("Dynamic scenario completed.\n")
+ return self.dynamic_results
+
+ def compare_outputs(self):
+ """Compare outputs from static and dynamic scenarios."""
+ if self.static_results is None or self.dynamic_results is None:
+ raise ValueError("Both scenarios must be run before comparison.")
+
+ print("=" * 70)
+ print("COMPARING STATIC vs DYNAMIC POPULATION OUTPUTS")
+ print("=" * 70)
+
+ # Get the Municipal sector output (should use Population proxy)
+ sector = 'Municipal'
+
+ if sector not in self.static_results.data_vars:
+ # Try to find the sector in the outputs
+ available_sectors = list(self.static_results.data_vars.keys())
+ if available_sectors:
+ sector = available_sectors[0]
+ print(f"Warning: 'Municipal' not found, using '{sector}' instead.")
+ else:
+ raise ValueError("No sectors found in outputs.")
+
+ static_da = self.static_results[sector]
+ dynamic_da = self.dynamic_results[sector]
+
+ # Ensure same coordinates
+ static_da = static_da.reindex_like(dynamic_da, fill_value=0)
+
+ # Calculate differences
+ diff = dynamic_da - static_da
+ abs_diff = np.abs(diff)
+
+ # Calculate statistics per year
+ years = static_da.year.values if hasattr(static_da, 'year') else None
+
+ comparison_stats = {}
+
+ if years is not None:
+ for year in years:
+ static_year = static_da.sel(year=year, drop=True)
+ dynamic_year = dynamic_da.sel(year=year, drop=True)
+ diff_year = diff.sel(year=year, drop=True)
+ abs_diff_year = abs_diff.sel(year=year, drop=True)
+
+ # Calculate totals
+ static_total = float(static_year.sum().values)
+ dynamic_total = float(dynamic_year.sum().values)
+
+ # Calculate differences
+ total_diff = float(diff_year.sum().values)
+ total_abs_diff = float(abs_diff_year.sum().values)
+ max_abs_diff = float(abs_diff_year.max().values)
+ mean_abs_diff = float(abs_diff_year.mean().values)
+
+ # Calculate relative difference
+ if static_total != 0:
+ rel_diff_pct = (total_diff / static_total) * 100
+ else:
+ rel_diff_pct = np.nan
+
+ comparison_stats[int(year)] = {
+ 'static_total': static_total,
+ 'dynamic_total': dynamic_total,
+ 'total_difference': total_diff,
+ 'total_abs_difference': total_abs_diff,
+ 'max_abs_difference': max_abs_diff,
+ 'mean_abs_difference': mean_abs_diff,
+ 'relative_difference_pct': rel_diff_pct
+ }
+
+ return comparison_stats, diff, abs_diff
+
+ def generate_report(self, comparison_stats, diff, abs_diff, output_file=None):
+ """Generate a detailed comparison report."""
+ print("\n" + "=" * 70)
+ print("COMPARISON REPORT: STATIC vs DYNAMIC POPULATION")
+ print("=" * 70)
+
+ # Summary table
+ print("\nSUMMARY STATISTICS BY YEAR:")
+ print("-" * 70)
+ print(f"{'Year':<8} {'Static Total':<15} {'Dynamic Total':<15} {'Difference':<15} {'Rel Diff %':<12}")
+ print("-" * 70)
+
+ for year in sorted(comparison_stats.keys()):
+ stats = comparison_stats[year]
+ print(f"{year:<8} {stats['static_total']:<15.2f} {stats['dynamic_total']:<15.2f} "
+ f"{stats['total_difference']:<15.2f} {stats['relative_difference_pct']:<12.2f}")
+
+ print("-" * 70)
+
+ # Detailed statistics
+ print("\nDETAILED STATISTICS:")
+ print("-" * 70)
+ for year in sorted(comparison_stats.keys()):
+ stats = comparison_stats[year]
+ print(f"\nYear {year}:")
+ print(f" Total difference: {stats['total_difference']:.6f}")
+ print(f" Total absolute difference: {stats['total_abs_difference']:.6f}")
+ print(f" Maximum absolute difference: {stats['max_abs_difference']:.6f}")
+ print(f" Mean absolute difference: {stats['mean_abs_difference']:.6f}")
+ print(f" Relative difference: {stats['relative_difference_pct']:.2f}%")
+
+ # Validation checks
+ print("\n" + "=" * 70)
+ print("VALIDATION CHECKS:")
+ print("=" * 70)
+
+ validation_results = {}
+
+ # Check 1: 2010 should be identical (baseline year)
+ if 2010 in comparison_stats:
+ diff_2010 = abs(comparison_stats[2010]['total_difference'])
+ if diff_2010 < 1e-6:
+ print("✓ PASS: Year 2010 outputs are identical (baseline year)")
+ validation_results['baseline_identical'] = True
+ else:
+ print(f"✗ FAIL: Year 2010 outputs differ by {diff_2010:.6f}")
+ validation_results['baseline_identical'] = False
+
+ # Check 2: 2015 should differ (interpolation test)
+ if 2015 in comparison_stats:
+ diff_2015 = abs(comparison_stats[2015]['total_difference'])
+ if diff_2015 > 1e-6:
+ print(f"✓ PASS: Year 2015 outputs differ by {diff_2015:.6f} (interpolation working)")
+ validation_results['interpolation_working'] = True
+ else:
+ print(f"✗ FAIL: Year 2015 outputs are identical (interpolation may not be working)")
+ validation_results['interpolation_working'] = False
+
+ # Check 3: 2020 should differ (future year test)
+ if 2020 in comparison_stats:
+ diff_2020 = abs(comparison_stats[2020]['total_difference'])
+ if diff_2020 > 1e-6:
+ print(f"✓ PASS: Year 2020 outputs differ by {diff_2020:.6f} (future year being used)")
+ validation_results['future_year_used'] = True
+ else:
+ print(f"✗ FAIL: Year 2020 outputs are identical (future year may not be used)")
+ validation_results['future_year_used'] = False
+
+ # Check 4: 2025 should differ (will use 2020 as nearest endpoint since no 2030 data)
+ if 2025 in comparison_stats:
+ diff_2025 = abs(comparison_stats[2025]['total_difference'])
+ if diff_2025 > 1e-6:
+ print(f"✓ PASS: Year 2025 outputs differ by {diff_2025:.6f} (using 2020 as nearest endpoint)")
+ validation_results['interpolation_2025_working'] = True
+ else:
+ print(f"✗ FAIL: Year 2025 outputs are identical (may not be using 2020 endpoint)")
+ validation_results['interpolation_2025_working'] = False
+
+ # Check 5: 2030 should differ (will use 2020 as nearest endpoint since no 2030 data)
+ if 2030 in comparison_stats:
+ diff_2030 = abs(comparison_stats[2030]['total_difference'])
+ if diff_2030 > 1e-6:
+ print(f"✓ PASS: Year 2030 outputs differ by {diff_2030:.6f} (using 2020 as nearest endpoint)")
+ validation_results['extended_future_year_used'] = True
+ else:
+ print(f"✗ FAIL: Year 2030 outputs are identical (may not be using 2020 endpoint)")
+ validation_results['extended_future_year_used'] = False
+
+ # Overall assessment
+ print("\n" + "=" * 70)
+ print("OVERALL ASSESSMENT:")
+ print("=" * 70)
+
+ if all(validation_results.values()):
+ print("✓✓✓ DYNAMIC POPULATION IS WORKING CORRECTLY ✓✓✓")
+ print(" - Baseline year (2010) is identical")
+ print(" - Interpolated year (2015) shows differences")
+ print(" - Future year (2020) shows differences")
+ print(" - Extended interpolated year (2025) shows differences")
+ print(" - Extended future year (2030) shows differences")
+ else:
+ failed_checks = [k for k, v in validation_results.items() if not v]
+ print("✗✗✗ DYNAMIC POPULATION MAY NOT BE WORKING AS EXPECTED ✗✗✗")
+ print(f" Failed checks: {', '.join(failed_checks)}")
+
+ # Save to file if requested
+ if output_file:
+ self._save_report_to_file(comparison_stats, validation_results, output_file)
+
+ return validation_results
+
+ def _save_report_to_file(self, comparison_stats, validation_results, output_file):
+ """Save comparison report to a file."""
+ with open(output_file, 'w') as f:
+ f.write("DYNAMIC POPULATION TEST REPORT\n")
+ f.write("=" * 70 + "\n\n")
+
+ f.write("SUMMARY STATISTICS BY YEAR:\n")
+ f.write("-" * 70 + "\n")
+ f.write(f"{'Year':<8} {'Static Total':<15} {'Dynamic Total':<15} {'Difference':<15} {'Rel Diff %':<12}\n")
+ f.write("-" * 70 + "\n")
+
+ for year in sorted(comparison_stats.keys()):
+ stats = comparison_stats[year]
+ f.write(f"{year:<8} {stats['static_total']:<15.2f} {stats['dynamic_total']:<15.2f} "
+ f"{stats['total_difference']:<15.2f} {stats['relative_difference_pct']:<12.2f}\n")
+
+ f.write("\nVALIDATION RESULTS:\n")
+ f.write("-" * 70 + "\n")
+ for check, result in validation_results.items():
+ f.write(f"{check}: {'PASS' if result else 'FAIL'}\n")
+
+ print(f"\nReport saved to: {output_file}")
+
+ def cleanup(self):
+ """Clean up temporary config and CSV files."""
+ files_to_remove = [
+ self.static_config_path,
+ self.dynamic_config_path,
+ self.static_csv_path,
+ self.dynamic_csv_path
+ ]
+
+ removed = False
+ for file_path in files_to_remove:
+ if file_path and os.path.exists(file_path):
+ os.remove(file_path)
+ removed = True
+
+ if removed:
+ print("Cleaned up temporary config and CSV files.")
+
+
+def main():
+ """Main function to run the dynamic population test."""
+ import argparse
+
+ parser = argparse.ArgumentParser(description='Test dynamic population functionality in Tethys')
+ parser.add_argument('--test-data-dir', type=str, default=None,
+ help='Path to test data directory (default: tests/data)')
+ parser.add_argument('--output-report', type=str, default=None,
+ help='Path to save comparison report (optional)')
+ parser.add_argument('--no-cleanup', action='store_true',
+ help='Keep temporary files after test')
+
+ args = parser.parse_args()
+
+ # Initialize test framework
+ test = DynamicPopulationTest(test_data_dir=args.test_data_dir)
+
+ try:
+ # Load base config
+ test._load_base_config()
+
+ # Create config files
+ test._create_static_config()
+ test._create_dynamic_config()
+
+ # Run both scenarios
+ test.run_static_scenario()
+ test.run_dynamic_scenario()
+
+ # Compare outputs
+ comparison_stats, diff, abs_diff = test.compare_outputs()
+
+ # Generate report
+ validation_results = test.generate_report(comparison_stats, diff, abs_diff,
+ output_file=args.output_report)
+
+ # Return exit code based on validation
+ if all(validation_results.values()):
+ return 0
+ else:
+ return 1
+
+ except Exception as e:
+ print(f"\nERROR: {str(e)}")
+ import traceback
+ traceback.print_exc()
+ return 1
+ finally:
+ if not args.no_cleanup:
+ test.cleanup()
+
+
+if __name__ == '__main__':
+ import sys
+ sys.exit(main())
+
diff --git a/tethys/__init__.py b/tethys/__init__.py
index a88df2d..3fab7a4 100644
--- a/tethys/__init__.py
+++ b/tethys/__init__.py
@@ -3,4 +3,4 @@
from .utils.spatial import InputViewer
-__version__ = "2.1"
+__version__ = "2.2.0"
diff --git a/tethys/datareader/easy_query.py b/tethys/datareader/easy_query.py
index 723578e..68c660b 100644
--- a/tethys/datareader/easy_query.py
+++ b/tethys/datareader/easy_query.py
@@ -1,7 +1,7 @@
import gcamreader
-def easy_query(variable, year_axis=None, **kwargs):
+def easy_query(variable, year_axis=None, replace_filters=False, **kwargs):
"""Build a query for a GCAM database
kwargs act as filters on nodes, eg. "sector='Beef'" is converted to the xpath "[@type='sector' and @name='Beef']"
@@ -24,8 +24,11 @@ def easy_query(variable, year_axis=None, **kwargs):
:return: gcamreader Query object
"""
- filters = dict(sector=None) # match nodes where @type='sector' by default
- filters.update(kwargs) # update the filters with user-provided kwargs
+ if replace_filters:
+ filters = kwargs
+ else:
+ filters = dict(sector=None) # match nodes where @type='sector' by default
+ filters.update(kwargs) # update the filters with user-provided kwargs
# filters are separated by "//*" to match any descendant
xpath = "*" + "//*".join(handle_filter(k, v) for k, v in filters.items()) + f"//{variable}/node()"
diff --git a/tethys/datareader/gridded.py b/tethys/datareader/gridded.py
index 23d4059..2bf0012 100644
--- a/tethys/datareader/gridded.py
+++ b/tethys/datareader/gridded.py
@@ -2,7 +2,7 @@
import xarray as xr
-def load_file(filename, target_resolution, years, variables=None, flags=None, regrid_method='extensive'):
+def load_file(filename, target_resolution, years, variables=None, flags=(), bounds=None, regrid_method='extensive'):
"""Prepare a dataset from single file to be merged into a dataset of all proxies
handles many oddities found in proxies
@@ -12,13 +12,12 @@ def load_file(filename, target_resolution, years, variables=None, flags=None, re
:param years: years to extract from the file
:param variables: variables to extract from the file
:param flags: list potentially containing 'cell_area_share' or 'short_name_as_name'
+ :param bounds: list [lat_min, lat_max, lon_min, lon_max] to crop to
:param regrid_method: passed along to regrid
:return: preprocessed data set
"""
- flags = [] if flags is None else flags
-
- ds = xr.open_dataset(filename, chunks='auto')
+ ds = xr.open_mfdataset(filename, chunks='auto')
# handle tif (can only handle single band single variable single year currently)
if 'band' in ds.coords:
@@ -33,7 +32,10 @@ def load_file(filename, target_resolution, years, variables=None, flags=None, re
# filter to desired variables
if variables is not None:
- ds = ds[variables]
+ if len(ds) == 1 and len(variables) > 1: # use single layer for all variables
+ ds[variables] = [ds[variables[0]] for i in variables]
+ else:
+ ds = ds[variables]
# create a year dimension if missing, with the years reported for this file in the catalog
if 'year' not in ds.coords:
@@ -41,11 +43,7 @@ def load_file(filename, target_resolution, years, variables=None, flags=None, re
else:
ds = ds.sel(year=years, method='nearest') # nearest used for temporal files
ds['year'] = years
-
- # do the year filtering
- if years is not None and 'year' in ds.coords:
- ds = ds.sel(year=years, method='nearest').chunk(chunks=dict(year=1))
- ds['year'] = years
+ ds = ds.chunk(year=1)
# numeric stuff
ds = ds.fillna(0).astype(np.float32)
@@ -59,10 +57,12 @@ def load_file(filename, target_resolution, years, variables=None, flags=None, re
# spatial aligning
ds = pad_global(ds)
ds = regrid(ds, target_resolution, method=regrid_method)
+ if bounds is not None:
+ ds = crop(ds, bounds)
ds = ds.chunk(chunks=dict(lat=-1, lon=-1))
if 'month' in ds.coords:
- ds = ds.chunk(chunks=dict(month=12))
+ ds = ds.chunk(month=12)
return ds
@@ -133,6 +133,11 @@ def pad_global(ds):
return ds
+def crop(ds, bounds):
+ lat_min, lat_max, lon_min, lon_max = bounds
+ return ds.sel(lon=slice(lon_min, lon_max), lat=slice(lat_max, lat_min))
+
+
def regrid(ds, target_resolution, method='extensive'):
"""Simple regridding algorithm
diff --git a/tethys/datareader/maps.py b/tethys/datareader/maps.py
index 04ad026..9b24cc7 100644
--- a/tethys/datareader/maps.py
+++ b/tethys/datareader/maps.py
@@ -2,16 +2,17 @@
import pandas as pd
import xarray as xr
-from tethys.datareader.gridded import regrid
+from tethys.datareader.gridded import regrid, pad_global, crop
-def load_region_map(mapfile, masks=False, namefile=None, target_resolution=None, nodata=None, flip_lat=False):
+def load_region_map(mapfile, masks=False, namefile=None, target_resolution=None, bounds=None, nodata=None, flip_lat=False):
""" Load region map.
:param mapfile: path to map file
:param masks: bool whether to convert categorical map to layer of region masks
:param namefile: optional path to csv with region names
:param target_resolution: resolution to coerce map to. If None (default), use base resolution
+ :param bounds: list [lat_min, lat_max, lon_min, lon_max] to crop to
:param nodata: nodata value (like 9999), will be replaced with 0
:param flip_lat: bool, whether the map is "upside down"
"""
@@ -41,9 +42,13 @@ def load_region_map(mapfile, masks=False, namefile=None, target_resolution=None,
da = da.squeeze('band').drop_vars('band')
if target_resolution is not None:
+ da = pad_global(da)
da = regrid(da, target_resolution, method='label')
da = da.chunk(chunks=dict(lat=-1, lon=-1))
+ if bounds is not None:
+ da = crop(da, bounds)
+
# use region names provided in CSV namefile, otherwise check metadata for a names dict
if namefile is not None:
df = pd.read_csv(namefile)
diff --git a/tethys/datareader/regional.py b/tethys/datareader/regional.py
index 083eab7..ddd618f 100644
--- a/tethys/datareader/regional.py
+++ b/tethys/datareader/regional.py
@@ -18,7 +18,7 @@ def load_region_data(gcam_db, sectors, demand_type='withdrawals'):
# remove subsector and technology if present, convert friendly water demand sector names to internal
search_sectors = list(set(unfriendly_sector_name(i.split('/')[0]) for i in sectors))
- inputs = {'withdrawals': ['water_td_*_W', '*_water withdrawals', '*_desalinated water'],
+ inputs = {'withdrawals': ['water_td_*_W', '*_water withdrawals'],
'consumption': ['water_td_*_C', '*_water consumption']}.get(demand_type)
df = conn.runQuery(easy_query('demand-physical', sector=search_sectors, technology='!water_td_*', input=inputs))
@@ -41,6 +41,19 @@ def load_region_data(gcam_db, sectors, demand_type='withdrawals'):
return df
+def extract_resource_name(x):
+ """Removes '_water withdrawals' or '_water consumption' from GCAM sector or resource name.
+
+ :param x: resource name string.
+ :return: cleaned resource name without suffix.
+ """
+ if x.endswith('_water withdrawals'):
+ return x.removesuffix('_water withdrawals')
+ if x.endswith('_water consumption'):
+ return x.removesuffix('_water consumption')
+ return x
+
+
def extract_basin_name(x):
"""Maps 'water_td_irr_basin_C' to '_basin', and water_td_elec_C to ''"""
if x.startswith('water_td_irr_'):
diff --git a/tethys/model.py b/tethys/model.py
index fd7db58..dbaf361 100644
--- a/tethys/model.py
+++ b/tethys/model.py
@@ -19,49 +19,59 @@
from tethys.datareader.gridded import load_file, interp_helper
from tethys.datareader.maps import load_region_map
+from tethys.utils.source_disaggregation import get_source_shares
class Tethys:
"""Model wrapper for Tethys"""
def __init__(
- self,
- config_file=None,
- years=None,
- resolution=0.125,
+ self,
+ config_file=None,
+ years=None,
+ resolution=0.125,
+ bounds=None,
demand_type='withdrawals',
- perform_temporal=False,
- gcam_db=None,
- csv=None,
- output_file=None,
- downscaling_rules=None,
- proxy_files=None,
- map_files=None,
- temporal_files=None,
- temporal_methods=None
+ source_disaggregation=None,
+ gcam_db=None,
+ csv=None,
+ output_dir=None,
+ supersector_iterations=0,
+ downscaling_rules=None,
+ proxy_files=None,
+ map_files=None,
+ temporal_config=None,
+ perform_temporal=False,
+ temporal_methods=None,
+ temporal_files=None,
):
"""Parameters can be specified in a YAML file or passed directly, with the config file taking precedence
:param config_file: path to YAML configuration file containing these parameters
- :param years: list of years to be included spatial downscaling
+ :param years: list of years to be included for spatial downscaling
:param resolution: resolution in degrees for spatial downscaling
+ :param bounds: list [lat_min, lat_max, lon_min, lon_max] to crop to
:param demand_type: choice between “withdrawals” (default) or “consumption”
- :param perform_temporal: choice between False (default) or True
+ :param source_disaggregation: decide whether to disaggregate water demand by source (runoff, groundwater, desal)
:param gcam_db: relative path to a GCAM database
:param csv: relative path to csv file containing inputs
- :param output_file: name of file to write outputs to
+ :param output_dir: directory to write outputs to
+ :param supersector_iterations: number of times to repeat applying individual and total sector constraints, default 0
:param downscaling_rules: mapping from water demand sectors to proxy variables
:param proxy_files: mapping of spatial proxy files to their years/variables
:param map_files: list of files containing region maps
- :param temporal_files: mapping of sector to temporal downscaling method
- :param temporal_methods: files that will be accessible during temporal downscaling
+ :param perform_temporal: if True, apply temporal downscaling
+ :param temporal_methods: legacy mapping of sector to method names in tethys.tdmethods
+ :param temporal_files: legacy mapping of file aliases used by temporal methods
+ :param temporal_config: mapping of sector to temporal downscaling method and arguments
"""
self.root = None
# project level settings
self.years = years
self.resolution = resolution
+ self.bounds = bounds
self.demand_type = demand_type
- self.perform_temporal = perform_temporal
+ self.source_disaggregation = source_disaggregation
# GCAM database info
self.gcam_db = gcam_db
@@ -70,21 +80,29 @@ def __init__(
self.csv = csv
# outputs
- self.output_file = output_file
+ self.output_dir = output_dir
self.downscaling_rules = downscaling_rules
+ self.supersector_iterations = supersector_iterations
self.proxy_files = proxy_files
self.map_files = map_files
- self.temporal_files = temporal_files
+ self.perform_temporal = perform_temporal
self.temporal_methods = temporal_methods
+ self.temporal_files = temporal_files
+ self.temporal_config = temporal_config
# data we'll load or generate later
self.region_masks = None
self.proxies = None
self.inputs = None
self.outputs = None
+ self.shares = None
+ self.griddedshares = None
+ self.disaggregated_sw = None
+ self.disaggregated_gw = None
+ self.irrigation_conveyance_efficiency = None
# settings in YAML override settings passed directly to __init__
if config_file is not None:
@@ -99,29 +117,57 @@ def __init__(
else:
self.root = os.getcwd()
- if self.temporal_methods is None:
- self.temporal_methods = {
- 'domestic': 'domestic',
- 'municipal': 'domestic',
- 'electricity': 'electricity',
- 'irrigation': 'irrigation'
- }
- else:
- self.temporal_methods = {k.lower(): v for k, v in self.temporal_methods.items()}
-
- if self.temporal_files is not None:
- self.temporal_files = {k: os.path.join(self.root, v) for k, v in self.temporal_files.items()}
-
- if self.output_file is not None:
- self.output_file = os.path.join(self.root, self.output_file)
+ if self.output_dir is not None:
+ self.output_dir = os.path.join(self.root, self.output_dir)
self._parse_proxy_files()
+ self._normalize_temporal_config()
+
+ def _resolve_path(self, value):
+ if value is None:
+ return None
+ return value if os.path.isabs(value) else os.path.join(self.root, value)
+
+ # maps method name → {kwarg_name: temporal_files key}
+ _TEMPORAL_FILE_MAP = {
+ 'domestic': {'tasfile': 'tas', 'rfile': 'domr'},
+ 'electricity': {'hddfile': 'hdd', 'cddfile': 'cdd'},
+ 'irrigation': {'irrfile': 'irr'},
+ 'weights': {'weightfile': 'weight'},
+ }
+
+ def _normalize_temporal_config(self):
+ if self.temporal_config is not None:
+ for cfg in self.temporal_config.values():
+ cfg['kwargs'] = {k: self._resolve_path(v) if k.endswith('file') or k == 'gcam_db' else v
+ for k, v in cfg.get('kwargs', {}).items()}
+ return
+
+ if not self.perform_temporal and not self.temporal_methods and not self.temporal_files:
+ return
+
+ files = self.temporal_files or {}
+ methods = self.temporal_methods or {
+ 'Municipal': 'domestic', 'Electricity': 'electricity', 'Irrigation': 'irrigation'}
+
+ config = {}
+ for sector, method in methods.items():
+ kwargs = {k: self._resolve_path(files.get(v))
+ for k, v in self._TEMPORAL_FILE_MAP.get(method, {}).items()}
+ if method in ('electricity', 'irrigation') and self.map_files:
+ kwargs['regionfile'] = self._resolve_path(self.map_files[0])
+ if method == 'electricity':
+ kwargs['gcam_db'] = self._resolve_path(self.gcam_db)
+ config[sector] = {'method': method,
+ 'kwargs': {k: v for k, v in kwargs.items() if v is not None}}
+
+ self.temporal_config = config or None
def _parse_proxy_files(self):
"""Handle several shorthand expressions in the proxy catalog"""
out = dict()
- # name may be something like "ssp1_[YEAR].tif", which actually refers to multiple files
+ # name may be something like "ssp1_{YEAR}.tif", which actually refers to multiple files
# such as "ssp1_2010.tif" and "ssp1_2020.tif" when info['years'] == [2010, 2020]
for name, info in self.proxy_files.items():
# promote strs to list
@@ -162,17 +208,17 @@ def _load_proxies(self):
"""Load all proxies from the catalog, regrid to target spatial resolution, and interpolate to target years"""
print('Loading Proxy Data')
# align each variable spatially
- dataarrays = [da for filename, info in self.proxy_files.items() for da in
- load_file(os.path.join(self.root, filename), self.resolution, **info).values()]
+ dataarrays = [da for filename, info in self.proxy_files.items() for da in load_file(
+ os.path.join(self.root, filename), self.resolution, bounds=self.bounds, **info).values()]
print('Interpolating Proxies')
# interpolate each variable, then merge to one array
- self.proxies = xr.merge(interp_helper(xr.concat([da for da in dataarrays if da.name == variable], 'year'),
+ self.proxies = xr.merge(interp_helper(xr.concat([da for da in dataarrays if da.name == variable], 'year', coords='minimal'),
self.years) for variable in set(da.name for da in dataarrays)).to_array()
def _load_region_masks(self):
self.region_masks = xr.concat([load_region_map(os.path.join(self.root, filename), masks=True,
- target_resolution=self.resolution)
+ target_resolution=self.resolution, bounds=self.bounds)
for filename in self.map_files], dim='region')
def _load_inputs(self):
@@ -186,6 +232,8 @@ def _load_inputs(self):
# filter inputs to valid regions and years
self.inputs = self.inputs[(self.inputs.region.isin(self.region_masks.region.data)) &
(self.inputs.year.isin(self.years))]
+ # replace "/" with "_" because it causes problems with netcdf variable names
+ self.inputs['sector'] = self.inputs.sector.str.replace('/', '_')
def downscale(self, distribution, inputs, region_masks):
"""Actual spatial downscaling happens here
@@ -209,13 +257,45 @@ def downscale(self, distribution, inputs, region_masks):
return out
+ def disaggregate_source(self):
+ """Disaggregate water demand by source (runoff, groundwater, desal)"""
+
+ print('Disaggregating Source')
+
+ # initialize source shares object
+ get_shares = get_source_shares(gcam_db=self.gcam_db, demand_type=self.demand_type,
+ region_masks=self.region_masks, years=self.years)
+
+ # query and calculate shares
+ shares_df = get_shares.calculate_shares()
+
+ # apply shares to the gridded region masks
+ gridded_shares = get_shares.generate_gridded_shares(shares_df)
+
+ return gridded_shares
+
+
def run_model(self):
self.outputs = xr.Dataset()
+ self.griddedshares = xr.Dataset()
+ self.disaggregated_sw = xr.Dataset()
+ self.disaggregated_gw = xr.Dataset()
self._load_proxies()
self._load_region_masks()
self._load_inputs()
+ # disaggregate water supply source
+ # disaggregate water supply source
+ if self.source_disaggregation:
+ gshares = xr.Dataset(self.disaggregate_source())
+
+ # store the disaggregated results
+ self.griddedshares = gshares
+ if self.output_dir is not None:
+ filename = os.path.join(self.output_dir, f'gridded_runoff_shares.nc')
+ gshares['runoff'].to_netcdf(filename)
+
for supersector, rules in self.downscaling_rules.items():
print(f'Downscaling {supersector}')
@@ -224,7 +304,8 @@ def run_model(self):
rules = {supersector: rules}
proxies = xr.Dataset(
- {sector: self.proxies.sel(variable=proxy if isinstance(proxy, list) else [proxy]).sum('variable')
+ {sector.replace('/', '_'):
+ self.proxies.sel(variable=proxy if isinstance(proxy, list) else [proxy]).sum('variable')
for sector, proxy in rules.items()}
).to_array(dim='sector')
@@ -246,29 +327,50 @@ def run_model(self):
region_masks_total = self.region_masks.sel(region=inputs_total.region)
- downscaled = self.downscale(downscaled, inputs_total, region_masks_total)
+ # alternate between applying total and individual sector constraints so that both are met
+ for i in range(self.supersector_iterations):
+ downscaled = self.downscale(downscaled, inputs_total, region_masks_total)
+ downscaled = self.downscale(downscaled, inputs, region_masks)
+
+ # in a lot of cases this could be optimized by solving the intersections at region scale first,
+ # then downscaling once, but harder to implement, especially if differing regions are not subsets
- if self.perform_temporal:
+ # handle irrigation withdrawal conveyance efficiency if set
+ if (supersector=='Irrigation') and (self.demand_type=='withdrawals') and (self.irrigation_conveyance_efficiency is not None):
+ downscaled = downscaled / self.irrigation_conveyance_efficiency
+
+ # write spatial downscaling outputs
+ if self.output_dir is not None:
+ filename = os.path.join(self.output_dir, f'{supersector}_{self.demand_type}.nc')
+ encoding = {sector: {'zlib': True, 'complevel': 5} for sector in downscaled.sector.data}
+ downscaled.to_dataset(dim='sector').to_netcdf(filename, encoding=encoding)
+ downscaled = xr.open_dataset(filename).to_array(dim='sector') # hopefully this keeps dask happy
+
+ if self.temporal_config is not None:
# calculate the monthly distributions (share of annual) for each year
# this is how we'll do this for now
- if supersector.lower() in self.temporal_methods:
- module = f'tethys.tdmethods.{self.temporal_methods[supersector.lower()]}'
- distribution = getattr(importlib.import_module(module), 'temporal_distribution')(self)
+ if supersector in self.temporal_config:
+ module = f'tethys.tdmethods.' + self.temporal_config[supersector]['method']
+ temporal_distribution = getattr(importlib.import_module(module), 'temporal_distribution')
+ years = range(self.years[0], self.years[-1] + 1)
+ kwargs = self.temporal_config[supersector]['kwargs']
+ distribution = temporal_distribution(years=years, resolution=self.resolution,
+ bounds=self.bounds, **kwargs)
else:
+ # fall back to uniform distribution
distribution = xr.DataArray(np.full(12, 1/12, np.float32), coords=dict(month=range(1, 13)))
downscaled = interp_helper(downscaled) * distribution
- self.outputs.update(downscaled.to_dataset(dim='sector'))
+ # write temporal downscaling outputs
+ if self.output_dir is not None:
+ filename = os.path.join(self.output_dir, f'{supersector}_{self.demand_type}_monthly.nc')
+ encoding = {sector: {'zlib': True, 'complevel': 5} for sector in downscaled.sector.data}
+ downscaled.to_dataset(dim='sector').compute().to_netcdf(filename, encoding=encoding)
+ downscaled = xr.open_dataset(filename).to_array(dim='sector') # hopefully this keeps dask happy
- if self.output_file is not None:
- print('Writing Outputs')
- # cannot have '/' in netcdf variable name
- self.outputs = self.outputs.rename({name: name.replace('/', '_') for name in list(self.outputs)})
- # compression
- encoding = {variable: {'zlib': True, 'complevel': 5} for variable in self.outputs}
- self.outputs.to_netcdf(self.output_file, encoding=encoding)
+ self.outputs.update(downscaled.to_dataset(dim='sector'))
def reaggregate(self, region_masks=None):
"""Reaggregate from grid cells to regions
diff --git a/tethys/tdmethods/domestic.py b/tethys/tdmethods/domestic.py
index ac1afbf..01d7d8b 100644
--- a/tethys/tdmethods/domestic.py
+++ b/tethys/tdmethods/domestic.py
@@ -2,12 +2,17 @@
from tethys.datareader.gridded import load_file
-def temporal_distribution(model):
+def temporal_distribution(years, resolution=None, tasfile=None, rfile=None, tasvar='tas', rvar='amplitude', bounds=None):
"""Temporal downscaling for domestic water demand using algorithm from Wada et al. (2011)"""
- years = range(model.years[0], model.years[-1] + 1)
- tas = load_file(model.temporal_files['tas'], model.resolution, years, regrid_method='intensive')['tas']
- amplitude = load_file(model.temporal_files['domr'], model.resolution, years, regrid_method='label')['amplitude']
+ if hasattr(years, 'temporal_config'):
+ model = years
+ cfg = dict((model.temporal_config or {}).get('Municipal', {}).get('kwargs', {}) or {})
+ cfg.setdefault('bounds', model.bounds)
+ return temporal_distribution(range(model.years[0], model.years[-1] + 1), model.resolution, **cfg)
+
+ tas = load_file(tasfile, resolution, years, bounds=bounds, regrid_method='intensive', variables=[tasvar])[tasvar]
+ amplitude = load_file(rfile, resolution, years, bounds=bounds, regrid_method='label', variables=[rvar])[rvar]
ranges = tas.max(dim='month') - tas.min(dim='month')
ranges = xr.where(ranges != 0, ranges, 1) # avoid 0/0
diff --git a/tethys/tdmethods/electricity.py b/tethys/tdmethods/electricity.py
index 67830b6..4a92d0d 100644
--- a/tethys/tdmethods/electricity.py
+++ b/tethys/tdmethods/electricity.py
@@ -1,25 +1,31 @@
-import os
import numpy as np
import pandas as pd
import xarray as xr
from tethys.datareader.regional import elec_sector_weights
from tethys.datareader.gridded import load_file, interp_helper
+from tethys.datareader.maps import load_region_map
-def temporal_distribution(model):
+def temporal_distribution(years, resolution=None, hddfile=None, cddfile=None, regionfile=None, gcam_db=None,
+ hddvar='hdd', cddvar='cdd', bounds=None):
"""Temporal downscaling of water demand for electricity generation using algorithm from Voisin et al. (2013)"""
- years = range(model.years[0], model.years[-1] + 1)
- hdd = load_file(model.temporal_files['hdd'], model.resolution, years, regrid_method='intensive')['hdd']
- cdd = load_file(model.temporal_files['cdd'], model.resolution, years, regrid_method='intensive')['cdd']
-
- weights = elec_sector_weights(os.path.join(model.root, model.gcam_db))
- weights = weights[(weights.region.isin(model.inputs.region[model.inputs.sector == 'Electricity'])) &
- (weights.region.isin(model.region_masks.region.data)) &
- (weights.year.isin(model.years))].set_index(['region', 'sector', 'year']
- )['value'].to_xarray().fillna(0).astype(np.float32)
+
+ if hasattr(years, 'temporal_config'):
+ model = years
+ cfg = dict((model.temporal_config or {}).get('Electricity', {}).get('kwargs', {}) or {})
+ cfg.setdefault('bounds', model.bounds)
+ return temporal_distribution(range(model.years[0], model.years[-1] + 1), model.resolution, **cfg)
+
+ # get weights of heating/cooling/other by location and time
+ regions = load_region_map(regionfile, masks=True, target_resolution=resolution, bounds=bounds)
+ weights = elec_sector_weights(gcam_db)
+ weights = weights[(weights.region.isin(regions.region.data)) & (weights.year.isin(years))]
+ weights = weights.set_index(['region', 'sector', 'year'])['value'].to_xarray().fillna(0).astype(np.float32)
+ weights = weights.dot(regions.sel(region=weights.region), dims='region')
weights = interp_helper(weights)
- region_masks = model.region_masks.sel(region=weights.region)
+ hdd = load_file(hddfile, resolution, years, bounds=bounds, regrid_method='intensive', variables=[hddvar])[hddvar]
+ cdd = load_file(cddfile, resolution, years, bounds=bounds, regrid_method='intensive', variables=[cddvar])[cddvar]
# this formula is annoying to implement because of the hdd/cdd thresholds and reallocation of weights
hdd_sums = hdd.sum(dim='month')
@@ -33,19 +39,13 @@ def temporal_distribution(model):
hdd = xr.where((hdd_sums < 650) & (cdd_sums < 450), 1 / 12, hdd)
cdd = xr.where((hdd_sums < 650) & (cdd_sums < 450), 1 / 12, cdd)
- # redo sums based on reallocation
- hdd_sums = hdd.sum(dim='month')
- cdd_sums = cdd.sum(dim='month')
# prevent 0/0
- hdd_sums = xr.where(hdd_sums != 0, hdd_sums, 1)
- cdd_sums = xr.where(cdd_sums != 0, cdd_sums, 1)
- hdd /= hdd_sums
- cdd /= cdd_sums
+ hdd /= hdd.sum(dim='month').where(lambda x: x != 0, 1)
+ cdd /= cdd.sum(dim='month').where(lambda x: x != 0, 1)
distribution = xr.concat([hdd, cdd, xr.full_like(hdd, 1/12)],
dim=pd.Series(['Heating', 'Cooling', 'Other'], name='sector'))
- distribution = distribution.where(region_masks, 0)
- distribution = distribution.dot(weights, dims=('sector', 'region'))
+ distribution = distribution.dot(weights, dims='sector')
return distribution
diff --git a/tethys/tdmethods/irrigation.py b/tethys/tdmethods/irrigation.py
index e9275b4..59781fd 100644
--- a/tethys/tdmethods/irrigation.py
+++ b/tethys/tdmethods/irrigation.py
@@ -1,16 +1,19 @@
from tethys.datareader.gridded import load_file
+from tethys.datareader.maps import load_region_map
-def temporal_distribution(model):
+def temporal_distribution(years, resolution=None, regionfile=None, irrfile=None, irrvar='pirrww', bounds=None):
"""Temporal downscaling of irrigation water demand"""
- years = range(model.years[0], model.years[-1] + 1)
- irr = load_file(model.temporal_files['irr'], model.resolution, years, regrid_method='label')['pirrww']
+ if hasattr(years, 'temporal_config'):
+ model = years
+ cfg = dict((model.temporal_config or {}).get('Irrigation', {}).get('kwargs', {}) or {})
+ cfg.setdefault('bounds', model.bounds)
+ return temporal_distribution(range(model.years[0], model.years[-1] + 1), model.resolution, **cfg)
- irr_regions = model.inputs.region[(model.inputs.sector == 'Irrigation') &
- (model.inputs.region.isin(model.region_masks.region.data))].unique()
+ irr = load_file(irrfile, resolution, years, bounds=bounds, regrid_method='label', variables=[irrvar])[irrvar]
- region_masks = model.region_masks.sel(region=irr_regions)
+ region_masks = load_region_map(regionfile, masks=True, target_resolution=resolution, bounds=bounds)
irr_grouped = irr.where(region_masks, 0)
month_sums = irr_grouped.sum(dim=('lat', 'lon'))
diff --git a/tethys/tdmethods/weights.py b/tethys/tdmethods/weights.py
new file mode 100644
index 0000000..4af7e33
--- /dev/null
+++ b/tethys/tdmethods/weights.py
@@ -0,0 +1,19 @@
+from tethys.datareader.gridded import load_file
+
+
+def temporal_distribution(years, resolution=None, weightfile=None, weightvar='weight', regrid_method='intensive',
+ prenormalized=False, bounds=None):
+ """Load a monthly distribution from filename"""
+
+ if hasattr(years, 'temporal_config'):
+ model = years
+ cfg = dict((model.temporal_config or {}).get('Weights', {}).get('kwargs', {}) or {})
+ cfg.setdefault('bounds', model.bounds)
+ return temporal_distribution(range(model.years[0], model.years[-1] + 1), model.resolution, **cfg)
+
+ distribution = load_file(weightfile, resolution, years, bounds=bounds, regrid_method=regrid_method,
+ variables=[weightvar])[weightvar]
+ if prenormalized is False:
+ distribution /= distribution.sum(dim='month').where(lambda x: x != 0, 1)
+
+ return distribution
diff --git a/tethys/utils/install_supplement.py b/tethys/utils/install_supplement.py
index 793d191..d7aef66 100644
--- a/tethys/utils/install_supplement.py
+++ b/tethys/utils/install_supplement.py
@@ -17,6 +17,8 @@
'2.0.0': 'https://zenodo.org/record/7569652/files/example.zip?download=1',
'2.0.1': 'https://zenodo.org/record/7569652/files/example.zip?download=1',
'2.0.2': 'https://zenodo.org/record/7569652/files/example.zip?download=1',
+ '2.1.0': 'https://zenodo.org/record/7569652/files/example.zip?download=1',
+ '2.2.0': 'https://zenodo.org/record/7569652/files/example.zip?download=1'
}
@@ -47,7 +49,7 @@ def get_example_data(
example_data_directory = default_download_dir
# get the current version of tethys that is installed
- current_version = importlib.metadata.version('tethys')
+ current_version = importlib.metadata.version('tethys-downscaling')
try:
url = DATA_VERSION_URLS[current_version]
diff --git a/tethys/utils/region_to_id_mapping.py b/tethys/utils/region_to_id_mapping.py
index 3992f56..1457917 100644
--- a/tethys/utils/region_to_id_mapping.py
+++ b/tethys/utils/region_to_id_mapping.py
@@ -507,5 +507,479 @@
'Papua New Guinea Coast': 173,
'Tasmania': 211,
'Solomon Islands': 176
+ },
+ "basin_name_mapping": {
+ 'Arctic Ocean Islands': 'ArcticIsl',
+ 'Northwest Territories': 'NWTerr',
+ 'Siberia North Coast': 'SiberiaN',
+ 'Siberia West Coast': 'SiberiaW',
+ 'Kara Sea Coast': 'KaraSea',
+ 'Lena': 'LenaR',
+ 'Pacific and Arctic Coast': 'PacArctic',
+ 'Scandinavia North Coast': 'ScndnvN',
+ 'Russia Barents Sea Coast': 'BarentsSea',
+ 'Mackenzie': 'Mackenzie',
+ 'Iceland': 'Iceland',
+ 'Sweden': 'Sweden',
+ 'Finland': 'Finland',
+ 'Northern Dvina': 'DvinaRN',
+ 'Hudson Bay Coast': 'HudsonBay',
+ 'Scotland': 'Scotland',
+ 'Neva': 'NevaR',
+ 'Volga': 'VolgaR',
+ 'Atlantic Ocean Seaboard': 'CanAtl',
+ 'Baltic Sea Coast': 'BalticSea',
+ 'Denmark Germany Coast': 'DnkGrmCst',
+ 'Narva': 'NarvaR',
+ 'Saskatchewan Nelson': 'NelsonR',
+ 'Ireland': 'Ireland',
+ 'Daugava': 'DaugavaR',
+ 'England and Wales': 'EngWales',
+ 'Fraser': 'FraserR',
+ 'Ems Weser': 'EmsWeserR',
+ 'Oder': 'OderR',
+ 'Wisla': 'WislaR',
+ 'Elbe': 'ElbeR',
+ 'Rhine': 'RhineR',
+ 'Poland Coast': 'PolandCst',
+ 'Churchill': 'ChurchillR',
+ 'Neman': 'NemanR',
+ 'Scheldt': 'ScheldtR',
+ 'Russia South East Coast': 'RusCstSE',
+ 'Ural': 'UralR',
+ 'Dnieper': 'DnieperR',
+ 'St Lawrence': 'StLwrncR',
+ 'France West Coast': 'FranceCstW',
+ 'Gobi Interior': 'Gobi',
+ 'Amur': 'AmurR',
+ 'Loire': 'LoireR',
+ 'Caspian Sea Coast': 'CaspianNE',
+ 'Seine': 'SeineR',
+ 'Black Sea North Coast': 'BlackSeaN',
+ 'Yenisei': 'YeniseiR',
+ 'Dniester': 'DniesterR',
+ 'Italy East Coast': 'ItalyCstE',
+ 'Japan': 'Japan',
+ 'Caspian Sea East Coast': 'CaspianE',
+ 'Don': 'DonR',
+ 'Danube': 'DanubeR',
+ 'Adriatic Sea Greece Black Sea Coast': 'AdrBlkSea',
+ 'Ob': 'ObR',
+ 'Po': 'PoR',
+ 'Amu Darya': 'AmuDaryaR',
+ 'Italy West Coast': 'ItalyCstW',
+ 'Spain Portugal Atlantic Coast': 'IberiaCst',
+ 'France South Coast': 'FranceCstS',
+ 'Rhone': 'RhoneR',
+ 'Mediterranean Sea Islands': 'MeditIsl',
+ 'Gironde': 'Gironde',
+ 'North and South Korea': 'Korea',
+ 'Bo Hai Korean Bay North Coast': 'BoHai',
+ 'Spain South and East Coast': 'SpainCstSE',
+ 'Lake Balkash': 'LBalkash',
+ 'Tiber': 'TiberR',
+ 'Black Sea South Coast': 'BlackSeaS',
+ 'Tagus': 'TagusR',
+ 'Caspian Sea South West Coast': 'CaspianSW',
+ 'Ebro': 'EbroR',
+ 'Douro': 'DouroR',
+ 'Mediterranean Sea East Coast': 'MeditE',
+ 'Syr Darya': 'SyrDaryaR',
+ 'Ziya He Interior': 'ZiyaHe',
+ 'China Coast': 'ChinaCst',
+ 'Huang He': 'HuangHeR',
+ 'Mediterranean South Coast': 'MeditS',
+ 'Guadiana': 'GuadianaR',
+ 'Central Iran': 'Iran',
+ 'Guadalquivir': 'GuadalqR',
+ 'Tigris Euphrates': 'TigrEuphR',
+ 'Tarim Interior': 'Tarim',
+ 'Africa North West Coast': 'AfrCstNW',
+ 'Nile': 'NileR',
+ 'Persian Gulf Coast': 'PersianGulf',
+ 'Indus': 'IndusR',
+ 'Farahrud': 'FarahrudR',
+ 'Baja California': 'MexBaja',
+ 'Plateau of Tibet Interior': 'Tibet',
+ 'Red Sea East Coast': 'RedSeaE',
+ 'Arabian Peninsula': 'ArabianP',
+ 'Dead Sea': 'DeadSea',
+ 'Mexico Northwest Coast': 'MexCstNW',
+ 'Helmand': 'Helmand',
+ 'Sinai Peninsula': 'SinaiP',
+ 'Eastern Jordan Syria': 'EJrdnSyr',
+ 'Africa Red Sea Gulf of Aden Coast': 'AfrCstNE',
+ 'Caribbean': 'Caribbean',
+ 'HamuniMashkel': 'HamuMashR',
+ 'Taiwan': 'Taiwan',
+ 'Arabian Sea Coast': 'ArabianSea',
+ 'North Gulf': 'MexGulf',
+ 'Yangtze': 'Yangtze',
+ 'Sabarmati': 'SabarmatiR',
+ 'Xun Jiang': 'XunJiang',
+ 'Hong (Red River)': 'Hong',
+ 'Ganges Bramaputra': 'GangesR',
+ 'Yucatan Peninsula': 'YucatanP',
+ 'South China Sea Coast': 'SChinaSea',
+ 'Mahi': 'MahiR',
+ 'Mexico Interior': 'MexInt',
+ 'Pacific Central Coast': 'MexCstW',
+ 'Bay of Bengal North East Coast': 'BengalBay',
+ 'Tapti': 'TaptiR',
+ 'Yasai': 'BengalW',
+ 'Philippines': 'Phlppns',
+ 'Brahmani': 'BrahmaniR',
+ 'North Marina Islands and Guam': 'Guam',
+ 'Mahanadi': 'MahanadiR',
+ 'Godavari': 'GodavariR',
+ 'Hainan': 'Hainan',
+ 'Mekong': 'Mekong',
+ 'Viet Nam Coast': 'VietnamCst',
+ 'Salween': 'Salween',
+ 'India North East Coast': 'IndCstNE',
+ 'India West Coast': 'IndCstW',
+ 'Papaloapan': 'Papaloapan',
+ 'Rio Lerma': 'RioLerma',
+ 'Rio Verde': 'RioVerde',
+ 'Grijalva Usumacinta': 'GrijUsuR',
+ 'Rio Balsas': 'RioBalsas',
+ 'Southern Central America': 'CntAmer',
+ 'Isthmus of Tehuantepec': 'Tehuantpc',
+ 'Irrawaddy': 'IrrawaddyR',
+ 'Sittaung': 'SittaungR',
+ 'Peninsula Malaysia': 'MalaysiaP',
+ 'Krishna': 'KrishnaR',
+ 'Andaman Nicobar Islands': 'AdnNicIsl',
+ 'Africa West Coast': 'AfrCstW',
+ 'Caribbean Coast': 'SAmerCstN',
+ 'Africa North Interior': 'AfrIntN',
+ 'India East Coast': 'IndCstE',
+ 'Chao Phraya': 'ChaoPhrR',
+ 'Pennar': 'PennarR',
+ 'Gulf of Thailand Coast': 'ThaiGulf',
+ 'Niger': 'NigerR',
+ 'Micronesia': 'Micronesia',
+ 'Lake Chad': 'LChad',
+ 'Senegal': 'SenegalR',
+ 'Cauvery': 'CauveryR',
+ 'Sri Lanka': 'SriLanka',
+ 'India South Coast': 'IndCstS',
+ 'Orinoco': 'OrinocoR',
+ 'Colombia Ecuador Pacific Coast': 'ColEcuaCst',
+ 'Palau and East Indonesia': 'IdnE',
+ 'North Borneo Coast': 'BorneoCstN',
+ 'Volta': 'VoltaR',
+ 'Northeast South America South Atlantic Coast': 'SAmerCstNE',
+ 'Gulf of Guinea': 'GuineaGulf',
+ 'Sumatra': 'Sumatra',
+ 'Sulawesi': 'Sulawesi',
+ 'Kalimantan': 'Kalimantan',
+ 'Magdalena': 'MagdalenaR',
+ 'Irian Jaya Coast': 'IrianJaya',
+ 'Amazon': 'AmazonR',
+ 'South Chile Pacific Coast': 'ChileCstS',
+ 'Shebelli Juba': 'ShebJubR',
+ 'Africa East Central Coast': 'AfrCstE',
+ 'North Brazil South Atlantic Coast': 'BrzCstN',
+ 'Papua New Guinea Coast': 'PapuaCst',
+ 'Tocantins': 'TocantinsR',
+ 'Java Timor': 'JavaTimor',
+ 'Solomon Islands': 'SolomonIsl',
+ 'Madagascar': 'Madagascar',
+ 'Sepik': 'SepikR',
+ 'Rift Valley': 'RiftValley',
+ 'Peru Pacific Coast': 'PeruCst',
+ 'Fly': 'FlyR',
+ 'Angola Coast': 'AngolaCst',
+ 'Congo': 'CongoR',
+ 'Australia North Coast': 'AusCstN',
+ 'South Pacific Islands': 'NewCaledn',
+ 'East Brazil South Atlantic Coast': 'BrzCstE',
+ 'Parnaiba': 'ParnaibaR',
+ 'Zambezi': 'ZambeziR',
+ 'Australia East Coast': 'AusCstE',
+ 'Africa Indian Ocean Coast': 'AfrCstSE',
+ 'Australia West Coast': 'AusCstW',
+ 'Sao Francisco': 'SaoFrancR',
+ 'Australia Interior': 'AusInt',
+ 'Orange': 'OrangeR',
+ 'Uruguay Brazil South Atlantic Coast': 'BrzCstS',
+ 'Namibia Coast': 'AfrCstSW',
+ 'Africa South Interior': 'AfrIntS',
+ 'South Africa South Coast': 'AfrCstS',
+ 'Limpopo': 'LimpopoR',
+ 'La Puna Region': 'LaPuna',
+ 'New Zealand': 'NewZealand',
+ 'Australia South Coast': 'AusCstS',
+ 'Mar Chiquita': 'MarChiq',
+ 'South Africa West Coast': 'AfrCstSSW',
+ 'Salinas Grandes': 'Salinas',
+ 'La Plata': 'RioLaPlata',
+ 'North Chile Pacific Coast': 'ChileCstN',
+ 'Murray Darling': 'MurrayDrlg',
+ 'Pampas Region': 'Pampas',
+ 'North Argentina South Atlantic Coast': 'ArgCstN',
+ 'Tasmania': 'Tasmania',
+ 'South America Colorado': 'ArgColoR',
+ 'Negro': 'NegroR',
+ 'Central Patagonia Highlands': 'Patagonia',
+ 'South Argentina South Atlantic Coast': 'ArgCstS',
+ 'Antarctica': 'Antarctica',
+ 'California River Basin': 'California',
+ 'Upper Mississippi Basin': 'MissppRN',
+ 'Lower Mississippi River Basin': 'MissppRS',
+ 'Upper Colorado River Basin': 'UsaColoRN',
+ 'Lower Colorado River Basin': 'UsaColoRS',
+ 'Great Basin': 'GreatBasin',
+ 'Missouri River Basin': 'MissouriR',
+ 'Arkansas White Red Basin': 'ArkWhtRedR',
+ 'Texas Gulf Coast Basin': 'TexasCst',
+ 'South Atlantic Gulf Basin': 'UsaCstSE',
+ 'Great Lakes Basin': 'GreatLakes',
+ 'Ohio River Basin': 'OhioR',
+ 'Pacific Northwest Basin': 'UsaPacNW',
+ 'Tennessee River Basin': 'TennR',
+ 'Rio Grande River Basin': 'RioGrande',
+ 'New England Basin': 'UsaCstNE',
+ 'Mid Atlantic Basin': 'UsaCstE',
+ 'Hawaii': 'Hawaii',
+ 'Narmada': 'NarmadaR'
+ },
+ "basin_name_mapping_im3": {
+ 'Arctic Ocean Islands': 'ArcticIsl',
+ 'Northwest Territories': 'NWTerr',
+ 'Siberia North Coast': 'SiberiaN',
+ 'Siberia West Coast': 'SiberiaW',
+ 'Kara Sea Coast': 'KaraSea',
+ 'Lena': 'LenaR',
+ 'Pacific and Arctic Coast': 'PacArctic',
+ 'Scandinavia North Coast': 'ScndnvN',
+ 'Russia Barents Sea Coast': 'BarentsSea',
+ 'Mackenzie': 'Mackenzie',
+ 'Iceland': 'Iceland',
+ 'Sweden': 'Sweden',
+ 'Finland': 'Finland',
+ 'Northern Dvina': 'DvinaRN',
+ 'Hudson Bay Coast': 'HudsonBay',
+ 'Scotland': 'Scotland',
+ 'Neva': 'NevaR',
+ 'Volga': 'VolgaR',
+ 'Atlantic Ocean Seaboard': 'CanAtl',
+ 'Baltic Sea Coast': 'BalticSea',
+ 'Denmark Germany Coast': 'DnkGrmCst',
+ 'Narva': 'NarvaR',
+ 'Saskatchewan Nelson': 'NelsonR',
+ 'Ireland': 'Ireland',
+ 'Daugava': 'DaugavaR',
+ 'England and Wales': 'EngWales',
+ 'Fraser': 'FraserR',
+ 'Ems Weser': 'EmsWeserR',
+ 'Oder': 'OderR',
+ 'Wisla': 'WislaR',
+ 'Elbe': 'ElbeR',
+ 'Rhine': 'RhineR',
+ 'Poland Coast': 'PolandCst',
+ 'Churchill': 'ChurchillR',
+ 'Neman': 'NemanR',
+ 'Scheldt': 'ScheldtR',
+ 'Russia South East Coast': 'RusCstSE',
+ 'Ural': 'UralR',
+ 'Dnieper': 'DnieperR',
+ 'St Lawrence': 'StLwrncR',
+ 'France West Coast': 'FranceCstW',
+ 'Gobi Interior': 'Gobi',
+ 'Amur': 'AmurR',
+ 'Loire': 'LoireR',
+ 'Caspian Sea Coast': 'CaspianNE',
+ 'Seine': 'SeineR',
+ 'Black Sea North Coast': 'BlackSeaN',
+ 'Yenisey': 'YeniseiR',
+ 'Dniester': 'DniesterR',
+ 'Italy East Coast': 'ItalyCstE',
+ 'Japan': 'Japan',
+ 'Caspian Sea East Coast': 'CaspianE',
+ 'Don': 'DonR',
+ 'Danube': 'DanubeR',
+ 'Adriatic Sea Greece Black Sea Coast': 'AdrBlkSea',
+ 'Ob': 'ObR',
+ 'Po': 'PoR',
+ 'Amu Darya': 'AmuDaryaR',
+ 'Italy West Coast': 'ItalyCstW',
+ 'Spain Portugal Atlantic Coast': 'IberiaCst',
+ 'France South Coast': 'FranceCstS',
+ 'Rhone': 'RhoneR',
+ 'Mediterranean Sea Islands': 'MeditIsl',
+ 'Gironde': 'Gironde',
+ 'North and South Korea': 'Korea',
+ 'Bo Hai Korean Bay North Coast': 'BoHai',
+ 'Spain South and East Coast': 'SpainCstSE',
+ 'Lake Balkash': 'LBalkash',
+ 'Tiber': 'TiberR',
+ 'Black Sea South Coast': 'BlackSeaS',
+ 'Tagus': 'TagusR',
+ 'Caspian Sea South West Coast': 'CaspianSW',
+ 'Ebro': 'EbroR',
+ 'Douro': 'DouroR',
+ 'Mediterranean Sea East Coast': 'MeditE',
+ 'Syr Darya': 'SyrDaryaR',
+ 'Ziya He Interior': 'ZiyaHe',
+ 'China Coast': 'ChinaCst',
+ 'Huang He': 'HuangHeR',
+ 'Mediterranean South Coast': 'MeditS',
+ 'Guadiana': 'GuadianaR',
+ 'Central Iran': 'Iran',
+ 'Guadalquivir': 'GuadalqR',
+ 'Tigris Euphrates': 'TigrEuphR',
+ 'Tarim Interior': 'Tarim',
+ 'Africa North West Coast': 'AfrCstNW',
+ 'Nile': 'NileR',
+ 'Persian Gulf Coast': 'PersianGulf',
+ 'Indus': 'IndusR',
+ 'Farahrud': 'FarahrudR',
+ 'Baja California': 'MexBaja',
+ 'Plateau of Tibet Interior': 'Tibet',
+ 'Red Sea East Coast': 'RedSeaE',
+ 'Arabian Peninsula': 'ArabianP',
+ 'Dead Sea': 'DeadSea',
+ 'Mexico Northwest Coast': 'MexCstNW',
+ 'Helmand': 'Helmand',
+ 'Sinai Peninsula': 'SinaiP',
+ 'Eastern Jordan Syria': 'EJrdnSyr',
+ 'Africa Red Sea Gulf of Aden Coast': 'AfrCstNE',
+ 'Caribbean': 'Caribbean',
+ 'Hamun i Mashkel': 'HamuMashR',
+ 'Taiwan': 'Taiwan',
+ 'Arabian Sea Coast': 'ArabianSea',
+ 'North Gulf': 'MexGulf',
+ 'Yangtze': 'Yangtze',
+ 'Sabarmati': 'SabarmatiR',
+ 'Xun Jiang': 'XunJiang',
+ 'Hong Red River': 'Hong',
+ 'Ganges Bramaputra': 'GangesR',
+ 'Yucatan Peninsula': 'YucatanP',
+ 'South China Sea Coast': 'SChinaSea',
+ 'Mahi': 'MahiR',
+ 'Mexico Interior': 'MexInt',
+ 'Pacific Central Coast': 'MexCstW',
+ 'Bay of Bengal North East Coast': 'BengalBay',
+ 'Tapti': 'TaptiR',
+ 'Yasai': 'BengalW',
+ 'Philippines': 'Phlppns',
+ 'Brahamani': 'BrahmaniR',
+ 'North Marina Islands and Guam': 'Guam',
+ 'Mahandi': 'MahanadiR',
+ 'Godavari': 'GodavariR',
+ 'Hainan': 'Hainan',
+ 'Mekong': 'Mekong',
+ 'Viet Nam Coast': 'VietnamCst',
+ 'Salween': 'Salween',
+ 'India North East Coast': 'IndCstNE',
+ 'India West Coast': 'IndCstW',
+ 'Papaloapan': 'Papaloapan',
+ 'Rio Lerma': 'RioLerma',
+ 'Rio Verde': 'RioVerde',
+ 'Grijalva Usumacinta': 'GrijUsuR',
+ 'Rio Balsas': 'RioBalsas',
+ 'Southern Central America': 'CntAmer',
+ 'Isthmus of Tehuantepec': 'Tehuantpc',
+ 'Irrawaddy': 'IrrawaddyR',
+ 'Sittang': 'SittaungR',
+ 'Peninsula Malaysia': 'MalaysiaP',
+ 'Krishna': 'KrishnaR',
+ 'Andaman Nicobar Islands': 'AdnNicIsl',
+ 'Africa West Coast': 'AfrCstW',
+ 'Caribbean Coast': 'SAmerCstN',
+ 'Africa North Interior': 'AfrIntN',
+ 'India East Coast': 'IndCstE',
+ 'Chao Phraya': 'ChaoPhrR',
+ 'Pennar': 'PennarR',
+ 'Gulf of Thailand Coast': 'ThaiGulf',
+ 'Niger': 'NigerR',
+ 'Micronesia': 'Micronesia',
+ 'Lake Chad': 'LChad',
+ 'Senegal': 'SenegalR',
+ 'Cauvery': 'CauveryR',
+ 'Sri Lanka': 'SriLanka',
+ 'India South Coast': 'IndCstS',
+ 'Orinoco': 'OrinocoR',
+ 'Colombia Ecuador Pacific Coast': 'ColEcuaCst',
+ 'Palau and East Indonesia': 'IdnE',
+ 'North Borneo Coast': 'BorneoCstN',
+ 'Volta': 'VoltaR',
+ 'Northeast South America South Atlantic Coast': 'SAmerCstNE',
+ 'Gulf of Guinea': 'GuineaGulf',
+ 'Sumatra': 'Sumatra',
+ 'Sulawesi': 'Sulawesi',
+ 'Kalimantan': 'Kalimantan',
+ 'Magdalena': 'MagdalenaR',
+ 'Irian Jaya Coast': 'IrianJaya',
+ 'Amazon': 'AmazonR',
+ 'South Chile Pacific Coast': 'ChileCstS',
+ 'Shebelli Juba': 'ShebJubR',
+ 'Africa East Central Coast': 'AfrCstE',
+ 'North Brazil South Atlantic Coast': 'BrzCstN',
+ 'Papua New Guinea Coast': 'PapuaCst',
+ 'Tocantins': 'TocantinsR',
+ 'Java Timor': 'JavaTimor',
+ 'Solomon Islands': 'SolomonIsl',
+ 'Madasgacar': 'Madagascar',
+ 'Sepik': 'SepikR',
+ 'Rift Valley': 'RiftValley',
+ 'Peru Pacific Coast': 'PeruCst',
+ 'Fly': 'FlyR',
+ 'Angola Coast': 'AngolaCst',
+ 'Congo': 'CongoR',
+ 'Australia North Coast': 'AusCstN',
+ 'South Pacific Islands': 'NewCaledn',
+ 'East Brazil South Atlantic Coast': 'BrzCstE',
+ 'Parnaiba': 'ParnaibaR',
+ 'Zambezi': 'ZambeziR',
+ 'Australia East Coast': 'AusCstE',
+ 'Africa Indian Ocean Coast': 'AfrCstSE',
+ 'Australia West Coast': 'AusCstW',
+ 'Sao Francisco': 'SaoFrancR',
+ 'Australia Interior': 'AusInt',
+ 'Orange': 'OrangeR',
+ 'Uruguay Brazil South Atlantic Coast': 'BrzCstS',
+ 'Namibia Coast': 'AfrCstSW',
+ 'Africa South Interior': 'AfrIntS',
+ 'South Africa South Coast': 'AfrCstS',
+ 'Limpopo': 'LimpopoR',
+ 'La Puna Region': 'LaPuna',
+ 'New Zealand': 'NewZealand',
+ 'Australia South Coast': 'AusCstS',
+ 'Mar Chiquita': 'MarChiq',
+ 'South Africa West Coast': 'AfrCstSSW',
+ 'Salinas Grandes': 'Salinas',
+ 'La Plata': 'RioLaPlata',
+ 'North Chile Pacific Coast': 'ChileCstN',
+ 'Murray Darling': 'MurrayDrlg',
+ 'Pampas Region': 'Pampas',
+ 'North Argentina South Atlantic Coast': 'ArgCstN',
+ 'Tasmania': 'Tasmania',
+ 'South America Colorado': 'ArgColoR',
+ 'Negro': 'NegroR',
+ 'Central Patagonia Highlands': 'Patagonia',
+ 'South Argentina South Atlantic Coast': 'ArgCstS',
+ 'Antarctica': 'Antarctica',
+ 'California River': 'California',
+ 'Upper Mississippi': 'MissppRN',
+ 'Lower Mississippi River': 'MissppRS',
+ 'Upper Colorado River': 'UsaColoRN',
+ 'Lower Colorado River': 'UsaColoRS',
+ 'Great': 'GreatBasin',
+ 'Missouri River': 'MissouriR',
+ 'Arkansas White Red': 'ArkWhtRedR',
+ 'Texas Gulf Coast': 'TexasCst',
+ 'South Atlantic Gulf': 'UsaCstSE',
+ 'Great Lakes': 'GreatLakes',
+ 'Ohio River': 'OhioR',
+ 'Pacific Northwest': 'UsaPacNW',
+ 'Tennessee River': 'TennR',
+ 'Rio Grande River': 'RioGrande',
+ 'New England': 'UsaCstNE',
+ 'Mid Atlantic': 'UsaCstE',
+ 'Hawaii': 'Hawaii',
+ 'Narmada': 'NarmadaR'
}
}
diff --git a/tethys/utils/source_disaggregation.py b/tethys/utils/source_disaggregation.py
new file mode 100644
index 0000000..cb0976e
--- /dev/null
+++ b/tethys/utils/source_disaggregation.py
@@ -0,0 +1,110 @@
+import os
+import pandas as pd
+import xarray as xr
+
+import gcamreader
+
+from tethys.datareader.easy_query import easy_query
+from tethys.datareader.regional import extract_resource_name
+from tethys.utils.region_to_id_mapping import name_to_id_mapping
+
+
+class get_source_shares:
+ def __init__(self, gcam_db, basin_name_mapping="basin_name_mapping_im3", demand_type='withdrawals', region_masks=None, years=None):
+ """
+ Initialize get_source_shares with parameters for GCAM database and basin mapping.
+
+ :param gcam_db: Path to the GCAM database file.
+ :param basin_name_mapping: Basin name mapping dictionary, can change to other mapping in name_to_id_mapping.
+ :param demand_type: Type of demand, either 'withdrawals' or 'consumption'.
+ :param region_masks: xarray DataArray containing region masks.
+ :param years: List of years to be included in the shares.
+ """
+ if region_masks is None:
+ raise ValueError("region_masks is required to compute gridded shares")
+ if years is None:
+ raise ValueError("years is required to compute gridded shares")
+
+ dbpath, dbfile = os.path.split(gcam_db)
+ self.conn = gcamreader.LocalDBConn(dbpath, dbfile)
+ self.basin_name_mapping = basin_name_mapping
+ self.demand_type = demand_type
+ self.region_masks = region_masks
+ self.years = list(years)
+
+ def calculate_shares(self):
+ """
+ Query the GCAM database, process water data for the specified demand type,
+ and calculate runoff and groundwater shares by region and year.
+
+ :return: Processed DataFrame with calculated shares.
+ """
+ # query GCAM database for water used by source for the configured demand type
+ if self.demand_type not in ('withdrawals', 'consumption'):
+ raise ValueError(f"Unsupported demand_type: {self.demand_type!r}")
+ query_type = f"*_water {self.demand_type}"
+ shares_df = self.conn.runQuery(easy_query('production', replace_filters=True, resource=query_type))
+ # extract and clean resource names (e.g., remove '_water withdrawals')
+ shares_df['resource'] = shares_df['resource'].apply(extract_resource_name)
+
+ # rest of the mappings should be fine, old GCAM versions have typos in basin names
+ if self.basin_name_mapping == "basin_name_mapping_im3":
+ shares_df['resource'] = shares_df['resource'].str.replace('_', ' ').str.replace('-', ' ') # replace '_' and '-' with spaces
+
+ # map resource names to short basin names and concatenate region and resource to work with region_masks
+ shares_df['resourcemap'] = shares_df['resource'].map(name_to_id_mapping[self.basin_name_mapping])
+ shares_df['region_resourcemap'] = shares_df['region'] + '_' + shares_df['resourcemap']
+
+ # calculate shares of runoff and groundwater
+ shares_df['share'] = shares_df.groupby(
+ ['scenario', 'region', 'region_resourcemap', 'year']
+ )['value'].transform(lambda x: x / x.sum() if x.sum() != 0 else 0)
+
+ # capture basins that are mostly belonging to other countries
+ if self.basin_name_mapping == 'basin_name_mapping_im3':
+ shares_df = shares_df.replace({'region_resourcemap': {
+ 'Canada_FraserR': 'USA_FraserR',
+ 'Canada_GreatLakes': 'USA_GreatLakes',
+ 'Mexico_MexCstNW': 'USA_MexCstNW',
+ 'Canada_NelsonR': 'USA_NelsonR',
+ }})
+
+ # filter shares based on the region masks from the config file, otherwise all regions would be included
+ shares_df = shares_df[shares_df['region_resourcemap'].isin(self.region_masks.region.values)]
+
+ return shares_df
+
+ def generate_gridded_shares(self, shares_df):
+ """
+ Generate gridded shares by subresource (e.g., runoff, groundwater).
+
+ :param shares_df: DataFrame containing shares calculated by `calculate_shares`.
+ :return: Dictionary of gridded shares by subresource.
+ """
+ gridded_shares_by_subresource = {}
+ for subresource in shares_df.subresource.unique():
+ gridded_shares_by_year = []
+ for year in self.years:
+ gridded_shares = None
+ for basin in shares_df.region_resourcemap.unique():
+ sel = shares_df[
+ (shares_df['year'] == year) &
+ (shares_df['region_resourcemap'] == basin) &
+ (shares_df['subresource'] == subresource)
+ ]
+ share_val = 0.0 if sel.empty else float(sel.iloc[0].share)
+ gridded_shares = xr.where(
+ self.region_masks.sel(region=basin).load(),
+ share_val,
+ 0 if gridded_shares is None else gridded_shares,
+ keep_attrs=False,
+ )
+ gridded_shares_by_year.append(gridded_shares)
+
+ # Stack years for the subresource
+ gridded_shares_by_subresource[subresource] = xr.concat(
+ gridded_shares_by_year,
+ dim=pd.Index(self.years, name="year"),
+ ).rename('share')
+
+ return gridded_shares_by_subresource