diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7ea64e1..6570c85 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,28 +16,26 @@ jobs: strategy: matrix: os: [ubuntu-latest] + python-version: ['3.9', '3.11'] env: OS: ${{ matrix.os }} - PYTHON: '3.9' + PYTHON: ${{ matrix.python-version }} steps: - uses: actions/checkout@v1 - name: Set up Python - uses: actions/setup-python@master + uses: actions/setup-python@v5 with: - python-version: 3.9 + python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip - pip install click-default-group - pip install . + pip install -e .[dev] - name: Test and generate coverage report on Linux run: | - pip install pytest - pip install pytest-cov pytest --cov=./ --cov-report=xml diff --git a/.github/workflows/paper.yml b/.github/workflows/paper.yml index 80800ce..6f2e8a8 100644 --- a/.github/workflows/paper.yml +++ b/.github/workflows/paper.yml @@ -1,5 +1,13 @@ --- -on: [push] +name: paper + +on: + push: + branches: + - main + pull_request: + branches: + - main jobs: paper: @@ -15,7 +23,7 @@ jobs: # This should be the path to the paper within your repo. paper-path: paper/paper.md - name: Upload - uses: actions/upload-artifact@v1 + uses: actions/upload-artifact@v4 with: name: paper # This is the output path where Pandoc will write the compiled diff --git a/README.md b/README.md index b110684..6f5fc5b 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Details on methodology and advanced usage are available on the [User Guide](http Install `tethys` using pip: -`pip install tethys-downscale` +`pip install tethys-downscaling` ## Contributing to Tethys diff --git a/docs/source/conf.py b/docs/source/conf.py index b84a011..4051ccf 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -26,7 +26,7 @@ project = 'tethys' copyright = '2021, Battelle Memorial Institute' -author = 'Chris R. Vernon' +author = 'Isaac Thompson, Chris R. Vernon, Hassan Niazi' # The full version, including alpha/beta/rc tags release = version diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst index 4d234e0..64a7032 100644 --- a/docs/source/getting_started.rst +++ b/docs/source/getting_started.rst @@ -21,7 +21,7 @@ As a prerequisite, you'll need to have `Python `_ configuration file. The options in this file correspond to the arguments passed to the :ref:`Tethys class `. Options not present in the config file will use the default. An overview is provided in the following table, with more details and examples below. -======================== ========================================================= -Option Description -======================== ========================================================= -:ref:`years` list of years to be included spatial downscaling -:ref:`resolution` resolution in degrees for spatial downscaling -:ref:`demand_type` choice between "withdrawals" (default) or "consumption" -:ref:`perform_temporal` choice between "false" (default) or "true" -:ref:`gcam_db` relative path to a GCAM database -:ref:`csv` relative path to csv file containing inputs -:ref:`output_file` name of file to write outputs to -:ref:`downscaling_rules` mapping from water demand sectors to proxy variables -:ref:`proxy_files` mapping of spatial proxy files to their years/variables -:ref:`map_files` list of files containing region maps -:ref:`temporal_methods` mapping of sector to temporal downscaling method -:ref:`temporal_files` files that will be accessible during temporal downscaling -======================== ========================================================= +======================================= ============================================================================= +Option Description +======================================= ============================================================================= +:ref:`years` list of years to be included spatial downscaling +:ref:`resolution` resolution in degrees for spatial downscaling +:ref:`demand_type` choice between "withdrawals" (default) or "consumption" +:ref:`perform_temporal` choice between "false" (default) or "true" +:ref:`gcam_db` relative path to a GCAM database +:ref:`csv` relative path to csv file containing inputs +:ref:`output_file` name of file to write outputs to +:ref:`downscaling_rules` mapping from water demand sectors to proxy variables +:ref:`proxy_files` mapping of spatial proxy files to their years/variables +:ref:`map_files` list of files containing region maps +:ref:`temporal_methods` mapping of sector to temporal downscaling method +:ref:`temporal_files` files that will be accessible during temporal downscaling +:ref:`source_disaggregation` if true, share of surface water vs groundwater withdrawal will be written out +:ref:`irrigation_conveyance_efficiency` if set, irrigation withdrawal will be divided by this number +======================================= ============================================================================= years @@ -272,6 +274,23 @@ Mapping of files that will be accessible to temporal downscaling methods. CDD: data/temporal/CDD_monthly.nc +source_disaggregation +^^^^^^^^^^^^^^^^^^^^ +If true, share of surface water vs groundwater withdrawal will be written out to a file called gridded_runoff_shares.nc +.. code-block:: yaml + + source_disaggregation: true + + +irrigation_conveyance_efficiency +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +If set, irrigation withdrawal will be divided by this number, representing GCAM's irrigation conveyance efficiency + +.. code-block:: yaml + + irrigation_conveyance_efficiency: 0.829937 + + Generalization -------------- **tethys** was developed with consideration for GCAM's breakdown of water demand, but was designed to be as flexible as possible with support for user-specified downscaling configurations. diff --git a/requirements.txt b/requirements.txt index 57c8033..dd9e8c9 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ PyYAML>=6.0 -gcamreader>=1.2.5 +gcamreader>=1.4.0 numpy>=1.22 pandas>=1.2.4 netCDF4>=1.6 @@ -7,8 +7,6 @@ xarray>=2022.09.0 rioxarray>=0.12.4 tqdm>=4.66.2 matplotlib>=3.8.3 -dask[complete]>=2022.12.1 -bokeh!=3.0.*,>=2.4.2 -geopandas>=0.14.3 -dask_geopandas>=0.3.1 -hvplot>=0.9.2 +dask>=2022.12.1 +distributed>=2022.12.1 +setuptools<81 diff --git a/setup.py b/setup.py index 2ea637e..63f95ee 100644 --- a/setup.py +++ b/setup.py @@ -23,35 +23,37 @@ def get_requirements(): url='https://github.com/JGCRI/tethys', packages=find_packages(), license='BSD-2-Clause', - author='Isaac Thompson, Chris Vernon', + author='Isaac Thompson, Chris Vernon, Hassan Niazi', python_requires='>=3.9, <4', include_package_data=True, install_requires=[ 'PyYAML>=6.0', - 'gcamreader>=1.2.5', + 'gcamreader==1.4.0', 'numpy>=1.22', 'pandas>=1.2.4', 'netCDF4>=1.6', - 'dask[complete]>=2022.12.1', - 'dask_geopandas>=0.3.1', + 'dask==2024.8.0', + 'distributed==2024.8.0', 'xarray>=2022.09.0', 'rioxarray>=0.12.4', 'tqdm>=4.66.2', 'matplotlib>=3.8.3', + 'setuptools<81', 'bokeh!=3.0.*,>=2.4.2', 'geopandas>=0.14.3', + 'dask_geopandas>=0.3.1', 'hvplot>=0.9.2', ], extras_require={ 'dev': [ - 'build~=0.5.1', - 'nbsphinx~=0.8.6', - 'setuptools~=57.0.0', - 'sphinx~=4.0.2', - 'sphinx-panels~=0.6.0', - 'sphinx-rtd-theme~=0.5.2', - 'sphinx-mathjax-offline~=0.0.1', - 'twine>=3.4.1', + 'build==0.10.0', + 'nbsphinx==0.8.12', + 'sphinx==4.5.0', + 'sphinx-panels==0.6.0', + 'sphinx-rtd-theme==0.5.2', + 'sphinx-mathjax-offline==0.0.2', + 'pytest>=8.3.0', + 'pytest-cov>=5.0.0', 'click-default-group>=1.2.4', ] } diff --git a/tests/test_dynamic_population.py b/tests/test_dynamic_population.py new file mode 100644 index 0000000..c775453 --- /dev/null +++ b/tests/test_dynamic_population.py @@ -0,0 +1,474 @@ +""" +Test script to verify dynamic population functionality in Tethys. + +This script compares outputs from two scenarios: +1. Static population: Uses only base year (2010) population for all years +2. Dynamic population: Uses multiple years (2010, 2020) with interpolation + +The comparison demonstrates whether dynamic population is working correctly. +""" + +import os +import yaml +import xarray as xr +import numpy as np +import pandas as pd +from pathlib import Path + +import tethys +from tethys.model import Tethys + + +class DynamicPopulationTest: + """Test framework for comparing static vs dynamic population scenarios.""" + + def __init__(self, test_data_dir=None): + """ + Initialize the test framework. + + :param test_data_dir: Path to test data directory. If None, uses default test data location. + """ + if test_data_dir is None: + self.test_data_dir = Path(__file__).parent / 'data' + else: + self.test_data_dir = Path(test_data_dir) + + self.base_config = None + self.static_config_path = None + self.dynamic_config_path = None + self.static_csv_path = None + self.dynamic_csv_path = None + self.static_results = None + self.dynamic_results = None + + def _load_base_config(self): + """Load the base test configuration.""" + config_file = self.test_data_dir / 'config_test.yml' + with open(config_file, 'r') as f: + self.base_config = yaml.safe_load(f) + + def _create_extended_csv(self, years): + """Create an extended CSV file with data for all specified years.""" + # Load original CSV + original_csv = self.test_data_dir / 'data.csv' + df = pd.read_csv(original_csv) + + # Get unique regions and sectors + regions = df['region'].unique() + sectors = df['sector'].unique() + + # Create extended data with all years + extended_rows = [] + for region in regions: + for sector in sectors: + region_sector_data = df[(df['region'] == region) & (df['sector'] == sector)].sort_values('year') + + if len(region_sector_data) > 0: + # Interpolate/extrapolate for missing years + existing_years = region_sector_data['year'].values + existing_values = region_sector_data['value'].values + + for year in years: + if year in existing_years: + # Use existing value + value = float(region_sector_data[region_sector_data['year'] == year]['value'].iloc[0]) + else: + # Linear interpolation/extrapolation + if year < existing_years[0]: + # Extrapolate backward + value = existing_values[0] + elif year > existing_years[-1]: + # Extrapolate forward (linear trend) + if len(existing_years) >= 2: + # Use last two points for trend + slope = (existing_values[-1] - existing_values[-2]) / (existing_years[-1] - existing_years[-2]) + value = existing_values[-1] + slope * (year - existing_years[-1]) + else: + value = existing_values[-1] + else: + # Interpolate between existing years + idx = np.searchsorted(existing_years, year) + y1, y2 = existing_years[idx-1], existing_years[idx] + v1, v2 = existing_values[idx-1], existing_values[idx] + if y2 != y1: + value = v1 + (v2 - v1) * (year - y1) / (y2 - y1) + else: + value = v1 + + extended_rows.append({ + 'region': region, + 'sector': sector, + 'year': year, + 'value': value + }) + + extended_df = pd.DataFrame(extended_rows) + return extended_df + + def _create_static_config(self): + """Create a configuration file for static population scenario.""" + # Create config file in test data directory so relative paths work + self.static_config_path = self.test_data_dir / 'config_static_temp.yml' + self.static_csv_path = self.test_data_dir / 'data_static_temp.csv' + + # Create extended CSV with all years + extended_df = self._create_extended_csv([2010, 2015, 2020, 2025, 2030]) + extended_df.to_csv(self.static_csv_path, index=False) + + # Copy base config and modify for static population + config = self.base_config.copy() + config['years'] = [2010, 2015, 2020, 2025, 2030] # Multiple years to test + config['csv'] = str(self.static_csv_path.name) # Use extended CSV + config['proxy_files'] = { + 'population_{year}.tif': { + 'variables': 'Population', + 'years': [2010] # Only base year + } + } + + with open(self.static_config_path, 'w') as f: + yaml.dump(config, f) + + return str(self.static_config_path) + + def _create_dynamic_config(self): + """Create a configuration file for dynamic population scenario.""" + self.dynamic_config_path = self.test_data_dir / 'config_dynamic_temp.yml' + self.dynamic_csv_path = self.test_data_dir / 'data_dynamic_temp.csv' + + # Create extended CSV with all years + extended_df = self._create_extended_csv([2010, 2015, 2020, 2025, 2030]) + extended_df.to_csv(self.dynamic_csv_path, index=False) + + # Copy base config and modify for dynamic population + config = self.base_config.copy() + config['years'] = [2010, 2015, 2020, 2025, 2030] # Multiple years to test + config['csv'] = str(self.dynamic_csv_path.name) # Use extended CSV + config['proxy_files'] = { + 'population_{year}.tif': { + 'variables': 'Population', + 'years': [2010, 2020] # Multiple years for interpolation (2030 will use 2020 as nearest endpoint) + } + } + + with open(self.dynamic_config_path, 'w') as f: + yaml.dump(config, f) + + return str(self.dynamic_config_path) + + def run_static_scenario(self): + """Run Tethys with static population configuration.""" + print("=" * 70) + print("Running STATIC population scenario (only 2010 population)") + print("=" * 70) + + if self.static_config_path is None: + self._create_static_config() + + # Use absolute path for config file + config_path = os.path.abspath(self.static_config_path) + model = Tethys(config_file=config_path) + model.run_model() + self.static_results = model.outputs + + print("Static scenario completed.\n") + return self.static_results + + def run_dynamic_scenario(self): + """Run Tethys with dynamic population configuration.""" + print("=" * 70) + print("Running DYNAMIC population scenario (2010 and 2020 population)") + print("=" * 70) + + if self.dynamic_config_path is None: + self._create_dynamic_config() + + # Use absolute path for config file + config_path = os.path.abspath(self.dynamic_config_path) + model = Tethys(config_file=config_path) + model.run_model() + self.dynamic_results = model.outputs + + print("Dynamic scenario completed.\n") + return self.dynamic_results + + def compare_outputs(self): + """Compare outputs from static and dynamic scenarios.""" + if self.static_results is None or self.dynamic_results is None: + raise ValueError("Both scenarios must be run before comparison.") + + print("=" * 70) + print("COMPARING STATIC vs DYNAMIC POPULATION OUTPUTS") + print("=" * 70) + + # Get the Municipal sector output (should use Population proxy) + sector = 'Municipal' + + if sector not in self.static_results.data_vars: + # Try to find the sector in the outputs + available_sectors = list(self.static_results.data_vars.keys()) + if available_sectors: + sector = available_sectors[0] + print(f"Warning: 'Municipal' not found, using '{sector}' instead.") + else: + raise ValueError("No sectors found in outputs.") + + static_da = self.static_results[sector] + dynamic_da = self.dynamic_results[sector] + + # Ensure same coordinates + static_da = static_da.reindex_like(dynamic_da, fill_value=0) + + # Calculate differences + diff = dynamic_da - static_da + abs_diff = np.abs(diff) + + # Calculate statistics per year + years = static_da.year.values if hasattr(static_da, 'year') else None + + comparison_stats = {} + + if years is not None: + for year in years: + static_year = static_da.sel(year=year, drop=True) + dynamic_year = dynamic_da.sel(year=year, drop=True) + diff_year = diff.sel(year=year, drop=True) + abs_diff_year = abs_diff.sel(year=year, drop=True) + + # Calculate totals + static_total = float(static_year.sum().values) + dynamic_total = float(dynamic_year.sum().values) + + # Calculate differences + total_diff = float(diff_year.sum().values) + total_abs_diff = float(abs_diff_year.sum().values) + max_abs_diff = float(abs_diff_year.max().values) + mean_abs_diff = float(abs_diff_year.mean().values) + + # Calculate relative difference + if static_total != 0: + rel_diff_pct = (total_diff / static_total) * 100 + else: + rel_diff_pct = np.nan + + comparison_stats[int(year)] = { + 'static_total': static_total, + 'dynamic_total': dynamic_total, + 'total_difference': total_diff, + 'total_abs_difference': total_abs_diff, + 'max_abs_difference': max_abs_diff, + 'mean_abs_difference': mean_abs_diff, + 'relative_difference_pct': rel_diff_pct + } + + return comparison_stats, diff, abs_diff + + def generate_report(self, comparison_stats, diff, abs_diff, output_file=None): + """Generate a detailed comparison report.""" + print("\n" + "=" * 70) + print("COMPARISON REPORT: STATIC vs DYNAMIC POPULATION") + print("=" * 70) + + # Summary table + print("\nSUMMARY STATISTICS BY YEAR:") + print("-" * 70) + print(f"{'Year':<8} {'Static Total':<15} {'Dynamic Total':<15} {'Difference':<15} {'Rel Diff %':<12}") + print("-" * 70) + + for year in sorted(comparison_stats.keys()): + stats = comparison_stats[year] + print(f"{year:<8} {stats['static_total']:<15.2f} {stats['dynamic_total']:<15.2f} " + f"{stats['total_difference']:<15.2f} {stats['relative_difference_pct']:<12.2f}") + + print("-" * 70) + + # Detailed statistics + print("\nDETAILED STATISTICS:") + print("-" * 70) + for year in sorted(comparison_stats.keys()): + stats = comparison_stats[year] + print(f"\nYear {year}:") + print(f" Total difference: {stats['total_difference']:.6f}") + print(f" Total absolute difference: {stats['total_abs_difference']:.6f}") + print(f" Maximum absolute difference: {stats['max_abs_difference']:.6f}") + print(f" Mean absolute difference: {stats['mean_abs_difference']:.6f}") + print(f" Relative difference: {stats['relative_difference_pct']:.2f}%") + + # Validation checks + print("\n" + "=" * 70) + print("VALIDATION CHECKS:") + print("=" * 70) + + validation_results = {} + + # Check 1: 2010 should be identical (baseline year) + if 2010 in comparison_stats: + diff_2010 = abs(comparison_stats[2010]['total_difference']) + if diff_2010 < 1e-6: + print("✓ PASS: Year 2010 outputs are identical (baseline year)") + validation_results['baseline_identical'] = True + else: + print(f"✗ FAIL: Year 2010 outputs differ by {diff_2010:.6f}") + validation_results['baseline_identical'] = False + + # Check 2: 2015 should differ (interpolation test) + if 2015 in comparison_stats: + diff_2015 = abs(comparison_stats[2015]['total_difference']) + if diff_2015 > 1e-6: + print(f"✓ PASS: Year 2015 outputs differ by {diff_2015:.6f} (interpolation working)") + validation_results['interpolation_working'] = True + else: + print(f"✗ FAIL: Year 2015 outputs are identical (interpolation may not be working)") + validation_results['interpolation_working'] = False + + # Check 3: 2020 should differ (future year test) + if 2020 in comparison_stats: + diff_2020 = abs(comparison_stats[2020]['total_difference']) + if diff_2020 > 1e-6: + print(f"✓ PASS: Year 2020 outputs differ by {diff_2020:.6f} (future year being used)") + validation_results['future_year_used'] = True + else: + print(f"✗ FAIL: Year 2020 outputs are identical (future year may not be used)") + validation_results['future_year_used'] = False + + # Check 4: 2025 should differ (will use 2020 as nearest endpoint since no 2030 data) + if 2025 in comparison_stats: + diff_2025 = abs(comparison_stats[2025]['total_difference']) + if diff_2025 > 1e-6: + print(f"✓ PASS: Year 2025 outputs differ by {diff_2025:.6f} (using 2020 as nearest endpoint)") + validation_results['interpolation_2025_working'] = True + else: + print(f"✗ FAIL: Year 2025 outputs are identical (may not be using 2020 endpoint)") + validation_results['interpolation_2025_working'] = False + + # Check 5: 2030 should differ (will use 2020 as nearest endpoint since no 2030 data) + if 2030 in comparison_stats: + diff_2030 = abs(comparison_stats[2030]['total_difference']) + if diff_2030 > 1e-6: + print(f"✓ PASS: Year 2030 outputs differ by {diff_2030:.6f} (using 2020 as nearest endpoint)") + validation_results['extended_future_year_used'] = True + else: + print(f"✗ FAIL: Year 2030 outputs are identical (may not be using 2020 endpoint)") + validation_results['extended_future_year_used'] = False + + # Overall assessment + print("\n" + "=" * 70) + print("OVERALL ASSESSMENT:") + print("=" * 70) + + if all(validation_results.values()): + print("✓✓✓ DYNAMIC POPULATION IS WORKING CORRECTLY ✓✓✓") + print(" - Baseline year (2010) is identical") + print(" - Interpolated year (2015) shows differences") + print(" - Future year (2020) shows differences") + print(" - Extended interpolated year (2025) shows differences") + print(" - Extended future year (2030) shows differences") + else: + failed_checks = [k for k, v in validation_results.items() if not v] + print("✗✗✗ DYNAMIC POPULATION MAY NOT BE WORKING AS EXPECTED ✗✗✗") + print(f" Failed checks: {', '.join(failed_checks)}") + + # Save to file if requested + if output_file: + self._save_report_to_file(comparison_stats, validation_results, output_file) + + return validation_results + + def _save_report_to_file(self, comparison_stats, validation_results, output_file): + """Save comparison report to a file.""" + with open(output_file, 'w') as f: + f.write("DYNAMIC POPULATION TEST REPORT\n") + f.write("=" * 70 + "\n\n") + + f.write("SUMMARY STATISTICS BY YEAR:\n") + f.write("-" * 70 + "\n") + f.write(f"{'Year':<8} {'Static Total':<15} {'Dynamic Total':<15} {'Difference':<15} {'Rel Diff %':<12}\n") + f.write("-" * 70 + "\n") + + for year in sorted(comparison_stats.keys()): + stats = comparison_stats[year] + f.write(f"{year:<8} {stats['static_total']:<15.2f} {stats['dynamic_total']:<15.2f} " + f"{stats['total_difference']:<15.2f} {stats['relative_difference_pct']:<12.2f}\n") + + f.write("\nVALIDATION RESULTS:\n") + f.write("-" * 70 + "\n") + for check, result in validation_results.items(): + f.write(f"{check}: {'PASS' if result else 'FAIL'}\n") + + print(f"\nReport saved to: {output_file}") + + def cleanup(self): + """Clean up temporary config and CSV files.""" + files_to_remove = [ + self.static_config_path, + self.dynamic_config_path, + self.static_csv_path, + self.dynamic_csv_path + ] + + removed = False + for file_path in files_to_remove: + if file_path and os.path.exists(file_path): + os.remove(file_path) + removed = True + + if removed: + print("Cleaned up temporary config and CSV files.") + + +def main(): + """Main function to run the dynamic population test.""" + import argparse + + parser = argparse.ArgumentParser(description='Test dynamic population functionality in Tethys') + parser.add_argument('--test-data-dir', type=str, default=None, + help='Path to test data directory (default: tests/data)') + parser.add_argument('--output-report', type=str, default=None, + help='Path to save comparison report (optional)') + parser.add_argument('--no-cleanup', action='store_true', + help='Keep temporary files after test') + + args = parser.parse_args() + + # Initialize test framework + test = DynamicPopulationTest(test_data_dir=args.test_data_dir) + + try: + # Load base config + test._load_base_config() + + # Create config files + test._create_static_config() + test._create_dynamic_config() + + # Run both scenarios + test.run_static_scenario() + test.run_dynamic_scenario() + + # Compare outputs + comparison_stats, diff, abs_diff = test.compare_outputs() + + # Generate report + validation_results = test.generate_report(comparison_stats, diff, abs_diff, + output_file=args.output_report) + + # Return exit code based on validation + if all(validation_results.values()): + return 0 + else: + return 1 + + except Exception as e: + print(f"\nERROR: {str(e)}") + import traceback + traceback.print_exc() + return 1 + finally: + if not args.no_cleanup: + test.cleanup() + + +if __name__ == '__main__': + import sys + sys.exit(main()) + diff --git a/tethys/__init__.py b/tethys/__init__.py index a88df2d..3fab7a4 100644 --- a/tethys/__init__.py +++ b/tethys/__init__.py @@ -3,4 +3,4 @@ from .utils.spatial import InputViewer -__version__ = "2.1" +__version__ = "2.2.0" diff --git a/tethys/datareader/easy_query.py b/tethys/datareader/easy_query.py index 723578e..68c660b 100644 --- a/tethys/datareader/easy_query.py +++ b/tethys/datareader/easy_query.py @@ -1,7 +1,7 @@ import gcamreader -def easy_query(variable, year_axis=None, **kwargs): +def easy_query(variable, year_axis=None, replace_filters=False, **kwargs): """Build a query for a GCAM database kwargs act as filters on nodes, eg. "sector='Beef'" is converted to the xpath "[@type='sector' and @name='Beef']" @@ -24,8 +24,11 @@ def easy_query(variable, year_axis=None, **kwargs): :return: gcamreader Query object """ - filters = dict(sector=None) # match nodes where @type='sector' by default - filters.update(kwargs) # update the filters with user-provided kwargs + if replace_filters: + filters = kwargs + else: + filters = dict(sector=None) # match nodes where @type='sector' by default + filters.update(kwargs) # update the filters with user-provided kwargs # filters are separated by "//*" to match any descendant xpath = "*" + "//*".join(handle_filter(k, v) for k, v in filters.items()) + f"//{variable}/node()" diff --git a/tethys/datareader/gridded.py b/tethys/datareader/gridded.py index 23d4059..2bf0012 100644 --- a/tethys/datareader/gridded.py +++ b/tethys/datareader/gridded.py @@ -2,7 +2,7 @@ import xarray as xr -def load_file(filename, target_resolution, years, variables=None, flags=None, regrid_method='extensive'): +def load_file(filename, target_resolution, years, variables=None, flags=(), bounds=None, regrid_method='extensive'): """Prepare a dataset from single file to be merged into a dataset of all proxies handles many oddities found in proxies @@ -12,13 +12,12 @@ def load_file(filename, target_resolution, years, variables=None, flags=None, re :param years: years to extract from the file :param variables: variables to extract from the file :param flags: list potentially containing 'cell_area_share' or 'short_name_as_name' + :param bounds: list [lat_min, lat_max, lon_min, lon_max] to crop to :param regrid_method: passed along to regrid :return: preprocessed data set """ - flags = [] if flags is None else flags - - ds = xr.open_dataset(filename, chunks='auto') + ds = xr.open_mfdataset(filename, chunks='auto') # handle tif (can only handle single band single variable single year currently) if 'band' in ds.coords: @@ -33,7 +32,10 @@ def load_file(filename, target_resolution, years, variables=None, flags=None, re # filter to desired variables if variables is not None: - ds = ds[variables] + if len(ds) == 1 and len(variables) > 1: # use single layer for all variables + ds[variables] = [ds[variables[0]] for i in variables] + else: + ds = ds[variables] # create a year dimension if missing, with the years reported for this file in the catalog if 'year' not in ds.coords: @@ -41,11 +43,7 @@ def load_file(filename, target_resolution, years, variables=None, flags=None, re else: ds = ds.sel(year=years, method='nearest') # nearest used for temporal files ds['year'] = years - - # do the year filtering - if years is not None and 'year' in ds.coords: - ds = ds.sel(year=years, method='nearest').chunk(chunks=dict(year=1)) - ds['year'] = years + ds = ds.chunk(year=1) # numeric stuff ds = ds.fillna(0).astype(np.float32) @@ -59,10 +57,12 @@ def load_file(filename, target_resolution, years, variables=None, flags=None, re # spatial aligning ds = pad_global(ds) ds = regrid(ds, target_resolution, method=regrid_method) + if bounds is not None: + ds = crop(ds, bounds) ds = ds.chunk(chunks=dict(lat=-1, lon=-1)) if 'month' in ds.coords: - ds = ds.chunk(chunks=dict(month=12)) + ds = ds.chunk(month=12) return ds @@ -133,6 +133,11 @@ def pad_global(ds): return ds +def crop(ds, bounds): + lat_min, lat_max, lon_min, lon_max = bounds + return ds.sel(lon=slice(lon_min, lon_max), lat=slice(lat_max, lat_min)) + + def regrid(ds, target_resolution, method='extensive'): """Simple regridding algorithm diff --git a/tethys/datareader/maps.py b/tethys/datareader/maps.py index 04ad026..9b24cc7 100644 --- a/tethys/datareader/maps.py +++ b/tethys/datareader/maps.py @@ -2,16 +2,17 @@ import pandas as pd import xarray as xr -from tethys.datareader.gridded import regrid +from tethys.datareader.gridded import regrid, pad_global, crop -def load_region_map(mapfile, masks=False, namefile=None, target_resolution=None, nodata=None, flip_lat=False): +def load_region_map(mapfile, masks=False, namefile=None, target_resolution=None, bounds=None, nodata=None, flip_lat=False): """ Load region map. :param mapfile: path to map file :param masks: bool whether to convert categorical map to layer of region masks :param namefile: optional path to csv with region names :param target_resolution: resolution to coerce map to. If None (default), use base resolution + :param bounds: list [lat_min, lat_max, lon_min, lon_max] to crop to :param nodata: nodata value (like 9999), will be replaced with 0 :param flip_lat: bool, whether the map is "upside down" """ @@ -41,9 +42,13 @@ def load_region_map(mapfile, masks=False, namefile=None, target_resolution=None, da = da.squeeze('band').drop_vars('band') if target_resolution is not None: + da = pad_global(da) da = regrid(da, target_resolution, method='label') da = da.chunk(chunks=dict(lat=-1, lon=-1)) + if bounds is not None: + da = crop(da, bounds) + # use region names provided in CSV namefile, otherwise check metadata for a names dict if namefile is not None: df = pd.read_csv(namefile) diff --git a/tethys/datareader/regional.py b/tethys/datareader/regional.py index 083eab7..ddd618f 100644 --- a/tethys/datareader/regional.py +++ b/tethys/datareader/regional.py @@ -18,7 +18,7 @@ def load_region_data(gcam_db, sectors, demand_type='withdrawals'): # remove subsector and technology if present, convert friendly water demand sector names to internal search_sectors = list(set(unfriendly_sector_name(i.split('/')[0]) for i in sectors)) - inputs = {'withdrawals': ['water_td_*_W', '*_water withdrawals', '*_desalinated water'], + inputs = {'withdrawals': ['water_td_*_W', '*_water withdrawals'], 'consumption': ['water_td_*_C', '*_water consumption']}.get(demand_type) df = conn.runQuery(easy_query('demand-physical', sector=search_sectors, technology='!water_td_*', input=inputs)) @@ -41,6 +41,19 @@ def load_region_data(gcam_db, sectors, demand_type='withdrawals'): return df +def extract_resource_name(x): + """Removes '_water withdrawals' or '_water consumption' from GCAM sector or resource name. + + :param x: resource name string. + :return: cleaned resource name without suffix. + """ + if x.endswith('_water withdrawals'): + return x.removesuffix('_water withdrawals') + if x.endswith('_water consumption'): + return x.removesuffix('_water consumption') + return x + + def extract_basin_name(x): """Maps 'water_td_irr_basin_C' to '_basin', and water_td_elec_C to ''""" if x.startswith('water_td_irr_'): diff --git a/tethys/model.py b/tethys/model.py index fd7db58..dbaf361 100644 --- a/tethys/model.py +++ b/tethys/model.py @@ -19,49 +19,59 @@ from tethys.datareader.gridded import load_file, interp_helper from tethys.datareader.maps import load_region_map +from tethys.utils.source_disaggregation import get_source_shares class Tethys: """Model wrapper for Tethys""" def __init__( - self, - config_file=None, - years=None, - resolution=0.125, + self, + config_file=None, + years=None, + resolution=0.125, + bounds=None, demand_type='withdrawals', - perform_temporal=False, - gcam_db=None, - csv=None, - output_file=None, - downscaling_rules=None, - proxy_files=None, - map_files=None, - temporal_files=None, - temporal_methods=None + source_disaggregation=None, + gcam_db=None, + csv=None, + output_dir=None, + supersector_iterations=0, + downscaling_rules=None, + proxy_files=None, + map_files=None, + temporal_config=None, + perform_temporal=False, + temporal_methods=None, + temporal_files=None, ): """Parameters can be specified in a YAML file or passed directly, with the config file taking precedence :param config_file: path to YAML configuration file containing these parameters - :param years: list of years to be included spatial downscaling + :param years: list of years to be included for spatial downscaling :param resolution: resolution in degrees for spatial downscaling + :param bounds: list [lat_min, lat_max, lon_min, lon_max] to crop to :param demand_type: choice between “withdrawals” (default) or “consumption” - :param perform_temporal: choice between False (default) or True + :param source_disaggregation: decide whether to disaggregate water demand by source (runoff, groundwater, desal) :param gcam_db: relative path to a GCAM database :param csv: relative path to csv file containing inputs - :param output_file: name of file to write outputs to + :param output_dir: directory to write outputs to + :param supersector_iterations: number of times to repeat applying individual and total sector constraints, default 0 :param downscaling_rules: mapping from water demand sectors to proxy variables :param proxy_files: mapping of spatial proxy files to their years/variables :param map_files: list of files containing region maps - :param temporal_files: mapping of sector to temporal downscaling method - :param temporal_methods: files that will be accessible during temporal downscaling + :param perform_temporal: if True, apply temporal downscaling + :param temporal_methods: legacy mapping of sector to method names in tethys.tdmethods + :param temporal_files: legacy mapping of file aliases used by temporal methods + :param temporal_config: mapping of sector to temporal downscaling method and arguments """ self.root = None # project level settings self.years = years self.resolution = resolution + self.bounds = bounds self.demand_type = demand_type - self.perform_temporal = perform_temporal + self.source_disaggregation = source_disaggregation # GCAM database info self.gcam_db = gcam_db @@ -70,21 +80,29 @@ def __init__( self.csv = csv # outputs - self.output_file = output_file + self.output_dir = output_dir self.downscaling_rules = downscaling_rules + self.supersector_iterations = supersector_iterations self.proxy_files = proxy_files self.map_files = map_files - self.temporal_files = temporal_files + self.perform_temporal = perform_temporal self.temporal_methods = temporal_methods + self.temporal_files = temporal_files + self.temporal_config = temporal_config # data we'll load or generate later self.region_masks = None self.proxies = None self.inputs = None self.outputs = None + self.shares = None + self.griddedshares = None + self.disaggregated_sw = None + self.disaggregated_gw = None + self.irrigation_conveyance_efficiency = None # settings in YAML override settings passed directly to __init__ if config_file is not None: @@ -99,29 +117,57 @@ def __init__( else: self.root = os.getcwd() - if self.temporal_methods is None: - self.temporal_methods = { - 'domestic': 'domestic', - 'municipal': 'domestic', - 'electricity': 'electricity', - 'irrigation': 'irrigation' - } - else: - self.temporal_methods = {k.lower(): v for k, v in self.temporal_methods.items()} - - if self.temporal_files is not None: - self.temporal_files = {k: os.path.join(self.root, v) for k, v in self.temporal_files.items()} - - if self.output_file is not None: - self.output_file = os.path.join(self.root, self.output_file) + if self.output_dir is not None: + self.output_dir = os.path.join(self.root, self.output_dir) self._parse_proxy_files() + self._normalize_temporal_config() + + def _resolve_path(self, value): + if value is None: + return None + return value if os.path.isabs(value) else os.path.join(self.root, value) + + # maps method name → {kwarg_name: temporal_files key} + _TEMPORAL_FILE_MAP = { + 'domestic': {'tasfile': 'tas', 'rfile': 'domr'}, + 'electricity': {'hddfile': 'hdd', 'cddfile': 'cdd'}, + 'irrigation': {'irrfile': 'irr'}, + 'weights': {'weightfile': 'weight'}, + } + + def _normalize_temporal_config(self): + if self.temporal_config is not None: + for cfg in self.temporal_config.values(): + cfg['kwargs'] = {k: self._resolve_path(v) if k.endswith('file') or k == 'gcam_db' else v + for k, v in cfg.get('kwargs', {}).items()} + return + + if not self.perform_temporal and not self.temporal_methods and not self.temporal_files: + return + + files = self.temporal_files or {} + methods = self.temporal_methods or { + 'Municipal': 'domestic', 'Electricity': 'electricity', 'Irrigation': 'irrigation'} + + config = {} + for sector, method in methods.items(): + kwargs = {k: self._resolve_path(files.get(v)) + for k, v in self._TEMPORAL_FILE_MAP.get(method, {}).items()} + if method in ('electricity', 'irrigation') and self.map_files: + kwargs['regionfile'] = self._resolve_path(self.map_files[0]) + if method == 'electricity': + kwargs['gcam_db'] = self._resolve_path(self.gcam_db) + config[sector] = {'method': method, + 'kwargs': {k: v for k, v in kwargs.items() if v is not None}} + + self.temporal_config = config or None def _parse_proxy_files(self): """Handle several shorthand expressions in the proxy catalog""" out = dict() - # name may be something like "ssp1_[YEAR].tif", which actually refers to multiple files + # name may be something like "ssp1_{YEAR}.tif", which actually refers to multiple files # such as "ssp1_2010.tif" and "ssp1_2020.tif" when info['years'] == [2010, 2020] for name, info in self.proxy_files.items(): # promote strs to list @@ -162,17 +208,17 @@ def _load_proxies(self): """Load all proxies from the catalog, regrid to target spatial resolution, and interpolate to target years""" print('Loading Proxy Data') # align each variable spatially - dataarrays = [da for filename, info in self.proxy_files.items() for da in - load_file(os.path.join(self.root, filename), self.resolution, **info).values()] + dataarrays = [da for filename, info in self.proxy_files.items() for da in load_file( + os.path.join(self.root, filename), self.resolution, bounds=self.bounds, **info).values()] print('Interpolating Proxies') # interpolate each variable, then merge to one array - self.proxies = xr.merge(interp_helper(xr.concat([da for da in dataarrays if da.name == variable], 'year'), + self.proxies = xr.merge(interp_helper(xr.concat([da for da in dataarrays if da.name == variable], 'year', coords='minimal'), self.years) for variable in set(da.name for da in dataarrays)).to_array() def _load_region_masks(self): self.region_masks = xr.concat([load_region_map(os.path.join(self.root, filename), masks=True, - target_resolution=self.resolution) + target_resolution=self.resolution, bounds=self.bounds) for filename in self.map_files], dim='region') def _load_inputs(self): @@ -186,6 +232,8 @@ def _load_inputs(self): # filter inputs to valid regions and years self.inputs = self.inputs[(self.inputs.region.isin(self.region_masks.region.data)) & (self.inputs.year.isin(self.years))] + # replace "/" with "_" because it causes problems with netcdf variable names + self.inputs['sector'] = self.inputs.sector.str.replace('/', '_') def downscale(self, distribution, inputs, region_masks): """Actual spatial downscaling happens here @@ -209,13 +257,45 @@ def downscale(self, distribution, inputs, region_masks): return out + def disaggregate_source(self): + """Disaggregate water demand by source (runoff, groundwater, desal)""" + + print('Disaggregating Source') + + # initialize source shares object + get_shares = get_source_shares(gcam_db=self.gcam_db, demand_type=self.demand_type, + region_masks=self.region_masks, years=self.years) + + # query and calculate shares + shares_df = get_shares.calculate_shares() + + # apply shares to the gridded region masks + gridded_shares = get_shares.generate_gridded_shares(shares_df) + + return gridded_shares + + def run_model(self): self.outputs = xr.Dataset() + self.griddedshares = xr.Dataset() + self.disaggregated_sw = xr.Dataset() + self.disaggregated_gw = xr.Dataset() self._load_proxies() self._load_region_masks() self._load_inputs() + # disaggregate water supply source + # disaggregate water supply source + if self.source_disaggregation: + gshares = xr.Dataset(self.disaggregate_source()) + + # store the disaggregated results + self.griddedshares = gshares + if self.output_dir is not None: + filename = os.path.join(self.output_dir, f'gridded_runoff_shares.nc') + gshares['runoff'].to_netcdf(filename) + for supersector, rules in self.downscaling_rules.items(): print(f'Downscaling {supersector}') @@ -224,7 +304,8 @@ def run_model(self): rules = {supersector: rules} proxies = xr.Dataset( - {sector: self.proxies.sel(variable=proxy if isinstance(proxy, list) else [proxy]).sum('variable') + {sector.replace('/', '_'): + self.proxies.sel(variable=proxy if isinstance(proxy, list) else [proxy]).sum('variable') for sector, proxy in rules.items()} ).to_array(dim='sector') @@ -246,29 +327,50 @@ def run_model(self): region_masks_total = self.region_masks.sel(region=inputs_total.region) - downscaled = self.downscale(downscaled, inputs_total, region_masks_total) + # alternate between applying total and individual sector constraints so that both are met + for i in range(self.supersector_iterations): + downscaled = self.downscale(downscaled, inputs_total, region_masks_total) + downscaled = self.downscale(downscaled, inputs, region_masks) + + # in a lot of cases this could be optimized by solving the intersections at region scale first, + # then downscaling once, but harder to implement, especially if differing regions are not subsets - if self.perform_temporal: + # handle irrigation withdrawal conveyance efficiency if set + if (supersector=='Irrigation') and (self.demand_type=='withdrawals') and (self.irrigation_conveyance_efficiency is not None): + downscaled = downscaled / self.irrigation_conveyance_efficiency + + # write spatial downscaling outputs + if self.output_dir is not None: + filename = os.path.join(self.output_dir, f'{supersector}_{self.demand_type}.nc') + encoding = {sector: {'zlib': True, 'complevel': 5} for sector in downscaled.sector.data} + downscaled.to_dataset(dim='sector').to_netcdf(filename, encoding=encoding) + downscaled = xr.open_dataset(filename).to_array(dim='sector') # hopefully this keeps dask happy + + if self.temporal_config is not None: # calculate the monthly distributions (share of annual) for each year # this is how we'll do this for now - if supersector.lower() in self.temporal_methods: - module = f'tethys.tdmethods.{self.temporal_methods[supersector.lower()]}' - distribution = getattr(importlib.import_module(module), 'temporal_distribution')(self) + if supersector in self.temporal_config: + module = f'tethys.tdmethods.' + self.temporal_config[supersector]['method'] + temporal_distribution = getattr(importlib.import_module(module), 'temporal_distribution') + years = range(self.years[0], self.years[-1] + 1) + kwargs = self.temporal_config[supersector]['kwargs'] + distribution = temporal_distribution(years=years, resolution=self.resolution, + bounds=self.bounds, **kwargs) else: + # fall back to uniform distribution distribution = xr.DataArray(np.full(12, 1/12, np.float32), coords=dict(month=range(1, 13))) downscaled = interp_helper(downscaled) * distribution - self.outputs.update(downscaled.to_dataset(dim='sector')) + # write temporal downscaling outputs + if self.output_dir is not None: + filename = os.path.join(self.output_dir, f'{supersector}_{self.demand_type}_monthly.nc') + encoding = {sector: {'zlib': True, 'complevel': 5} for sector in downscaled.sector.data} + downscaled.to_dataset(dim='sector').compute().to_netcdf(filename, encoding=encoding) + downscaled = xr.open_dataset(filename).to_array(dim='sector') # hopefully this keeps dask happy - if self.output_file is not None: - print('Writing Outputs') - # cannot have '/' in netcdf variable name - self.outputs = self.outputs.rename({name: name.replace('/', '_') for name in list(self.outputs)}) - # compression - encoding = {variable: {'zlib': True, 'complevel': 5} for variable in self.outputs} - self.outputs.to_netcdf(self.output_file, encoding=encoding) + self.outputs.update(downscaled.to_dataset(dim='sector')) def reaggregate(self, region_masks=None): """Reaggregate from grid cells to regions diff --git a/tethys/tdmethods/domestic.py b/tethys/tdmethods/domestic.py index ac1afbf..01d7d8b 100644 --- a/tethys/tdmethods/domestic.py +++ b/tethys/tdmethods/domestic.py @@ -2,12 +2,17 @@ from tethys.datareader.gridded import load_file -def temporal_distribution(model): +def temporal_distribution(years, resolution=None, tasfile=None, rfile=None, tasvar='tas', rvar='amplitude', bounds=None): """Temporal downscaling for domestic water demand using algorithm from Wada et al. (2011)""" - years = range(model.years[0], model.years[-1] + 1) - tas = load_file(model.temporal_files['tas'], model.resolution, years, regrid_method='intensive')['tas'] - amplitude = load_file(model.temporal_files['domr'], model.resolution, years, regrid_method='label')['amplitude'] + if hasattr(years, 'temporal_config'): + model = years + cfg = dict((model.temporal_config or {}).get('Municipal', {}).get('kwargs', {}) or {}) + cfg.setdefault('bounds', model.bounds) + return temporal_distribution(range(model.years[0], model.years[-1] + 1), model.resolution, **cfg) + + tas = load_file(tasfile, resolution, years, bounds=bounds, regrid_method='intensive', variables=[tasvar])[tasvar] + amplitude = load_file(rfile, resolution, years, bounds=bounds, regrid_method='label', variables=[rvar])[rvar] ranges = tas.max(dim='month') - tas.min(dim='month') ranges = xr.where(ranges != 0, ranges, 1) # avoid 0/0 diff --git a/tethys/tdmethods/electricity.py b/tethys/tdmethods/electricity.py index 67830b6..4a92d0d 100644 --- a/tethys/tdmethods/electricity.py +++ b/tethys/tdmethods/electricity.py @@ -1,25 +1,31 @@ -import os import numpy as np import pandas as pd import xarray as xr from tethys.datareader.regional import elec_sector_weights from tethys.datareader.gridded import load_file, interp_helper +from tethys.datareader.maps import load_region_map -def temporal_distribution(model): +def temporal_distribution(years, resolution=None, hddfile=None, cddfile=None, regionfile=None, gcam_db=None, + hddvar='hdd', cddvar='cdd', bounds=None): """Temporal downscaling of water demand for electricity generation using algorithm from Voisin et al. (2013)""" - years = range(model.years[0], model.years[-1] + 1) - hdd = load_file(model.temporal_files['hdd'], model.resolution, years, regrid_method='intensive')['hdd'] - cdd = load_file(model.temporal_files['cdd'], model.resolution, years, regrid_method='intensive')['cdd'] - - weights = elec_sector_weights(os.path.join(model.root, model.gcam_db)) - weights = weights[(weights.region.isin(model.inputs.region[model.inputs.sector == 'Electricity'])) & - (weights.region.isin(model.region_masks.region.data)) & - (weights.year.isin(model.years))].set_index(['region', 'sector', 'year'] - )['value'].to_xarray().fillna(0).astype(np.float32) + + if hasattr(years, 'temporal_config'): + model = years + cfg = dict((model.temporal_config or {}).get('Electricity', {}).get('kwargs', {}) or {}) + cfg.setdefault('bounds', model.bounds) + return temporal_distribution(range(model.years[0], model.years[-1] + 1), model.resolution, **cfg) + + # get weights of heating/cooling/other by location and time + regions = load_region_map(regionfile, masks=True, target_resolution=resolution, bounds=bounds) + weights = elec_sector_weights(gcam_db) + weights = weights[(weights.region.isin(regions.region.data)) & (weights.year.isin(years))] + weights = weights.set_index(['region', 'sector', 'year'])['value'].to_xarray().fillna(0).astype(np.float32) + weights = weights.dot(regions.sel(region=weights.region), dims='region') weights = interp_helper(weights) - region_masks = model.region_masks.sel(region=weights.region) + hdd = load_file(hddfile, resolution, years, bounds=bounds, regrid_method='intensive', variables=[hddvar])[hddvar] + cdd = load_file(cddfile, resolution, years, bounds=bounds, regrid_method='intensive', variables=[cddvar])[cddvar] # this formula is annoying to implement because of the hdd/cdd thresholds and reallocation of weights hdd_sums = hdd.sum(dim='month') @@ -33,19 +39,13 @@ def temporal_distribution(model): hdd = xr.where((hdd_sums < 650) & (cdd_sums < 450), 1 / 12, hdd) cdd = xr.where((hdd_sums < 650) & (cdd_sums < 450), 1 / 12, cdd) - # redo sums based on reallocation - hdd_sums = hdd.sum(dim='month') - cdd_sums = cdd.sum(dim='month') # prevent 0/0 - hdd_sums = xr.where(hdd_sums != 0, hdd_sums, 1) - cdd_sums = xr.where(cdd_sums != 0, cdd_sums, 1) - hdd /= hdd_sums - cdd /= cdd_sums + hdd /= hdd.sum(dim='month').where(lambda x: x != 0, 1) + cdd /= cdd.sum(dim='month').where(lambda x: x != 0, 1) distribution = xr.concat([hdd, cdd, xr.full_like(hdd, 1/12)], dim=pd.Series(['Heating', 'Cooling', 'Other'], name='sector')) - distribution = distribution.where(region_masks, 0) - distribution = distribution.dot(weights, dims=('sector', 'region')) + distribution = distribution.dot(weights, dims='sector') return distribution diff --git a/tethys/tdmethods/irrigation.py b/tethys/tdmethods/irrigation.py index e9275b4..59781fd 100644 --- a/tethys/tdmethods/irrigation.py +++ b/tethys/tdmethods/irrigation.py @@ -1,16 +1,19 @@ from tethys.datareader.gridded import load_file +from tethys.datareader.maps import load_region_map -def temporal_distribution(model): +def temporal_distribution(years, resolution=None, regionfile=None, irrfile=None, irrvar='pirrww', bounds=None): """Temporal downscaling of irrigation water demand""" - years = range(model.years[0], model.years[-1] + 1) - irr = load_file(model.temporal_files['irr'], model.resolution, years, regrid_method='label')['pirrww'] + if hasattr(years, 'temporal_config'): + model = years + cfg = dict((model.temporal_config or {}).get('Irrigation', {}).get('kwargs', {}) or {}) + cfg.setdefault('bounds', model.bounds) + return temporal_distribution(range(model.years[0], model.years[-1] + 1), model.resolution, **cfg) - irr_regions = model.inputs.region[(model.inputs.sector == 'Irrigation') & - (model.inputs.region.isin(model.region_masks.region.data))].unique() + irr = load_file(irrfile, resolution, years, bounds=bounds, regrid_method='label', variables=[irrvar])[irrvar] - region_masks = model.region_masks.sel(region=irr_regions) + region_masks = load_region_map(regionfile, masks=True, target_resolution=resolution, bounds=bounds) irr_grouped = irr.where(region_masks, 0) month_sums = irr_grouped.sum(dim=('lat', 'lon')) diff --git a/tethys/tdmethods/weights.py b/tethys/tdmethods/weights.py new file mode 100644 index 0000000..4af7e33 --- /dev/null +++ b/tethys/tdmethods/weights.py @@ -0,0 +1,19 @@ +from tethys.datareader.gridded import load_file + + +def temporal_distribution(years, resolution=None, weightfile=None, weightvar='weight', regrid_method='intensive', + prenormalized=False, bounds=None): + """Load a monthly distribution from filename""" + + if hasattr(years, 'temporal_config'): + model = years + cfg = dict((model.temporal_config or {}).get('Weights', {}).get('kwargs', {}) or {}) + cfg.setdefault('bounds', model.bounds) + return temporal_distribution(range(model.years[0], model.years[-1] + 1), model.resolution, **cfg) + + distribution = load_file(weightfile, resolution, years, bounds=bounds, regrid_method=regrid_method, + variables=[weightvar])[weightvar] + if prenormalized is False: + distribution /= distribution.sum(dim='month').where(lambda x: x != 0, 1) + + return distribution diff --git a/tethys/utils/install_supplement.py b/tethys/utils/install_supplement.py index 793d191..d7aef66 100644 --- a/tethys/utils/install_supplement.py +++ b/tethys/utils/install_supplement.py @@ -17,6 +17,8 @@ '2.0.0': 'https://zenodo.org/record/7569652/files/example.zip?download=1', '2.0.1': 'https://zenodo.org/record/7569652/files/example.zip?download=1', '2.0.2': 'https://zenodo.org/record/7569652/files/example.zip?download=1', + '2.1.0': 'https://zenodo.org/record/7569652/files/example.zip?download=1', + '2.2.0': 'https://zenodo.org/record/7569652/files/example.zip?download=1' } @@ -47,7 +49,7 @@ def get_example_data( example_data_directory = default_download_dir # get the current version of tethys that is installed - current_version = importlib.metadata.version('tethys') + current_version = importlib.metadata.version('tethys-downscaling') try: url = DATA_VERSION_URLS[current_version] diff --git a/tethys/utils/region_to_id_mapping.py b/tethys/utils/region_to_id_mapping.py index 3992f56..1457917 100644 --- a/tethys/utils/region_to_id_mapping.py +++ b/tethys/utils/region_to_id_mapping.py @@ -507,5 +507,479 @@ 'Papua New Guinea Coast': 173, 'Tasmania': 211, 'Solomon Islands': 176 + }, + "basin_name_mapping": { + 'Arctic Ocean Islands': 'ArcticIsl', + 'Northwest Territories': 'NWTerr', + 'Siberia North Coast': 'SiberiaN', + 'Siberia West Coast': 'SiberiaW', + 'Kara Sea Coast': 'KaraSea', + 'Lena': 'LenaR', + 'Pacific and Arctic Coast': 'PacArctic', + 'Scandinavia North Coast': 'ScndnvN', + 'Russia Barents Sea Coast': 'BarentsSea', + 'Mackenzie': 'Mackenzie', + 'Iceland': 'Iceland', + 'Sweden': 'Sweden', + 'Finland': 'Finland', + 'Northern Dvina': 'DvinaRN', + 'Hudson Bay Coast': 'HudsonBay', + 'Scotland': 'Scotland', + 'Neva': 'NevaR', + 'Volga': 'VolgaR', + 'Atlantic Ocean Seaboard': 'CanAtl', + 'Baltic Sea Coast': 'BalticSea', + 'Denmark Germany Coast': 'DnkGrmCst', + 'Narva': 'NarvaR', + 'Saskatchewan Nelson': 'NelsonR', + 'Ireland': 'Ireland', + 'Daugava': 'DaugavaR', + 'England and Wales': 'EngWales', + 'Fraser': 'FraserR', + 'Ems Weser': 'EmsWeserR', + 'Oder': 'OderR', + 'Wisla': 'WislaR', + 'Elbe': 'ElbeR', + 'Rhine': 'RhineR', + 'Poland Coast': 'PolandCst', + 'Churchill': 'ChurchillR', + 'Neman': 'NemanR', + 'Scheldt': 'ScheldtR', + 'Russia South East Coast': 'RusCstSE', + 'Ural': 'UralR', + 'Dnieper': 'DnieperR', + 'St Lawrence': 'StLwrncR', + 'France West Coast': 'FranceCstW', + 'Gobi Interior': 'Gobi', + 'Amur': 'AmurR', + 'Loire': 'LoireR', + 'Caspian Sea Coast': 'CaspianNE', + 'Seine': 'SeineR', + 'Black Sea North Coast': 'BlackSeaN', + 'Yenisei': 'YeniseiR', + 'Dniester': 'DniesterR', + 'Italy East Coast': 'ItalyCstE', + 'Japan': 'Japan', + 'Caspian Sea East Coast': 'CaspianE', + 'Don': 'DonR', + 'Danube': 'DanubeR', + 'Adriatic Sea Greece Black Sea Coast': 'AdrBlkSea', + 'Ob': 'ObR', + 'Po': 'PoR', + 'Amu Darya': 'AmuDaryaR', + 'Italy West Coast': 'ItalyCstW', + 'Spain Portugal Atlantic Coast': 'IberiaCst', + 'France South Coast': 'FranceCstS', + 'Rhone': 'RhoneR', + 'Mediterranean Sea Islands': 'MeditIsl', + 'Gironde': 'Gironde', + 'North and South Korea': 'Korea', + 'Bo Hai Korean Bay North Coast': 'BoHai', + 'Spain South and East Coast': 'SpainCstSE', + 'Lake Balkash': 'LBalkash', + 'Tiber': 'TiberR', + 'Black Sea South Coast': 'BlackSeaS', + 'Tagus': 'TagusR', + 'Caspian Sea South West Coast': 'CaspianSW', + 'Ebro': 'EbroR', + 'Douro': 'DouroR', + 'Mediterranean Sea East Coast': 'MeditE', + 'Syr Darya': 'SyrDaryaR', + 'Ziya He Interior': 'ZiyaHe', + 'China Coast': 'ChinaCst', + 'Huang He': 'HuangHeR', + 'Mediterranean South Coast': 'MeditS', + 'Guadiana': 'GuadianaR', + 'Central Iran': 'Iran', + 'Guadalquivir': 'GuadalqR', + 'Tigris Euphrates': 'TigrEuphR', + 'Tarim Interior': 'Tarim', + 'Africa North West Coast': 'AfrCstNW', + 'Nile': 'NileR', + 'Persian Gulf Coast': 'PersianGulf', + 'Indus': 'IndusR', + 'Farahrud': 'FarahrudR', + 'Baja California': 'MexBaja', + 'Plateau of Tibet Interior': 'Tibet', + 'Red Sea East Coast': 'RedSeaE', + 'Arabian Peninsula': 'ArabianP', + 'Dead Sea': 'DeadSea', + 'Mexico Northwest Coast': 'MexCstNW', + 'Helmand': 'Helmand', + 'Sinai Peninsula': 'SinaiP', + 'Eastern Jordan Syria': 'EJrdnSyr', + 'Africa Red Sea Gulf of Aden Coast': 'AfrCstNE', + 'Caribbean': 'Caribbean', + 'HamuniMashkel': 'HamuMashR', + 'Taiwan': 'Taiwan', + 'Arabian Sea Coast': 'ArabianSea', + 'North Gulf': 'MexGulf', + 'Yangtze': 'Yangtze', + 'Sabarmati': 'SabarmatiR', + 'Xun Jiang': 'XunJiang', + 'Hong (Red River)': 'Hong', + 'Ganges Bramaputra': 'GangesR', + 'Yucatan Peninsula': 'YucatanP', + 'South China Sea Coast': 'SChinaSea', + 'Mahi': 'MahiR', + 'Mexico Interior': 'MexInt', + 'Pacific Central Coast': 'MexCstW', + 'Bay of Bengal North East Coast': 'BengalBay', + 'Tapti': 'TaptiR', + 'Yasai': 'BengalW', + 'Philippines': 'Phlppns', + 'Brahmani': 'BrahmaniR', + 'North Marina Islands and Guam': 'Guam', + 'Mahanadi': 'MahanadiR', + 'Godavari': 'GodavariR', + 'Hainan': 'Hainan', + 'Mekong': 'Mekong', + 'Viet Nam Coast': 'VietnamCst', + 'Salween': 'Salween', + 'India North East Coast': 'IndCstNE', + 'India West Coast': 'IndCstW', + 'Papaloapan': 'Papaloapan', + 'Rio Lerma': 'RioLerma', + 'Rio Verde': 'RioVerde', + 'Grijalva Usumacinta': 'GrijUsuR', + 'Rio Balsas': 'RioBalsas', + 'Southern Central America': 'CntAmer', + 'Isthmus of Tehuantepec': 'Tehuantpc', + 'Irrawaddy': 'IrrawaddyR', + 'Sittaung': 'SittaungR', + 'Peninsula Malaysia': 'MalaysiaP', + 'Krishna': 'KrishnaR', + 'Andaman Nicobar Islands': 'AdnNicIsl', + 'Africa West Coast': 'AfrCstW', + 'Caribbean Coast': 'SAmerCstN', + 'Africa North Interior': 'AfrIntN', + 'India East Coast': 'IndCstE', + 'Chao Phraya': 'ChaoPhrR', + 'Pennar': 'PennarR', + 'Gulf of Thailand Coast': 'ThaiGulf', + 'Niger': 'NigerR', + 'Micronesia': 'Micronesia', + 'Lake Chad': 'LChad', + 'Senegal': 'SenegalR', + 'Cauvery': 'CauveryR', + 'Sri Lanka': 'SriLanka', + 'India South Coast': 'IndCstS', + 'Orinoco': 'OrinocoR', + 'Colombia Ecuador Pacific Coast': 'ColEcuaCst', + 'Palau and East Indonesia': 'IdnE', + 'North Borneo Coast': 'BorneoCstN', + 'Volta': 'VoltaR', + 'Northeast South America South Atlantic Coast': 'SAmerCstNE', + 'Gulf of Guinea': 'GuineaGulf', + 'Sumatra': 'Sumatra', + 'Sulawesi': 'Sulawesi', + 'Kalimantan': 'Kalimantan', + 'Magdalena': 'MagdalenaR', + 'Irian Jaya Coast': 'IrianJaya', + 'Amazon': 'AmazonR', + 'South Chile Pacific Coast': 'ChileCstS', + 'Shebelli Juba': 'ShebJubR', + 'Africa East Central Coast': 'AfrCstE', + 'North Brazil South Atlantic Coast': 'BrzCstN', + 'Papua New Guinea Coast': 'PapuaCst', + 'Tocantins': 'TocantinsR', + 'Java Timor': 'JavaTimor', + 'Solomon Islands': 'SolomonIsl', + 'Madagascar': 'Madagascar', + 'Sepik': 'SepikR', + 'Rift Valley': 'RiftValley', + 'Peru Pacific Coast': 'PeruCst', + 'Fly': 'FlyR', + 'Angola Coast': 'AngolaCst', + 'Congo': 'CongoR', + 'Australia North Coast': 'AusCstN', + 'South Pacific Islands': 'NewCaledn', + 'East Brazil South Atlantic Coast': 'BrzCstE', + 'Parnaiba': 'ParnaibaR', + 'Zambezi': 'ZambeziR', + 'Australia East Coast': 'AusCstE', + 'Africa Indian Ocean Coast': 'AfrCstSE', + 'Australia West Coast': 'AusCstW', + 'Sao Francisco': 'SaoFrancR', + 'Australia Interior': 'AusInt', + 'Orange': 'OrangeR', + 'Uruguay Brazil South Atlantic Coast': 'BrzCstS', + 'Namibia Coast': 'AfrCstSW', + 'Africa South Interior': 'AfrIntS', + 'South Africa South Coast': 'AfrCstS', + 'Limpopo': 'LimpopoR', + 'La Puna Region': 'LaPuna', + 'New Zealand': 'NewZealand', + 'Australia South Coast': 'AusCstS', + 'Mar Chiquita': 'MarChiq', + 'South Africa West Coast': 'AfrCstSSW', + 'Salinas Grandes': 'Salinas', + 'La Plata': 'RioLaPlata', + 'North Chile Pacific Coast': 'ChileCstN', + 'Murray Darling': 'MurrayDrlg', + 'Pampas Region': 'Pampas', + 'North Argentina South Atlantic Coast': 'ArgCstN', + 'Tasmania': 'Tasmania', + 'South America Colorado': 'ArgColoR', + 'Negro': 'NegroR', + 'Central Patagonia Highlands': 'Patagonia', + 'South Argentina South Atlantic Coast': 'ArgCstS', + 'Antarctica': 'Antarctica', + 'California River Basin': 'California', + 'Upper Mississippi Basin': 'MissppRN', + 'Lower Mississippi River Basin': 'MissppRS', + 'Upper Colorado River Basin': 'UsaColoRN', + 'Lower Colorado River Basin': 'UsaColoRS', + 'Great Basin': 'GreatBasin', + 'Missouri River Basin': 'MissouriR', + 'Arkansas White Red Basin': 'ArkWhtRedR', + 'Texas Gulf Coast Basin': 'TexasCst', + 'South Atlantic Gulf Basin': 'UsaCstSE', + 'Great Lakes Basin': 'GreatLakes', + 'Ohio River Basin': 'OhioR', + 'Pacific Northwest Basin': 'UsaPacNW', + 'Tennessee River Basin': 'TennR', + 'Rio Grande River Basin': 'RioGrande', + 'New England Basin': 'UsaCstNE', + 'Mid Atlantic Basin': 'UsaCstE', + 'Hawaii': 'Hawaii', + 'Narmada': 'NarmadaR' + }, + "basin_name_mapping_im3": { + 'Arctic Ocean Islands': 'ArcticIsl', + 'Northwest Territories': 'NWTerr', + 'Siberia North Coast': 'SiberiaN', + 'Siberia West Coast': 'SiberiaW', + 'Kara Sea Coast': 'KaraSea', + 'Lena': 'LenaR', + 'Pacific and Arctic Coast': 'PacArctic', + 'Scandinavia North Coast': 'ScndnvN', + 'Russia Barents Sea Coast': 'BarentsSea', + 'Mackenzie': 'Mackenzie', + 'Iceland': 'Iceland', + 'Sweden': 'Sweden', + 'Finland': 'Finland', + 'Northern Dvina': 'DvinaRN', + 'Hudson Bay Coast': 'HudsonBay', + 'Scotland': 'Scotland', + 'Neva': 'NevaR', + 'Volga': 'VolgaR', + 'Atlantic Ocean Seaboard': 'CanAtl', + 'Baltic Sea Coast': 'BalticSea', + 'Denmark Germany Coast': 'DnkGrmCst', + 'Narva': 'NarvaR', + 'Saskatchewan Nelson': 'NelsonR', + 'Ireland': 'Ireland', + 'Daugava': 'DaugavaR', + 'England and Wales': 'EngWales', + 'Fraser': 'FraserR', + 'Ems Weser': 'EmsWeserR', + 'Oder': 'OderR', + 'Wisla': 'WislaR', + 'Elbe': 'ElbeR', + 'Rhine': 'RhineR', + 'Poland Coast': 'PolandCst', + 'Churchill': 'ChurchillR', + 'Neman': 'NemanR', + 'Scheldt': 'ScheldtR', + 'Russia South East Coast': 'RusCstSE', + 'Ural': 'UralR', + 'Dnieper': 'DnieperR', + 'St Lawrence': 'StLwrncR', + 'France West Coast': 'FranceCstW', + 'Gobi Interior': 'Gobi', + 'Amur': 'AmurR', + 'Loire': 'LoireR', + 'Caspian Sea Coast': 'CaspianNE', + 'Seine': 'SeineR', + 'Black Sea North Coast': 'BlackSeaN', + 'Yenisey': 'YeniseiR', + 'Dniester': 'DniesterR', + 'Italy East Coast': 'ItalyCstE', + 'Japan': 'Japan', + 'Caspian Sea East Coast': 'CaspianE', + 'Don': 'DonR', + 'Danube': 'DanubeR', + 'Adriatic Sea Greece Black Sea Coast': 'AdrBlkSea', + 'Ob': 'ObR', + 'Po': 'PoR', + 'Amu Darya': 'AmuDaryaR', + 'Italy West Coast': 'ItalyCstW', + 'Spain Portugal Atlantic Coast': 'IberiaCst', + 'France South Coast': 'FranceCstS', + 'Rhone': 'RhoneR', + 'Mediterranean Sea Islands': 'MeditIsl', + 'Gironde': 'Gironde', + 'North and South Korea': 'Korea', + 'Bo Hai Korean Bay North Coast': 'BoHai', + 'Spain South and East Coast': 'SpainCstSE', + 'Lake Balkash': 'LBalkash', + 'Tiber': 'TiberR', + 'Black Sea South Coast': 'BlackSeaS', + 'Tagus': 'TagusR', + 'Caspian Sea South West Coast': 'CaspianSW', + 'Ebro': 'EbroR', + 'Douro': 'DouroR', + 'Mediterranean Sea East Coast': 'MeditE', + 'Syr Darya': 'SyrDaryaR', + 'Ziya He Interior': 'ZiyaHe', + 'China Coast': 'ChinaCst', + 'Huang He': 'HuangHeR', + 'Mediterranean South Coast': 'MeditS', + 'Guadiana': 'GuadianaR', + 'Central Iran': 'Iran', + 'Guadalquivir': 'GuadalqR', + 'Tigris Euphrates': 'TigrEuphR', + 'Tarim Interior': 'Tarim', + 'Africa North West Coast': 'AfrCstNW', + 'Nile': 'NileR', + 'Persian Gulf Coast': 'PersianGulf', + 'Indus': 'IndusR', + 'Farahrud': 'FarahrudR', + 'Baja California': 'MexBaja', + 'Plateau of Tibet Interior': 'Tibet', + 'Red Sea East Coast': 'RedSeaE', + 'Arabian Peninsula': 'ArabianP', + 'Dead Sea': 'DeadSea', + 'Mexico Northwest Coast': 'MexCstNW', + 'Helmand': 'Helmand', + 'Sinai Peninsula': 'SinaiP', + 'Eastern Jordan Syria': 'EJrdnSyr', + 'Africa Red Sea Gulf of Aden Coast': 'AfrCstNE', + 'Caribbean': 'Caribbean', + 'Hamun i Mashkel': 'HamuMashR', + 'Taiwan': 'Taiwan', + 'Arabian Sea Coast': 'ArabianSea', + 'North Gulf': 'MexGulf', + 'Yangtze': 'Yangtze', + 'Sabarmati': 'SabarmatiR', + 'Xun Jiang': 'XunJiang', + 'Hong Red River': 'Hong', + 'Ganges Bramaputra': 'GangesR', + 'Yucatan Peninsula': 'YucatanP', + 'South China Sea Coast': 'SChinaSea', + 'Mahi': 'MahiR', + 'Mexico Interior': 'MexInt', + 'Pacific Central Coast': 'MexCstW', + 'Bay of Bengal North East Coast': 'BengalBay', + 'Tapti': 'TaptiR', + 'Yasai': 'BengalW', + 'Philippines': 'Phlppns', + 'Brahamani': 'BrahmaniR', + 'North Marina Islands and Guam': 'Guam', + 'Mahandi': 'MahanadiR', + 'Godavari': 'GodavariR', + 'Hainan': 'Hainan', + 'Mekong': 'Mekong', + 'Viet Nam Coast': 'VietnamCst', + 'Salween': 'Salween', + 'India North East Coast': 'IndCstNE', + 'India West Coast': 'IndCstW', + 'Papaloapan': 'Papaloapan', + 'Rio Lerma': 'RioLerma', + 'Rio Verde': 'RioVerde', + 'Grijalva Usumacinta': 'GrijUsuR', + 'Rio Balsas': 'RioBalsas', + 'Southern Central America': 'CntAmer', + 'Isthmus of Tehuantepec': 'Tehuantpc', + 'Irrawaddy': 'IrrawaddyR', + 'Sittang': 'SittaungR', + 'Peninsula Malaysia': 'MalaysiaP', + 'Krishna': 'KrishnaR', + 'Andaman Nicobar Islands': 'AdnNicIsl', + 'Africa West Coast': 'AfrCstW', + 'Caribbean Coast': 'SAmerCstN', + 'Africa North Interior': 'AfrIntN', + 'India East Coast': 'IndCstE', + 'Chao Phraya': 'ChaoPhrR', + 'Pennar': 'PennarR', + 'Gulf of Thailand Coast': 'ThaiGulf', + 'Niger': 'NigerR', + 'Micronesia': 'Micronesia', + 'Lake Chad': 'LChad', + 'Senegal': 'SenegalR', + 'Cauvery': 'CauveryR', + 'Sri Lanka': 'SriLanka', + 'India South Coast': 'IndCstS', + 'Orinoco': 'OrinocoR', + 'Colombia Ecuador Pacific Coast': 'ColEcuaCst', + 'Palau and East Indonesia': 'IdnE', + 'North Borneo Coast': 'BorneoCstN', + 'Volta': 'VoltaR', + 'Northeast South America South Atlantic Coast': 'SAmerCstNE', + 'Gulf of Guinea': 'GuineaGulf', + 'Sumatra': 'Sumatra', + 'Sulawesi': 'Sulawesi', + 'Kalimantan': 'Kalimantan', + 'Magdalena': 'MagdalenaR', + 'Irian Jaya Coast': 'IrianJaya', + 'Amazon': 'AmazonR', + 'South Chile Pacific Coast': 'ChileCstS', + 'Shebelli Juba': 'ShebJubR', + 'Africa East Central Coast': 'AfrCstE', + 'North Brazil South Atlantic Coast': 'BrzCstN', + 'Papua New Guinea Coast': 'PapuaCst', + 'Tocantins': 'TocantinsR', + 'Java Timor': 'JavaTimor', + 'Solomon Islands': 'SolomonIsl', + 'Madasgacar': 'Madagascar', + 'Sepik': 'SepikR', + 'Rift Valley': 'RiftValley', + 'Peru Pacific Coast': 'PeruCst', + 'Fly': 'FlyR', + 'Angola Coast': 'AngolaCst', + 'Congo': 'CongoR', + 'Australia North Coast': 'AusCstN', + 'South Pacific Islands': 'NewCaledn', + 'East Brazil South Atlantic Coast': 'BrzCstE', + 'Parnaiba': 'ParnaibaR', + 'Zambezi': 'ZambeziR', + 'Australia East Coast': 'AusCstE', + 'Africa Indian Ocean Coast': 'AfrCstSE', + 'Australia West Coast': 'AusCstW', + 'Sao Francisco': 'SaoFrancR', + 'Australia Interior': 'AusInt', + 'Orange': 'OrangeR', + 'Uruguay Brazil South Atlantic Coast': 'BrzCstS', + 'Namibia Coast': 'AfrCstSW', + 'Africa South Interior': 'AfrIntS', + 'South Africa South Coast': 'AfrCstS', + 'Limpopo': 'LimpopoR', + 'La Puna Region': 'LaPuna', + 'New Zealand': 'NewZealand', + 'Australia South Coast': 'AusCstS', + 'Mar Chiquita': 'MarChiq', + 'South Africa West Coast': 'AfrCstSSW', + 'Salinas Grandes': 'Salinas', + 'La Plata': 'RioLaPlata', + 'North Chile Pacific Coast': 'ChileCstN', + 'Murray Darling': 'MurrayDrlg', + 'Pampas Region': 'Pampas', + 'North Argentina South Atlantic Coast': 'ArgCstN', + 'Tasmania': 'Tasmania', + 'South America Colorado': 'ArgColoR', + 'Negro': 'NegroR', + 'Central Patagonia Highlands': 'Patagonia', + 'South Argentina South Atlantic Coast': 'ArgCstS', + 'Antarctica': 'Antarctica', + 'California River': 'California', + 'Upper Mississippi': 'MissppRN', + 'Lower Mississippi River': 'MissppRS', + 'Upper Colorado River': 'UsaColoRN', + 'Lower Colorado River': 'UsaColoRS', + 'Great': 'GreatBasin', + 'Missouri River': 'MissouriR', + 'Arkansas White Red': 'ArkWhtRedR', + 'Texas Gulf Coast': 'TexasCst', + 'South Atlantic Gulf': 'UsaCstSE', + 'Great Lakes': 'GreatLakes', + 'Ohio River': 'OhioR', + 'Pacific Northwest': 'UsaPacNW', + 'Tennessee River': 'TennR', + 'Rio Grande River': 'RioGrande', + 'New England': 'UsaCstNE', + 'Mid Atlantic': 'UsaCstE', + 'Hawaii': 'Hawaii', + 'Narmada': 'NarmadaR' } } diff --git a/tethys/utils/source_disaggregation.py b/tethys/utils/source_disaggregation.py new file mode 100644 index 0000000..cb0976e --- /dev/null +++ b/tethys/utils/source_disaggregation.py @@ -0,0 +1,110 @@ +import os +import pandas as pd +import xarray as xr + +import gcamreader + +from tethys.datareader.easy_query import easy_query +from tethys.datareader.regional import extract_resource_name +from tethys.utils.region_to_id_mapping import name_to_id_mapping + + +class get_source_shares: + def __init__(self, gcam_db, basin_name_mapping="basin_name_mapping_im3", demand_type='withdrawals', region_masks=None, years=None): + """ + Initialize get_source_shares with parameters for GCAM database and basin mapping. + + :param gcam_db: Path to the GCAM database file. + :param basin_name_mapping: Basin name mapping dictionary, can change to other mapping in name_to_id_mapping. + :param demand_type: Type of demand, either 'withdrawals' or 'consumption'. + :param region_masks: xarray DataArray containing region masks. + :param years: List of years to be included in the shares. + """ + if region_masks is None: + raise ValueError("region_masks is required to compute gridded shares") + if years is None: + raise ValueError("years is required to compute gridded shares") + + dbpath, dbfile = os.path.split(gcam_db) + self.conn = gcamreader.LocalDBConn(dbpath, dbfile) + self.basin_name_mapping = basin_name_mapping + self.demand_type = demand_type + self.region_masks = region_masks + self.years = list(years) + + def calculate_shares(self): + """ + Query the GCAM database, process water data for the specified demand type, + and calculate runoff and groundwater shares by region and year. + + :return: Processed DataFrame with calculated shares. + """ + # query GCAM database for water used by source for the configured demand type + if self.demand_type not in ('withdrawals', 'consumption'): + raise ValueError(f"Unsupported demand_type: {self.demand_type!r}") + query_type = f"*_water {self.demand_type}" + shares_df = self.conn.runQuery(easy_query('production', replace_filters=True, resource=query_type)) + # extract and clean resource names (e.g., remove '_water withdrawals') + shares_df['resource'] = shares_df['resource'].apply(extract_resource_name) + + # rest of the mappings should be fine, old GCAM versions have typos in basin names + if self.basin_name_mapping == "basin_name_mapping_im3": + shares_df['resource'] = shares_df['resource'].str.replace('_', ' ').str.replace('-', ' ') # replace '_' and '-' with spaces + + # map resource names to short basin names and concatenate region and resource to work with region_masks + shares_df['resourcemap'] = shares_df['resource'].map(name_to_id_mapping[self.basin_name_mapping]) + shares_df['region_resourcemap'] = shares_df['region'] + '_' + shares_df['resourcemap'] + + # calculate shares of runoff and groundwater + shares_df['share'] = shares_df.groupby( + ['scenario', 'region', 'region_resourcemap', 'year'] + )['value'].transform(lambda x: x / x.sum() if x.sum() != 0 else 0) + + # capture basins that are mostly belonging to other countries + if self.basin_name_mapping == 'basin_name_mapping_im3': + shares_df = shares_df.replace({'region_resourcemap': { + 'Canada_FraserR': 'USA_FraserR', + 'Canada_GreatLakes': 'USA_GreatLakes', + 'Mexico_MexCstNW': 'USA_MexCstNW', + 'Canada_NelsonR': 'USA_NelsonR', + }}) + + # filter shares based on the region masks from the config file, otherwise all regions would be included + shares_df = shares_df[shares_df['region_resourcemap'].isin(self.region_masks.region.values)] + + return shares_df + + def generate_gridded_shares(self, shares_df): + """ + Generate gridded shares by subresource (e.g., runoff, groundwater). + + :param shares_df: DataFrame containing shares calculated by `calculate_shares`. + :return: Dictionary of gridded shares by subresource. + """ + gridded_shares_by_subresource = {} + for subresource in shares_df.subresource.unique(): + gridded_shares_by_year = [] + for year in self.years: + gridded_shares = None + for basin in shares_df.region_resourcemap.unique(): + sel = shares_df[ + (shares_df['year'] == year) & + (shares_df['region_resourcemap'] == basin) & + (shares_df['subresource'] == subresource) + ] + share_val = 0.0 if sel.empty else float(sel.iloc[0].share) + gridded_shares = xr.where( + self.region_masks.sel(region=basin).load(), + share_val, + 0 if gridded_shares is None else gridded_shares, + keep_attrs=False, + ) + gridded_shares_by_year.append(gridded_shares) + + # Stack years for the subresource + gridded_shares_by_subresource[subresource] = xr.concat( + gridded_shares_by_year, + dim=pd.Index(self.years, name="year"), + ).rename('share') + + return gridded_shares_by_subresource