diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..fa02be2 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.12-slim + +WORKDIR /project + +COPY . . + +RUN pip install --upgrade pip +RUN pip install pytest sphinx sphinx-autobuild + +RUN pip install -e . + + +CMD ["pytest", "-v", "hdp/tests/"] \ No newline at end of file diff --git a/hdp/__init__.py b/hdp/__init__.py index 8c6005b..e69de29 100644 --- a/hdp/__init__.py +++ b/hdp/__init__.py @@ -1,6 +0,0 @@ -from hdp import hdp -from hdp import measure -from hdp import threshold -from hdp import metric -from hdp import utils -from hdp import definitions \ No newline at end of file diff --git a/hdp/graphics/figure.py b/hdp/graphics/figure.py index 27016de..32a345b 100644 --- a/hdp/graphics/figure.py +++ b/hdp/graphics/figure.py @@ -135,11 +135,13 @@ def get_unique_metric_names(hw_ds): return unique_metrics -def plot_map(metric_da, axis, cmap="hot"): +def plot_map(metric_da, ax, cmap="hot"): cmap_obj = plt.get_cmap(cmap) + metric_da = metric_da.transpose(..., "lon") + cyclic_values, cyclic_lons = add_cyclic_point(metric_da.values, metric_da.lon.values, axis=-1) - ax_contour = axis.contourf(cyclic_lons, metric_da.lat.values, cyclic_values, transform=ccrs.PlateCarree(), cmap=cmap_obj) - axis.coastlines() + ax_contour = ax.contourf(cyclic_lons, metric_da.lat.values, cyclic_values, transform=ccrs.PlateCarree(), cmap=cmap_obj) + ax.coastlines() return ax_contour diff --git a/hdp/graphics/notebook.py b/hdp/graphics/notebook.py index 68dd900..1012742 100644 --- a/hdp/graphics/notebook.py +++ b/hdp/graphics/notebook.py @@ -3,6 +3,9 @@ import nbformat from io import BytesIO from datetime import datetime +from hdp.utils import get_func_description +from hdp.graphics.figure import * +from tqdm.auto import tqdm class HDPNotebook(): @@ -92,4 +95,50 @@ def save_notebook(self, path, title=None): notebook_node.cells.append(cell) with open(path, 'w') as nb_file: - nbformat.write(notebook_node, nb_file) \ No newline at end of file + nbformat.write(notebook_node, nb_file) + + +def create_notebook(hw_ds): + assert "hdp_type" in hw_ds.attrs, "Missing 'hdp_type' attribute." + + notebook = HDPNotebook() + + if hw_ds.attrs["hdp_type"] == "measure": + pass + elif hw_ds.attrs["hdp_type"] == "threshold": + pass + elif hw_ds.attrs["hdp_type"] == "metric": + index = 1 + + section_name = f"Figures {index}" + notebook.create_section(section_name) + desc = get_func_description(plot_multi_measure_metric_comparisons) + notebook.add_markdown_cell(f"### Figure {index}.2 \n{desc}", section_name) + notebook.add_figure_cell(plot_multi_measure_metric_comparisons(hw_ds), section_name, alt_text=f"{section_name}") + + index += 1 + for metric in tqdm(list(hw_ds.data_vars), desc="Generating figures:"): + section_name = f"Figures {index}-{metric}" + + notebook.create_section(section_name) + notebook.add_markdown_cell("Description of these figures.", section_name) + + desc = get_func_description(plot_metric_parameter_comparison) + notebook.add_markdown_cell(f"### Figure {index}.1 \n{desc}", section_name) + notebook.add_figure_cell(plot_metric_parameter_comparison(hw_ds[metric]), section_name, alt_text=f"{section_name}") + + desc = get_func_description(plot_metric_timeseries) + notebook.add_markdown_cell(f"### Figure {index}.2 \n{desc}", section_name) + notebook.add_figure_cell(plot_metric_timeseries(hw_ds[metric]), section_name, alt_text=f"{section_name}") + + iindex = 3 + for fig in plot_metric_decadal_maps(hw_ds[metric]): + desc = get_func_description(plot_metric_decadal_maps) + notebook.add_markdown_cell(f"### Figure {index}.{iindex} \n{desc}", section_name) + notebook.add_figure_cell(fig, section_name, alt_text=f"{section_name}") + iindex += 1 + index += 1 + else: + raise ValueError(f"Unexpected value for 'hdp_type' attribute, '{hw_ds.attrs["hdp_type"]}' is not 'measure', 'threshold', or 'metric'.") + + return notebook \ No newline at end of file diff --git a/hdp/hdp.py b/hdp/hdp.py deleted file mode 100644 index 25be476..0000000 --- a/hdp/hdp.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python -""" -hdp.py - -Heatwave Diagnostics Package (HDP) - -Entry point for package. - -Developer: Cameron Cummins -Contact: cameron.cummins@utexas.edu -""" -from hdp.graphics.notebook import HDPNotebook -from hdp.graphics.figure import * -from hdp.utils import get_func_description -from tqdm.auto import tqdm - - -def create_notebook(hw_ds): - assert "hdp_type" in hw_ds.attrs, "Missing 'hdp_type' attribute." - - notebook = HDPNotebook() - - if hw_ds.attrs["hdp_type"] == "measure": - pass - elif hw_ds.attrs["hdp_type"] == "threshold": - pass - elif hw_ds.attrs["hdp_type"] == "metric": - index = 1 - - section_name = f"Figures {index}" - notebook.create_section(section_name) - desc = get_func_description(plot_multi_measure_metric_comparisons) - notebook.add_markdown_cell(f"### Figure {index}.2 \n{desc}", section_name) - notebook.add_figure_cell(plot_multi_measure_metric_comparisons(hw_ds), section_name, alt_text=f"{section_name}") - - index += 1 - for metric in tqdm(list(hw_ds.data_vars), desc="Generating figures:"): - section_name = f"Figures {index}-{metric}" - - notebook.create_section(section_name) - notebook.add_markdown_cell("Description of these figures.", section_name) - - desc = get_func_description(plot_metric_parameter_comparison) - notebook.add_markdown_cell(f"### Figure {index}.1 \n{desc}", section_name) - notebook.add_figure_cell(plot_metric_parameter_comparison(hw_ds[metric]), section_name, alt_text=f"{section_name}") - - desc = get_func_description(plot_metric_timeseries) - notebook.add_markdown_cell(f"### Figure {index}.2 \n{desc}", section_name) - notebook.add_figure_cell(plot_metric_timeseries(hw_ds[metric]), section_name, alt_text=f"{section_name}") - - iindex = 3 - for fig in plot_metric_decadal_maps(hw_ds[metric]): - desc = get_func_description(plot_metric_decadal_maps) - notebook.add_markdown_cell(f"### Figure {index}.{iindex} \n{desc}", section_name) - notebook.add_figure_cell(fig, section_name, alt_text=f"{section_name}") - iindex += 1 - index += 1 - else: - raise ValueError(f"Unexpected value for 'hdp_type' attribute, '{hw_ds.attrs["hdp_type"]}' is not 'measure', 'threshold', or 'metric'.") - - return notebook \ No newline at end of file diff --git a/hdp/metric.py b/hdp/metric.py index 9536630..c33e822 100644 --- a/hdp/metric.py +++ b/hdp/metric.py @@ -462,7 +462,7 @@ def compute_individual_metrics(measure: xarray.DataArray, threshold: xarray.Data start_ts = cftime.datetime(ds.year[0], 1, 1, calendar=measure.time.values[0].calendar) end_ts = cftime.datetime(ds.year[-1], 1, 1, calendar=measure.time.values[0].calendar) - ds = ds.rename(dict(year="time")).assign_coords(dict(time=xarray.cftime_range(start_ts, end_ts, periods=ds.year.size))) + ds = ds.rename(dict(year="time")).assign_coords(dict(time=xarray.date_range(start_ts, end_ts, periods=ds.year.size, use_cftime=True))) ds.attrs |= { "description": f"Heatwave metric dataset generated by Heatwave Diagnostics Package (HDP v{get_version()})", diff --git a/hdp/sample.py b/hdp/sample.py new file mode 100644 index 0000000..6f355af --- /dev/null +++ b/hdp/sample.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python +""" +hdp.py + +Heatwave Diagnostics Package (HDP) + +Entry point for package. + +Developer: Cameron Cummins +Contact: cameron.cummins@utexas.edu +""" +from hdp.graphics.notebook import create_notebook +from os.path import isdir +import hdp.measure +import hdp.threshold +import hdp.metric +import numpy as np +import sys + + +def generate_sample_data(output_dir): + if not isdir(output_dir): + raise RuntimeError(f"Output directory '{output_dir}' does not exist!") + + sample_control_temp = hdp.utils.generate_test_control_dataarray() + sample_warming_temp = hdp.utils.generate_test_warming_dataarray() + + baseline_measures = hdp.measure.format_standard_measures( + temp_datasets=[sample_control_temp] + ) + test_measures = hdp.measure.format_standard_measures( + temp_datasets=[sample_warming_temp] + ) + + percentiles = np.arange(0.9, 1.0, 0.01) + + thresholds_dataset = hdp.threshold.compute_thresholds( + baseline_measures, + percentiles + ) + + definitions = [[3,0,0], [3,1,1], [4,0,0], [4,1,1], [5,0,0], [5,1,1]] + + metrics_dataset = hdp.metric.compute_group_metrics(test_measures, thresholds_dataset, definitions, include_threshold=True) + metrics_dataset.to_netcdf(f"{output_dir}/sample_hw_metrics.nc", mode='w') + + figure_notebook = create_notebook(metrics_dataset) + figure_notebook.save_notebook(f"{output_dir}/sample_hw_summary_figures.ipynb") + + sample_control_temp = sample_control_temp.to_dataset() + sample_control_temp.attrs["description"] = "Mock control temperature dataset generated by HDP for unit testing." + sample_control_temp.to_netcdf(f"{output_dir}/sample_control_temp.nc", mode='w') + + sample_warming_temp = sample_warming_temp.to_dataset() + sample_warming_temp.attrs["description"] = "Mock temperature dataset with warming trend generated by HDP for unit testing." + sample_warming_temp.to_netcdf(f"{output_dir}/sample_warming_temp.nc", mode='w') + +if __name__ == "__main__": + print("Generating testing data and simulating a full data-to-figure workflow: ") + if len(sys.argv) != 2: + assert Exception("Please specifiy the path to output sample data and results to.") + generate_sample_data(sys.argv[1]) + print("Done!") \ No newline at end of file diff --git a/hdp/tests/test_utils.py b/hdp/tests/test_utils.py index 2e1bcbb..f58d242 100644 --- a/hdp/tests/test_utils.py +++ b/hdp/tests/test_utils.py @@ -1,46 +1,57 @@ -import hdp.utils -import xarray +from hdp.utils import * import numpy as np - +from math import isclose +from xarray import DataArray def test_get_time_stamp(): - assert type(hdp.utils.get_time_stamp()) is str + assert type(get_time_stamp()) is str def test_get_version(): - assert type(hdp.utils.get_version()) is str - - -def test_synthetic_data_functions(): - var = "test" - center_val = 10 - amplitude_val = 1 - units = "test_units" - - ds = hdp.utils.generate_synthetic_dataset(name=var, units=units, center=center_val, amplitude=amplitude_val) - assert type(ds) is xarray.Dataset - assert len(ds.data_vars) == 1 - assert var in ds - - assert "units" in ds[var].attrs - assert ds[var].attrs["units"] == units - assert ds[var].dims == ("lat", "lon", "time") - assert ds[var].dtype == float - assert ds[var].time.values[0].calendar == "noleap" - assert ds[var].lat.size > 1 - assert ds[var].lon.size > 1 - assert ds[var].time.size >= 2*365 - - data = ds[var].compute() - - assert np.isclose(data.mean(), center_val) - assert np.isclose(data.max(), center_val + amplitude_val) - assert np.isclose(data.values, data.values[0]).all() - - exceed_data = hdp.utils.generate_exceedance_dataarray(ds[var], exceedance_pattern=[1, 0, 1, 0]).compute() - - assert np.isclose(exceed_data.mean(), center_val + 0.5) - assert exceed_data.attrs == ds[var].attrs - assert exceed_data.dims == ds[var].dims - assert exceed_data.shape == ds[var].shape - assert exceed_data.dtype == ds[var].dtype \ No newline at end of file + assert type(get_version()) is str + + +def test_control_dataarray(): + control_da = generate_test_control_dataarray(grid_shape=(1,1), start_date="2000", end_date="2100") + assert type(control_da) is DataArray + var = control_da.name + assert control_da.attrs["units"] == "degC" + assert control_da.dims == ("lon", "lat", "time") + assert control_da.dtype == float + assert control_da.time.values[0].calendar == "noleap" + assert control_da.time.values.size >= 365 + assert np.sum(np.isnan(control_da)) == 0 + + control_slope = np.polyfit(np.arange(control_da["time"].size), control_da.mean(dim=["lat", "lon"]).values, 1)[0] + assert np.abs(control_slope) < 0.01 + + +def test_warming_dataarray(): + warming_da = generate_test_warming_dataarray(grid_shape=(1,1), start_date="2000", end_date="2100") + avg_da = warming_da.mean(dim=["lat", "lon"]).values + warm_slope = np.polyfit(np.arange(avg_da.size), avg_da, 1)[0] + + assert warm_slope > 0 + assert not isclose(warm_slope, 0) + + +def test_rh_dataarray(): + rh_da = generate_test_rh_dataarray() + + assert rh_da.max() <= 1 + assert rh_da.min() >= 0 + + +def test_defaults_compatibility(): + control_da = generate_test_control_dataarray() + warming_da = generate_test_warming_dataarray() + rh_da = generate_test_rh_dataarray() + + assert control_da.shape == warming_da.shape + assert warming_da.shape == rh_da.shape + + assert control_da.dims == warming_da.dims + assert warming_da.dims == rh_da.dims + + assert control_da.name == warming_da.name + assert control_da.units == warming_da.units \ No newline at end of file diff --git a/hdp/tests/test_workflow.py b/hdp/tests/test_workflow.py index f00de44..ee603b5 100644 --- a/hdp/tests/test_workflow.py +++ b/hdp/tests/test_workflow.py @@ -1,11 +1,22 @@ +from hdp.graphics.notebook import create_notebook import pytest -import hdp +import hdp.utils +import hdp.measure +import hdp.threshold +import hdp.metric import numpy as np -def test_full_data_workflow(): - baseline_temp = hdp.utils.generate_synthetic_dataset(name="temp")["temp"] - baseline_rh = hdp.utils.generate_synthetic_dataset(name="rh", units="%", center=90, amplitude=15)["rh"] +@pytest.fixture(scope="function") +def temp_output_dir(tmp_path_factory): + return tmp_path_factory.mktemp("output_dir") + + +def test_full_data_workflow(temp_output_dir): + grid_shape = (2, 3) + + baseline_temp = hdp.utils.generate_test_control_dataarray(grid_shape=grid_shape).rename("temp") + baseline_rh = hdp.utils.generate_test_rh_dataarray(grid_shape=grid_shape).rename("rh") baseline_measures = hdp.measure.format_standard_measures([baseline_temp], rh=baseline_rh) percentiles = np.arange(0.9, 1, 0.01) @@ -14,9 +25,8 @@ def test_full_data_workflow(): exceedance_pattern = [1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1] - test_temp = hdp.utils.generate_synthetic_dataset(name="temp")["temp"] - test_temp = hdp.utils.generate_exceedance_dataarray(test_temp, exceedance_pattern) - test_rh = hdp.utils.generate_synthetic_dataset(name="rh", units="%", center=90, amplitude=15)["rh"] + test_temp = hdp.utils.generate_test_warming_dataarray(grid_shape=grid_shape).rename("temp") + test_rh = baseline_rh hw_definitions = [[3,0,0], [3,1,1], [4,2,0], [4,1,3], [5,0,1], [5,1,4]] @@ -37,22 +47,20 @@ def test_full_data_workflow(): assert metrics.definition.values[5] == "5-1-4" assert (metrics.percentile.values == percentiles).all() - assert (metrics["temp.temp_threshold.HWF"] == metrics["temp_hi.temp_hi_threshold.HWF"]).all() - assert (metrics["temp.temp_threshold.HWD"] == metrics["temp_hi.temp_hi_threshold.HWD"]).all() - assert (metrics["temp.temp_threshold.HWA"] == metrics["temp_hi.temp_hi_threshold.HWA"]).all() - assert (metrics["temp.temp_threshold.HWN"] == metrics["temp_hi.temp_hi_threshold.HWN"]).all() - metric_means = metrics.mean() assert metric_means["temp.temp_threshold.HWF"] >= metric_means["temp.temp_threshold.HWD"] assert metric_means["temp.temp_threshold.HWD"] >= metric_means["temp.temp_threshold.HWA"] for var in metrics: - assert metrics[var].shape == (metrics.percentile.size, metrics.definition.size, metrics.lat.size, metrics.lon.size, metrics.time.size) + assert metrics[var].shape == (metrics.percentile.size, metrics.definition.size, metrics.lon.size, metrics.lat.size, metrics.time.size) assert metrics[var].dtype == int if "HWF" in var or "HWD" in var: assert metrics[var].attrs["units"] == 'heatwave days', f"Variable '{var}' has incorrect units '{metrics[var].attrs["units"]}'" elif "HWN" in var or "HWA" in var: assert metrics[var].attrs["units"] == 'heatwave events', f"Variable '{var}' has incorrect units '{metrics[var].attrs["units"]}'" else: - assert False, f"Cannot determine primary heatwave metric from variable '{var}'." \ No newline at end of file + assert False, f"Cannot determine primary heatwave metric from variable '{var}'." + + figure_notebook = create_notebook(metrics) + figure_notebook.save_notebook(f"{temp_output_dir}/sample_hw_summary_figures.ipynb") \ No newline at end of file diff --git a/hdp/utils.py b/hdp/utils.py index 21f0a3b..6e01082 100644 --- a/hdp/utils.py +++ b/hdp/utils.py @@ -36,44 +36,46 @@ def get_func_description(func): return desc -def generate_synthetic_dataset(center=25, amplitude=10, name="temperature", units="degC"): - time_values = xarray.cftime_range( - start=cftime.DatetimeNoLeap(2000, 1, 1), - end=cftime.DatetimeNoLeap(2009, 12, 31), +def generate_test_warming_dataarray(start_date="2000-01-01", end_date="2049-12-31", grid_shape=(12, 10), warming_period=10): + base_data = generate_test_control_dataarray(start_date=start_date, end_date=end_date, grid_shape=grid_shape) + base_data += xarray.DataArray(np.arange(base_data["time"].size) / (365*warming_period), dims=["time"], coords={"time": base_data["time"]}) + return base_data + + +def generate_test_rh_dataarray(start_date="2000-01-01", end_date="2049-12-31", grid_shape=(12, 10)): + base_data = generate_test_control_dataarray(start_date=start_date, end_date=end_date, grid_shape=grid_shape) + base_data = abs(base_data / base_data.max() - 0.3) + base_data = base_data.rename("test_rh_data") + base_data.attrs["units"] = 'g/g' + return base_data + + +def generate_test_control_dataarray(start_date="2000-01-01", end_date="2049-12-31", grid_shape=(12, 10)): + time_values = xarray.date_range( + start=start_date, + end=end_date, freq="D", - calendar="noleap" + calendar="noleap", + use_cftime=True ) + temperature_seasonal_ts = 20 + 15*np.sin(np.pi*np.arange(time_values.size, dtype=float) / 365) + temperature_seasonal_vals = np.broadcast_to(temperature_seasonal_ts, (grid_shape[0], grid_shape[1], temperature_seasonal_ts.size)) - temp_timeseries = center + amplitude*np.sin(np.pi*np.arange(time_values.size, dtype=float) / 365) - temp_values = np.broadcast_to(temp_timeseries, (3, 3, temp_timeseries.size)) - - temp_da = xarray.DataArray( - data=da.from_array(temp_values), - dims=["lat", "lon", "time"], + lat_vals = np.linspace(-90, 90, grid_shape[1], dtype=float) + + lat_grad = np.broadcast_to(np.abs(lat_vals) / 90 * 15, grid_shape) + temperature_seasonal_vals = temperature_seasonal_vals - np.broadcast_to(lat_grad[:, :, None], temperature_seasonal_vals.shape) + + return xarray.DataArray( + data=temperature_seasonal_vals, + dims=["lon", "lat", "time"], coords={ - "lat": np.array([-90, 0, 90], dtype=float), - "lon": np.array([-180, 0, 180], dtype=float), + "lon": np.linspace(-180, 180, grid_shape[0], dtype=float), + "lat": lat_vals, "time": time_values }, - name=name, + name="test_temperature_data", attrs={ - "units": units + "units": "degC" } - ).chunk(dict(lat=1, lon=1)) - return xarray.Dataset({name: temp_da}) - - -def generate_exceedance_dataarray(measure, exceedance_pattern, multiplier=1.0): - tiles = np.ceil(measure.time.size / len(exceedance_pattern)).astype(int) - pattern_data = np.broadcast_to(np.tile(exceedance_pattern, tiles)[:measure.time.size], measure.shape)*multiplier - - ret_da = None - with xarray.set_options(keep_attrs=True): - ret_da = measure + xarray.DataArray( - data=da.from_array(pattern_data), - dims=measure.dims, - coords=measure.coords, - name=measure.name, - attrs=measure.attrs - ).chunk(measure.chunksizes) - return ret_da + ).chunk('auto') \ No newline at end of file