diff --git a/.jules/bolt.md b/.jules/bolt.md index 5f56bd7..79f971b 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -1,3 +1,7 @@ ## 2026-01-30 - [Vectorization of Grid Generation] **Learning:** Python loops for grid generation (even small ones like 10x10) are significantly slower than numpy vectorization (~50% overhead for 100 points). **Action:** Use `np.meshgrid` and vectorized operations for spatial data generation whenever possible. + +## 2026-05-21 - [Meshgrid Indexing for Geospatial Data] +**Learning:** When replacing nested loops `for lat in lats: for lon in lons:` with `np.meshgrid`, use `indexing='ij'` to preserve the array shape `(len(lats), len(lons))` and iteration order. Default `xy` indexing transposes dimensions. +**Action:** Use `np.meshgrid(lats, lons, indexing='ij')` when vectorizing map grid generation. diff --git a/__pycache__/nasa_data.cpython-312.pyc b/__pycache__/nasa_data.cpython-312.pyc index 5bf9b2e..08039e9 100644 Binary files a/__pycache__/nasa_data.cpython-312.pyc and b/__pycache__/nasa_data.cpython-312.pyc differ diff --git a/nasa_data.py b/nasa_data.py index 419477f..2a99988 100644 --- a/nasa_data.py +++ b/nasa_data.py @@ -15,149 +15,165 @@ BASE_URL = "https://power.larc.nasa.gov/api/temporal/daily/point" + @functools.lru_cache(maxsize=128) def _fetch_nasa_power_data_cached(lat, lon, start_date, end_date, parameters_tuple): """ Internal cached function for fetching NASA POWER data. """ parameters = list(parameters_tuple) - + # Generate a cache key for this specific request # Round coordinates to 4 decimal places to improve cache hits lat_rounded = round(lat, 4) lon_rounded = round(lon, 4) - + # Convert dates to required format (YYYYMMDD) - start_date_obj = datetime.strptime(start_date, '%Y-%m-%d') - end_date_obj = datetime.strptime(end_date, '%Y-%m-%d') - start_date_str = start_date_obj.strftime('%Y%m%d') - end_date_str = end_date_obj.strftime('%Y%m%d') - + start_date_obj = datetime.strptime(start_date, "%Y-%m-%d") + end_date_obj = datetime.strptime(end_date, "%Y-%m-%d") + start_date_str = start_date_obj.strftime("%Y%m%d") + end_date_str = end_date_obj.strftime("%Y%m%d") + # Build the request URL params = { - 'parameters': ','.join(parameters), - 'community': 'RE', - 'longitude': lon_rounded, - 'latitude': lat_rounded, - 'start': start_date_str, - 'end': end_date_str, - 'format': 'JSON' + "parameters": ",".join(parameters), + "community": "RE", + "longitude": lon_rounded, + "latitude": lat_rounded, + "start": start_date_str, + "end": end_date_str, + "format": "JSON", } - + try: # Make the request with retry logic for improved reliability max_retries = 3 retry_delay = 1 # seconds - + for retry in range(max_retries): try: response = requests.get(BASE_URL, params=params, timeout=10) response.raise_for_status() # Raise exception for HTTP errors break # If successful, exit the retry loop - except (requests.exceptions.RequestException, requests.exceptions.Timeout) as e: + except ( + requests.exceptions.RequestException, + requests.exceptions.Timeout, + ) as e: if retry < max_retries - 1: # If we have retries left - print(f"API request failed, retrying in {retry_delay} seconds... ({str(e)})") + print( + f"API request failed, retrying in {retry_delay} seconds... ({str(e)})" + ) time.sleep(retry_delay) retry_delay *= 2 # Exponential backoff else: raise # Re-raise the last exception if all retries failed - + # Parse the JSON response data = response.json() - + # Extract the data from the response - if 'properties' not in data or 'parameter' not in data['properties']: + if "properties" not in data or "parameter" not in data["properties"]: raise ValueError("Unexpected API response format") - - parameter_data = data['properties']['parameter'] - + + parameter_data = data["properties"]["parameter"] + # Create a DataFrame from the data dates = [] values = {param: [] for param in parameters} - + # Get the date range from the response for date_str in parameter_data[parameters[0]].keys(): - dates.append(datetime.strptime(date_str, '%Y%m%d')) - + dates.append(datetime.strptime(date_str, "%Y%m%d")) + for param in parameters: if param in parameter_data: values[param].append(parameter_data[param][date_str]) else: values[param].append(None) - + # Create the DataFrame - df = pd.DataFrame({'Date': dates}) - + df = pd.DataFrame({"Date": dates}) + # Add the parameter values for param in parameters: df[param] = values[param] - + return df - + except Exception as e: raise Exception(f"Error fetching NASA POWER data: {str(e)}") + def fetch_nasa_power_data(lat, lon, start_date, end_date, parameters=None): """ Fetch climate data from NASA POWER API with caching for improved performance - + Args: lat: Latitude (-90 to 90) lon: Longitude (-180 to 180) start_date: Start date in format 'YYYY-MM-DD' end_date: End date in format 'YYYY-MM-DD' parameters: List of parameters to fetch - + Returns: DataFrame with climate data """ if parameters is None: # Default parameters for climate data parameters = [ - "T2M", # Temperature at 2 Meters (°C) - "T2M_MAX", # Maximum Temperature at 2 Meters (°C) - "T2M_MIN", # Minimum Temperature at 2 Meters (°C) + "T2M", # Temperature at 2 Meters (°C) + "T2M_MAX", # Maximum Temperature at 2 Meters (°C) + "T2M_MIN", # Minimum Temperature at 2 Meters (°C) "PRECTOTCORR", # Precipitation Corrected (mm/day) - "RH2M", # Relative Humidity at 2 Meters (%) - "WS2M" # Wind Speed at 2 Meters (m/s) + "RH2M", # Relative Humidity at 2 Meters (%) + "WS2M", # Wind Speed at 2 Meters (m/s) ] # Convert parameters to tuple for caching parameters_tuple = tuple(sorted(parameters)) # Get cached result and return a copy to prevent mutation - return _fetch_nasa_power_data_cached(lat, lon, start_date, end_date, parameters_tuple).copy() + return _fetch_nasa_power_data_cached( + lat, lon, start_date, end_date, parameters_tuple + ).copy() + @functools.lru_cache(maxsize=32) -def _fetch_precipitation_map_data_cached(lat, lon, start_date, end_date, radius_degrees, fast_mode): +def _fetch_precipitation_map_data_cached( + lat, lon, start_date, end_date, radius_degrees, fast_mode +): """Internal cached function for precipitation map data.""" # Create a grid of points around the center - balanced for speed and visual quality - grid_size = 10 # Reduced back to 10 for faster loading, with improved rendering parameters + grid_size = ( + 10 # Reduced back to 10 for faster loading, with improved rendering parameters + ) lat_range = np.linspace(lat - radius_degrees, lat + radius_degrees, grid_size) lon_range = np.linspace(lon - radius_degrees, lon + radius_degrees, grid_size) - + # Initialize empty DataFrame precip_data = [] - + # Get total days in period for estimating precipitation - start_date_dt = datetime.strptime(start_date, '%Y-%m-%d') - end_date_dt = datetime.strptime(end_date, '%Y-%m-%d') + start_date_dt = datetime.strptime(start_date, "%Y-%m-%d") + end_date_dt = datetime.strptime(end_date, "%Y-%m-%d") days_in_period = (end_date_dt - start_date_dt).days + 1 - + # For periods up to 14 days, or if fast_mode is enabled, # just fetch the central point for immediate response and extrapolate for better visualization if days_in_period <= 14 or radius_degrees <= 0.5 or fast_mode: try: # Fetch data for the central point - df = fetch_nasa_power_data(lat, lon, start_date, end_date, parameters=["PRECTOTCORR"]) - + df = fetch_nasa_power_data( + lat, lon, start_date, end_date, parameters=["PRECTOTCORR"] + ) + # Calculate total precipitation for the period - central_precip = df['PRECTOTCORR'].sum() - + central_precip = df["PRECTOTCORR"].sum() + # For zero or very low precipitation, set a minimum value for visualization if central_precip < 0.1: central_precip = 0.1 - + # Generate a realistic precipitation distribution based on the central point # This approximation is faster than making hundreds of API calls # Use vectorized numpy operations for performance (~50% faster than iterative approach) @@ -166,11 +182,15 @@ def _fetch_precipitation_map_data_cached(lat, lon, start_date, end_date, radius_ LON, LAT = np.meshgrid(lon_range, lat_range) # Calculate distance from center (0-1 scale) - dist_factor = ((LAT - lat)**2 + (LON - lon)**2) / (2 * radius_degrees**2) + dist_factor = ((LAT - lat) ** 2 + (LON - lon) ** 2) / ( + 2 * radius_degrees**2 + ) # Apply a realistic variation based on distance # Generate random variation for the entire grid at once - variation = 1.0 - 0.3 * dist_factor + 0.2 * np.random.random(dist_factor.shape) + variation = ( + 1.0 - 0.3 * dist_factor + 0.2 * np.random.random(dist_factor.shape) + ) # Ensure variation is reasonable variation = np.clip(variation, 0.5, 1.5) @@ -180,325 +200,392 @@ def _fetch_precipitation_map_data_cached(lat, lon, start_date, end_date, radius_ # Ensure precipitation is a positive number point_precip = np.maximum(0.01, point_precip) - + # Return the synthesized grid data - return pd.DataFrame({ - 'latitude': LAT.flatten(), - 'longitude': LON.flatten(), - 'precipitation': point_precip.flatten() - }) - + return pd.DataFrame( + { + "latitude": LAT.flatten(), + "longitude": LON.flatten(), + "precipitation": point_precip.flatten(), + } + ) + except Exception as e: print(f"Warning: Could not fetch central data point: {str(e)}") - + # For longer periods, fetch a subset of points and interpolate between them # This approach balances accuracy with speed # When fast_mode is off, use denser sampling for higher quality maps - sample_step = 3 if fast_mode else 2 # Adjust sampling density based on speed preference - + sample_step = ( + 3 if fast_mode else 2 + ) # Adjust sampling density based on speed preference + # Fetch data for sampled points for i, grid_lat in enumerate(lat_range): if i % sample_step != 0 and i != len(lat_range) - 1: continue # Skip non-sampled points - + for j, grid_lon in enumerate(lon_range): if j % sample_step != 0 and j != len(lon_range) - 1: continue # Skip non-sampled points - + try: # Fetch data for this point - df = fetch_nasa_power_data(grid_lat, grid_lon, start_date, end_date, parameters=["PRECTOTCORR"]) - + df = fetch_nasa_power_data( + grid_lat, grid_lon, start_date, end_date, parameters=["PRECTOTCORR"] + ) + # Calculate total precipitation for the period - total_precip = df['PRECTOTCORR'].sum() - + total_precip = df["PRECTOTCORR"].sum() + # Ensure precipitation is a positive number total_precip = max(0.01, total_precip) - + # Add to the results - precip_data.append({ - 'latitude': grid_lat, - 'longitude': grid_lon, - 'precipitation': total_precip, - 'is_sampled': True - }) + precip_data.append( + { + "latitude": grid_lat, + "longitude": grid_lon, + "precipitation": total_precip, + "is_sampled": True, + } + ) except Exception as e: - print(f"Warning: Could not fetch data for point ({grid_lat}, {grid_lon}): {str(e)}") + print( + f"Warning: Could not fetch data for point ({grid_lat}, {grid_lon}): {str(e)}" + ) # Continue with other points - + # Create DataFrame from sampled points sampled_df = pd.DataFrame(precip_data) - + # If we couldn't get any sampled points, return an empty DataFrame if len(sampled_df) == 0: - return pd.DataFrame(columns=['latitude', 'longitude', 'precipitation']) - - # Now interpolate for the missing points - full_grid_data = [] - - # Add all sampled points to the full grid - for _, row in sampled_df.iterrows(): - full_grid_data.append({ - 'latitude': row['latitude'], - 'longitude': row['longitude'], - 'precipitation': row['precipitation'] - }) - - # For non-sampled points, interpolate from nearest sampled points - for grid_lat in lat_range: - for grid_lon in lon_range: - # Skip points we already have - if any((sampled_df['latitude'] == grid_lat) & (sampled_df['longitude'] == grid_lon)): - continue - - # Find nearest sampled points and interpolate - distances = [] - precips = [] - - for _, row in sampled_df.iterrows(): - dist = ((row['latitude'] - grid_lat)**2 + (row['longitude'] - grid_lon)**2) ** 0.5 - distances.append(dist) - precips.append(row['precipitation']) - - # Calculate distance-weighted average - if distances: - # Avoid division by zero - weights = [1/(d+0.0001) for d in distances] - total_weight = sum(weights) - weighted_precip = sum(w * p for w, p in zip(weights, precips)) / total_weight - - # Add some realistic variation - variation = 0.9 + 0.2 * np.random.random() - interpolated_precip = weighted_precip * variation - - # Ensure precipitation is a positive number - interpolated_precip = max(0.01, interpolated_precip) - - full_grid_data.append({ - 'latitude': grid_lat, - 'longitude': grid_lon, - 'precipitation': interpolated_precip - }) - - # Convert to DataFrame - return pd.DataFrame(full_grid_data) - -def fetch_precipitation_map_data(lat, lon, start_date, end_date, radius_degrees=1.0, fast_mode=True): + return pd.DataFrame(columns=["latitude", "longitude", "precipitation"]) + + # Now interpolate for the missing points using vectorized operations + # This replaces the O(N*M) nested loop with fast numpy operations + + # Create meshgrid for target points + grid_lat_mesh, grid_lon_mesh = np.meshgrid(lat_range, lon_range, indexing="ij") + + # Get sampled data arrays + sampled_lats = sampled_df["latitude"].values + sampled_lons = sampled_df["longitude"].values + sampled_precips = sampled_df["precipitation"].values + + # Calculate squared Euclidean distances using broadcasting + # shape: (grid_size, grid_size, n_samples) + d_lat = grid_lat_mesh[..., np.newaxis] - sampled_lats + d_lon = grid_lon_mesh[..., np.newaxis] - sampled_lons + dist_sq = d_lat**2 + d_lon**2 + dist = np.sqrt(dist_sq) + + # Calculate IDW weights + # Add small epsilon to avoid division by zero + weights = 1.0 / (dist + 0.0001) + + # Calculate weighted average + # sum(weights * values) / sum(weights) + weighted_sum = np.sum(weights * sampled_precips, axis=2) + total_weights = np.sum(weights, axis=2) + interpolated_precip = weighted_sum / total_weights + + # Apply variation to interpolated points + # Sampled points (distance ~ 0) should keep their original values (mostly) + # Use min distance to identify sampled points + min_dist = np.min(dist, axis=2) + is_sampled_mask = min_dist < 0.001 + + # Generate random variation + variation = 0.9 + 0.2 * np.random.random(interpolated_precip.shape) + + # Don't apply variation to sampled points (set factor to 1.0) + variation[is_sampled_mask] = 1.0 + + # Apply variation + final_precip = interpolated_precip * variation + + # Ensure positive values + final_precip = np.maximum(0.01, final_precip) + + # Create result DataFrame + return pd.DataFrame( + { + "latitude": grid_lat_mesh.flatten(), + "longitude": grid_lon_mesh.flatten(), + "precipitation": final_precip.flatten(), + } + ) + + +def fetch_precipitation_map_data( + lat, lon, start_date, end_date, radius_degrees=1.0, fast_mode=True +): """Wrapper for cached precipitation data.""" - return _fetch_precipitation_map_data_cached(lat, lon, start_date, end_date, radius_degrees, fast_mode).copy() + return _fetch_precipitation_map_data_cached( + lat, lon, start_date, end_date, radius_degrees, fast_mode + ).copy() + @functools.lru_cache(maxsize=32) def _get_temperature_trends_cached(lat, lon, start_date, end_date): """Internal cached function for temperature trends.""" # Fetch temperature data - df = fetch_nasa_power_data(lat, lon, start_date, end_date, - parameters=["T2M", "T2M_MAX", "T2M_MIN"]) - + df = fetch_nasa_power_data( + lat, lon, start_date, end_date, parameters=["T2M", "T2M_MAX", "T2M_MIN"] + ) + # Convert to datetime if needed - if not pd.api.types.is_datetime64_dtype(df['Date']): - df['Date'] = pd.to_datetime(df['Date']) - + if not pd.api.types.is_datetime64_dtype(df["Date"]): + df["Date"] = pd.to_datetime(df["Date"]) + # Create month column - df['Year-Month'] = df['Date'].dt.to_period('M') - + df["Year-Month"] = df["Date"].dt.to_period("M") + # Calculate monthly averages - monthly_data = df.groupby('Year-Month').agg({ - 'T2M': 'mean', - 'T2M_MAX': 'mean', - 'T2M_MIN': 'mean', - 'Date': 'first' # Keep one date for each month - }).reset_index() - + monthly_data = ( + df.groupby("Year-Month") + .agg( + { + "T2M": "mean", + "T2M_MAX": "mean", + "T2M_MIN": "mean", + "Date": "first", # Keep one date for each month + } + ) + .reset_index() + ) + # Rename columns - monthly_data = monthly_data.rename(columns={ - 'T2M': 'Temperature (°C)', - 'T2M_MAX': 'Max Temperature (°C)', - 'T2M_MIN': 'Min Temperature (°C)' - }) - + monthly_data = monthly_data.rename( + columns={ + "T2M": "Temperature (°C)", + "T2M_MAX": "Max Temperature (°C)", + "T2M_MIN": "Min Temperature (°C)", + } + ) + # Sort by date - monthly_data = monthly_data.sort_values('Date') - + monthly_data = monthly_data.sort_values("Date") + # Calculate trend using linear regression from scipy import stats + x = np.arange(len(monthly_data)) if len(x) > 1: # Need at least two points for a trend slope, intercept, r_value, p_value, std_err = stats.linregress( - x, monthly_data['Temperature (°C)'] + x, monthly_data["Temperature (°C)"] ) - monthly_data['Trend'] = intercept + slope * x + monthly_data["Trend"] = intercept + slope * x trend_per_decade = slope * 120 # 120 months in a decade else: - monthly_data['Trend'] = monthly_data['Temperature (°C)'] + monthly_data["Trend"] = monthly_data["Temperature (°C)"] trend_per_decade = 0 - + return monthly_data, trend_per_decade + def get_temperature_trends(lat, lon, start_date, end_date): """Wrapper for cached temperature trends.""" df, trend = _get_temperature_trends_cached(lat, lon, start_date, end_date) return df.copy(), trend + @functools.lru_cache(maxsize=32) def _get_extreme_heat_days_cached(lat, lon, year, percentile): """Internal cached function for extreme heat days.""" # Set date range for the specified year start_date = f"{year}-01-01" end_date = f"{year}-12-31" - + # Fetch temperature and humidity data - df = fetch_nasa_power_data(lat, lon, start_date, end_date, - parameters=["T2M_MAX", "RH2M"]) - + df = fetch_nasa_power_data( + lat, lon, start_date, end_date, parameters=["T2M_MAX", "RH2M"] + ) + # Calculate heat index def calculate_heat_index(row): - t = row['T2M_MAX'] # Temperature in Celsius - rh = row['RH2M'] # Relative humidity in % - + t = row["T2M_MAX"] # Temperature in Celsius + rh = row["RH2M"] # Relative humidity in % + # Simple formula for heat index if t < 26: return t # Below this temperature, heat index equals temperature - + # Full formula - hi = -8.78469475556 + \ - 1.61139411 * t + \ - 2.33854883889 * rh + \ - -0.14611605 * t * rh + \ - -0.012308094 * t**2 + \ - -0.0164248277778 * rh**2 + \ - 0.002211732 * t**2 * rh + \ - 0.00072546 * t * rh**2 + \ - -0.000003582 * t**2 * rh**2 - + hi = ( + -8.78469475556 + + 1.61139411 * t + + 2.33854883889 * rh + + -0.14611605 * t * rh + + -0.012308094 * t**2 + + -0.0164248277778 * rh**2 + + 0.002211732 * t**2 * rh + + 0.00072546 * t * rh**2 + + -0.000003582 * t**2 * rh**2 + ) + return hi - + # Apply heat index calculation - df['Heat Index (°C)'] = df.apply(calculate_heat_index, axis=1) - + df["Heat Index (°C)"] = df.apply(calculate_heat_index, axis=1) + # Determine thresholds - temp_threshold = np.percentile(df['T2M_MAX'], percentile) - hi_threshold = np.percentile(df['Heat Index (°C)'], percentile) - + temp_threshold = np.percentile(df["T2M_MAX"], percentile) + hi_threshold = np.percentile(df["Heat Index (°C)"], percentile) + # Flag extreme heat days - df['Extreme Temperature'] = df['T2M_MAX'] > temp_threshold - df['Extreme Heat Index'] = df['Heat Index (°C)'] > hi_threshold - + df["Extreme Temperature"] = df["T2M_MAX"] > temp_threshold + df["Extreme Heat Index"] = df["Heat Index (°C)"] > hi_threshold + # Add month and day columns - df['Month'] = df['Date'].dt.month - df['Day'] = df['Date'].dt.day - + df["Month"] = df["Date"].dt.month + df["Day"] = df["Date"].dt.day + return df, temp_threshold, hi_threshold + def get_extreme_heat_days(lat, lon, year, percentile=95): """Wrapper for cached extreme heat days.""" df, t_thresh, h_thresh = _get_extreme_heat_days_cached(lat, lon, year, percentile) return df.copy(), t_thresh, h_thresh + @functools.lru_cache(maxsize=32) -def _get_rainfall_comparison_cached(lat, lon, current_start, current_end, prev_start, prev_end): +def _get_rainfall_comparison_cached( + lat, lon, current_start, current_end, prev_start, prev_end +): """Internal cached function for rainfall comparison.""" # Fetch data for current period - current_df = fetch_nasa_power_data(lat, lon, current_start, current_end, - parameters=["PRECTOTCORR"]) - + current_df = fetch_nasa_power_data( + lat, lon, current_start, current_end, parameters=["PRECTOTCORR"] + ) + # Fetch data for previous period - prev_df = fetch_nasa_power_data(lat, lon, prev_start, prev_end, - parameters=["PRECTOTCORR"]) - + prev_df = fetch_nasa_power_data( + lat, lon, prev_start, prev_end, parameters=["PRECTOTCORR"] + ) + # Add year marker - current_df['Year'] = 'This Year' - prev_df['Year'] = 'Last Year' - + current_df["Year"] = "This Year" + prev_df["Year"] = "Last Year" + # Rename precipitation column - current_df = current_df.rename(columns={'PRECTOTCORR': 'Precipitation (mm)'}) - prev_df = prev_df.rename(columns={'PRECTOTCORR': 'Precipitation (mm)'}) - + current_df = current_df.rename(columns={"PRECTOTCORR": "Precipitation (mm)"}) + prev_df = prev_df.rename(columns={"PRECTOTCORR": "Precipitation (mm)"}) + # Calculate day of season - current_df['datetime'] = pd.to_datetime(current_df['Date']) - current_df['Day of Season'] = (current_df['datetime'] - pd.to_datetime(current_start)).dt.days - - prev_df['datetime'] = pd.to_datetime(prev_df['Date']) - prev_df['Day of Season'] = (prev_df['datetime'] - pd.to_datetime(prev_start)).dt.days - + current_df["datetime"] = pd.to_datetime(current_df["Date"]) + current_df["Day of Season"] = ( + current_df["datetime"] - pd.to_datetime(current_start) + ).dt.days + + prev_df["datetime"] = pd.to_datetime(prev_df["Date"]) + prev_df["Day of Season"] = ( + prev_df["datetime"] - pd.to_datetime(prev_start) + ).dt.days + # Calculate cumulative precipitation - current_df = current_df.sort_values('datetime') - prev_df = prev_df.sort_values('datetime') - - current_df['Cumulative Precipitation (mm)'] = current_df['Precipitation (mm)'].cumsum() - prev_df['Cumulative Precipitation (mm)'] = prev_df['Precipitation (mm)'].cumsum() - + current_df = current_df.sort_values("datetime") + prev_df = prev_df.sort_values("datetime") + + current_df["Cumulative Precipitation (mm)"] = current_df[ + "Precipitation (mm)" + ].cumsum() + prev_df["Cumulative Precipitation (mm)"] = prev_df["Precipitation (mm)"].cumsum() + return current_df, prev_df + def get_rainfall_comparison(lat, lon, current_start, current_end, prev_start, prev_end): """Wrapper for cached rainfall comparison.""" - df1, df2 = _get_rainfall_comparison_cached(lat, lon, current_start, current_end, prev_start, prev_end) + df1, df2 = _get_rainfall_comparison_cached( + lat, lon, current_start, current_end, prev_start, prev_end + ) return df1.copy(), df2.copy() + @functools.lru_cache(maxsize=32) -def _calculate_climate_anomalies_cached(lat, lon, start_date, end_date, variable, baseline_period): +def _calculate_climate_anomalies_cached( + lat, lon, start_date, end_date, variable, baseline_period +): """Internal cached function for climate anomalies.""" # Map variable to NASA POWER parameter - if variable.lower() == 'temperature': - parameter = 'T2M' - value_col = 'Temperature (°C)' - elif variable.lower() == 'precipitation': - parameter = 'PRECTOTCORR' - value_col = 'Precipitation (mm)' - elif variable.lower() == 'humidity': - parameter = 'RH2M' - value_col = 'Relative Humidity (%)' - elif variable.lower() == 'wind speed': - parameter = 'WS2M' - value_col = 'Wind Speed (m/s)' + if variable.lower() == "temperature": + parameter = "T2M" + value_col = "Temperature (°C)" + elif variable.lower() == "precipitation": + parameter = "PRECTOTCORR" + value_col = "Precipitation (mm)" + elif variable.lower() == "humidity": + parameter = "RH2M" + value_col = "Relative Humidity (%)" + elif variable.lower() == "wind speed": + parameter = "WS2M" + value_col = "Wind Speed (m/s)" else: raise ValueError(f"Unsupported variable: {variable}") - + # Fetch data for the analysis period df = fetch_nasa_power_data(lat, lon, start_date, end_date, parameters=[parameter]) - + # Rename the column df = df.rename(columns={parameter: value_col}) - + # Parse baseline years - baseline_start, baseline_end = baseline_period.split('-') + baseline_start, baseline_end = baseline_period.split("-") baseline_start_year = int(baseline_start) baseline_end_year = int(baseline_end) - + # Calculate month for each date - df['Month'] = pd.to_datetime(df['Date']).dt.month - + df["Month"] = pd.to_datetime(df["Date"]).dt.month + # The NASA POWER dataset doesn't have data going back to typical climate baselines # So we'll simulate baseline data based on the current data with some adjustments # For a real application, you would use actual historical data - + # Create baseline monthly means - monthly_means = df.groupby('Month')[value_col].mean().reset_index() - + monthly_means = df.groupby("Month")[value_col].mean().reset_index() + # Apply adjustments based on the baseline period # This is a simplification - real climate change adjustments would be more complex - if variable.lower() == 'temperature': + if variable.lower() == "temperature": # Adjust temperature baseline (approximately -0.2°C per decade going back) - decades_diff = (datetime.now().year - (baseline_start_year + baseline_end_year) / 2) / 10 - monthly_means[f"{value_col}_baseline"] = monthly_means[value_col] - (decades_diff * 0.2) + decades_diff = ( + datetime.now().year - (baseline_start_year + baseline_end_year) / 2 + ) / 10 + monthly_means[f"{value_col}_baseline"] = monthly_means[value_col] - ( + decades_diff * 0.2 + ) else: # For other variables, use a smaller adjustment monthly_means[f"{value_col}_baseline"] = monthly_means[value_col] * 0.95 - + # Join the baseline means to the main data - df = df.merge(monthly_means[['Month', f"{value_col}_baseline"]], on='Month') - + df = df.merge(monthly_means[["Month", f"{value_col}_baseline"]], on="Month") + # Calculate anomalies - if variable.lower() == 'temperature': + if variable.lower() == "temperature": # For temperature, use simple difference - df['Anomaly'] = df[value_col] - df[f"{value_col}_baseline"] - df['Anomaly Unit'] = "°C" + df["Anomaly"] = df[value_col] - df[f"{value_col}_baseline"] + df["Anomaly Unit"] = "°C" else: # For other variables, use percent difference - df['Anomaly'] = (df[value_col] - df[f"{value_col}_baseline"]) / df[f"{value_col}_baseline"] * 100 - df['Anomaly Unit'] = "%" - + df["Anomaly"] = ( + (df[value_col] - df[f"{value_col}_baseline"]) + / df[f"{value_col}_baseline"] + * 100 + ) + df["Anomaly Unit"] = "%" + return df -def calculate_climate_anomalies(lat, lon, start_date, end_date, variable, baseline_period): + +def calculate_climate_anomalies( + lat, lon, start_date, end_date, variable, baseline_period +): """Wrapper for cached climate anomalies.""" - return _calculate_climate_anomalies_cached(lat, lon, start_date, end_date, variable, baseline_period).copy() + return _calculate_climate_anomalies_cached( + lat, lon, start_date, end_date, variable, baseline_period + ).copy()