From e562fa1553ff69fe6a9f74121c81c091a745ce05 Mon Sep 17 00:00:00 2001 From: Syahrial Agni Prasetya Date: Mon, 4 May 2026 20:35:48 +0700 Subject: [PATCH] Better --- Cargo.toml | 30 + __init__.py | 0 core/__init__.py | 29 - core/analysis.py | 100 ---- core/base.py | 545 ------------------ core/config.py | 374 ------------ core/data_source.py | 315 ---------- core/file_scanner.py | 173 ------ core/hydro.py | 57 -- core/logger.py | 77 --- core/normalize.py | 173 ------ core/processing.py | 92 --- core/resampler.py | 101 ---- core/terrain.py | 99 ---- engine/__init__.py | 0 engine/presets.py | 314 ---------- engine/risk_engine.py | 296 ---------- examples/flood_bandung_config.yaml | 100 +--- examples/run_flood_analysis.py | 170 ------ murasa-cli/Cargo.toml | 21 + murasa-cli/src/main.rs | 145 +++++ murasa-core/Cargo.toml | 26 + murasa-core/src/analysis.rs | 15 + murasa-core/src/base.rs | 314 ++++++++++ murasa-core/src/config.rs | 372 ++++++++++++ murasa-core/src/data_source.rs | 222 +++++++ murasa-core/src/hydro.rs | 61 ++ murasa-core/src/lib.rs | 24 + murasa-core/src/logger.rs | 66 +++ murasa-core/src/normalize.rs | 105 ++++ murasa-core/src/processing.rs | 76 +++ murasa-core/src/resampler.rs | 59 ++ murasa-core/src/terrain.rs | 89 +++ murasa-core/tests/config_test.rs | 47 ++ murasa-engine/Cargo.toml | 18 + murasa-engine/src/engine.rs | 310 ++++++++++ murasa-engine/src/lib.rs | 12 + murasa-engine/src/presets.rs | 67 +++ murasa-engine/tests/engine_test.rs | 17 + murasa-plugins/Cargo.toml | 19 + murasa-plugins/src/curvature.rs | 49 ++ murasa-plugins/src/elevation.rs | 55 ++ murasa-plugins/src/land_use.rs | 57 ++ murasa-plugins/src/lib.rs | 26 + .../src/loaders/admin_boundaries.rs | 54 ++ murasa-plugins/src/loaders/landuse.rs | 46 ++ murasa-plugins/src/loaders/mod.rs | 9 + murasa-plugins/src/loaders/rivers.rs | 46 ++ murasa-plugins/src/proximity.rs | 55 ++ murasa-plugins/src/rainfall.rs | 66 +++ murasa-plugins/src/reporting.rs | 49 ++ murasa-plugins/src/slope.rs | 59 ++ murasa-plugins/src/twi.rs | 59 ++ murasa-plugins/src/water_exclusion.rs | 50 ++ plugins/__init__.py | 9 - plugins/curvature.py | 148 ----- plugins/elevation.py | 12 - plugins/land_use.py | 89 --- plugins/loaders/__init__.py | 1 - plugins/loaders/admin_boundaries.py | 38 -- plugins/loaders/landuse.py | 237 -------- plugins/loaders/rivers.py | 39 -- plugins/proximity.py | 38 -- plugins/rainfall.py | 81 --- plugins/reporting.py | 354 ------------ plugins/slope.py | 38 -- plugins/twi.py | 77 --- plugins/water_exclusion.py | 48 -- pyproject.toml | 53 -- rustfmt.toml | 2 + 70 files changed, 2829 insertions(+), 4245 deletions(-) create mode 100644 Cargo.toml delete mode 100644 __init__.py delete mode 100644 core/__init__.py delete mode 100644 core/analysis.py delete mode 100644 core/base.py delete mode 100644 core/config.py delete mode 100644 core/data_source.py delete mode 100644 core/file_scanner.py delete mode 100644 core/hydro.py delete mode 100644 core/logger.py delete mode 100644 core/normalize.py delete mode 100644 core/processing.py delete mode 100644 core/resampler.py delete mode 100644 core/terrain.py delete mode 100644 engine/__init__.py delete mode 100644 engine/presets.py delete mode 100644 engine/risk_engine.py delete mode 100644 examples/run_flood_analysis.py create mode 100644 murasa-cli/Cargo.toml create mode 100644 murasa-cli/src/main.rs create mode 100644 murasa-core/Cargo.toml create mode 100644 murasa-core/src/analysis.rs create mode 100644 murasa-core/src/base.rs create mode 100644 murasa-core/src/config.rs create mode 100644 murasa-core/src/data_source.rs create mode 100644 murasa-core/src/hydro.rs create mode 100644 murasa-core/src/lib.rs create mode 100644 murasa-core/src/logger.rs create mode 100644 murasa-core/src/normalize.rs create mode 100644 murasa-core/src/processing.rs create mode 100644 murasa-core/src/resampler.rs create mode 100644 murasa-core/src/terrain.rs create mode 100644 murasa-core/tests/config_test.rs create mode 100644 murasa-engine/Cargo.toml create mode 100644 murasa-engine/src/engine.rs create mode 100644 murasa-engine/src/lib.rs create mode 100644 murasa-engine/src/presets.rs create mode 100644 murasa-engine/tests/engine_test.rs create mode 100644 murasa-plugins/Cargo.toml create mode 100644 murasa-plugins/src/curvature.rs create mode 100644 murasa-plugins/src/elevation.rs create mode 100644 murasa-plugins/src/land_use.rs create mode 100644 murasa-plugins/src/lib.rs create mode 100644 murasa-plugins/src/loaders/admin_boundaries.rs create mode 100644 murasa-plugins/src/loaders/landuse.rs create mode 100644 murasa-plugins/src/loaders/mod.rs create mode 100644 murasa-plugins/src/loaders/rivers.rs create mode 100644 murasa-plugins/src/proximity.rs create mode 100644 murasa-plugins/src/rainfall.rs create mode 100644 murasa-plugins/src/reporting.rs create mode 100644 murasa-plugins/src/slope.rs create mode 100644 murasa-plugins/src/twi.rs create mode 100644 murasa-plugins/src/water_exclusion.rs delete mode 100644 plugins/__init__.py delete mode 100644 plugins/curvature.py delete mode 100644 plugins/elevation.py delete mode 100644 plugins/land_use.py delete mode 100644 plugins/loaders/__init__.py delete mode 100644 plugins/loaders/admin_boundaries.py delete mode 100644 plugins/loaders/landuse.py delete mode 100644 plugins/loaders/rivers.py delete mode 100644 plugins/proximity.py delete mode 100644 plugins/rainfall.py delete mode 100644 plugins/reporting.py delete mode 100644 plugins/slope.py delete mode 100644 plugins/twi.py delete mode 100644 plugins/water_exclusion.py delete mode 100644 pyproject.toml create mode 100644 rustfmt.toml diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..722d2ba --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,30 @@ +[workspace] +members = [ + "murasa-core", + "murasa-engine", + "murasa-plugins", + "murasa-cli", +] +resolver = "2" + +[workspace.package] +version = "0.1.0" +edition = "2021" +license = "MIT" +authors = ["Murasa Contributors"] + +[workspace.dependencies] +ndarray = "0.15" +ndarray-stats = "0.5" +serde = { version = "1.0", features = ["derive"] } +serde_yaml = "0.9" +serde_json = "1.0" +thiserror = "1.0" +anyhow = "1.0" +log = "0.4" +env_logger = "0.11" +geo = "0.28" +geozero = "0.13" +geo-types = "0.7" +gdal = "0.16" +pathdiff = "0.2" diff --git a/__init__.py b/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/core/__init__.py b/core/__init__.py deleted file mode 100644 index ad790ec..0000000 --- a/core/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -from .config import ( - EngineConfig, - FactorConfig, - SpatialConfig, - OutputConfig, - ClassificationConfig, -) -from .logger import logger, log_success, log_error, log_warning - -__all__ = [ - 'EngineConfig', - 'FactorConfig', - 'SpatialConfig', - 'OutputConfig', - 'ClassificationConfig', - 'logger', - 'log_success', - 'log_error', - 'log_warning', - 'ProcessingContext', - 'percentile', 'minmax', 'rank', 'zscore', 'invert', - 'scan_shapefiles', 'scan_gdb_layers', 'scan_all', - 'LoaderRegistry', - 'RasterParameterPlugin' -] - -from .processing import ProcessingContext -from .normalize import percentile, minmax, rank, zscore, invert, classify_quantile, classify_jenks -from .file_scanner import scan_shapefiles, scan_gdb_layers, scan_all, scan_filename diff --git a/core/analysis.py b/core/analysis.py deleted file mode 100644 index 6ff7eb2..0000000 --- a/core/analysis.py +++ /dev/null @@ -1,100 +0,0 @@ -import numpy as np -import rasterio -from geopandas import GeoDataFrame -from rasterstats import zonal_stats -from tempfile import NamedTemporaryFile -import os -from typing import List, Dict, Any, Optional - -from murasa.core.logger import logger -from murasa.core.normalize import classify_jenks - -from murasa.core.data_source import DataSource - - -def calculate_zonal_stats( - vector_gdf: GeoDataFrame, - raster_source: DataSource, - stats: List[str] = ['mean', 'count'], - nodata: float = np.nan, - categorical: bool = False, - transform=None, - crs=None -) -> List[Dict[str, Any]]: - """ - Calculate zonal statistics for vector features against a raster source. - Handles temporary file creation/cleanup required by rasterstats. - - Args: - vector_gdf: GeoDataFrame with zones - raster_source: DataSource (raster) or numpy array - stats: List of statistics to compute - nodata: Nodata value - categorical: Whether raster is categorical - transform: Affine transform (required if raster_source is array) - crs: CRS (required if raster_source is array) - - Returns: - List of dicts containing stats - """ - - if hasattr(raster_source, 'data'): - - if raster_source.data is None: - logger.warning(f"DataSource {getattr(raster_source, 'name', 'unknown')} has no data loaded.") - return [] - - data = np.asanyarray(raster_source.data) - - if hasattr(raster_source, 'metadata'): - transform = transform or raster_source.metadata.get('transform') - crs = crs or raster_source.crs - elif isinstance(raster_source, np.ndarray): - data = raster_source - else: - - try: - logger.debug(f"Converting raster source of type {type(raster_source)} to numpy array...") - data = np.array(raster_source) - except Exception as e: - logger.error(f"Invalid raster source type: {type(raster_source)}. Error: {e}") - return [] - - if transform is None or crs is None: - raise ValueError("Transform and CRS required for array-based zonal stats") - - with NamedTemporaryFile(suffix='.tif', delete=False) as tmp: - try: - - with rasterio.open( - tmp.name, 'w', - driver='GTiff', - height=data.shape[0], - width=data.shape[1], - count=1, - dtype=data.dtype, - crs=crs, - transform=transform, - nodata=nodata - ) as dst: - dst.write(data, 1) - - results = zonal_stats( - vector_gdf, - tmp.name, - stats=stats, - categorical=categorical, - nodata=nodata - ) - return results - - except Exception as e: - logger.error(f"Zonal stats failed: {e}") - return [] - finally: - - if os.path.exists(tmp.name): - try: - os.remove(tmp.name) - except: - pass diff --git a/core/base.py b/core/base.py deleted file mode 100644 index a036d4e..0000000 --- a/core/base.py +++ /dev/null @@ -1,545 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Tuple, Any -from pathlib import Path -import numpy as np - -from .data_source import DataRegistry -from .logger import logger - - -class ParameterPlugin(ABC): - """ - Abstract base class for risk parameters - - Each parameter plugin: - 1. Validates required data sources - 2. Processes input data - 3. Returns normalized risk raster (0-1) - - Example plugins: - - ElevationPlugin: Low elevation = High flood risk - - SlopePlugin: Steep slope = High landslide risk - - ProximityPlugin: Close to river = High flood risk - """ - - def __init__(self, name: str, weight: float = 1.0): - """ - Initialize parameter plugin - - Args: - name: Unique identifier for this parameter - weight: Relative weight in risk calculation (0-1) - """ - self.name = name - self.weight = weight - self.result: np.ndarray = None - self.metadata: dict = {} - self.is_exclusion: bool = False - - @abstractmethod - def validate_requirements(self, registry: DataRegistry) -> bool: - """ - Check if required data sources are available - - Args: - registry: Data registry to check - - Returns: - True if all requirements met - """ - pass - - @abstractmethod - def process(self, - registry: DataRegistry, - grid_shape: Tuple[int, int], - transform: Any, - crs: str) -> np.ndarray: - """ - Process data and return normalized risk raster - - Args: - registry: Data source registry - grid_shape: Target raster shape (height, width) - transform: Rasterio transform object - crs: Target coordinate reference system - - Returns: - Normalized risk array (0-1, float32) - """ - pass - - def get_statistics(self) -> dict: - """ - Calculate statistics of the result - - Returns: - Dictionary with min, max, mean, std - """ - if self.result is None: - return {} - - valid = np.isfinite(self.result) - if not valid.any(): - return {'valid_pixels': 0} - - return { - 'min': float(np.nanmin(self.result)), - 'max': float(np.nanmax(self.result)), - 'mean': float(np.nanmean(self.result)), - 'std': float(np.nanstd(self.result)), - 'valid_pixels': int(valid.sum()), - 'total_pixels': self.result.size - } - - def normalize_percentile(self, - data: np.ndarray, - lower: float = 5, - upper: float = 95) -> np.ndarray: - """ - Normalize using percentile-based scaling (robust to outliers) - - Args: - data: Input array - lower: Lower percentile - upper: Upper percentile - - Returns: - Normalized array (0-1) - """ - valid = np.isfinite(data) - if not valid.any(): - logger.warning(f"{self.name}: No valid data for normalization") - return np.zeros_like(data) - - d_min, d_max = np.percentile(data[valid], [lower, upper]) - - if d_max == d_min: - logger.warning(f"{self.name}: Constant values detected") - return np.full_like(data, 0.5) - - normalized = np.clip((data - d_min) / (d_max - d_min), 0, 1) - return normalized.astype(np.float32) - - def normalize_minmax(self, data: np.ndarray) -> np.ndarray: - """ - Standard min-max normalization - - Args: - data: Input array - - Returns: - Normalized array (0-1) - """ - valid = np.isfinite(data) - if not valid.any(): - return np.zeros_like(data) - - d_min = np.nanmin(data) - d_max = np.nanmax(data) - - if d_max == d_min: - return np.full_like(data, 0.5) - - normalized = (data - d_min) / (d_max - d_min) - return normalized.astype(np.float32) - - def log_processing(self): - logger.info(f" Processing: {self.name} (weight={self.weight:.3f})") - - def log_result(self): - stats = self.get_statistics() - if stats: - logger.info(f" Range: {stats['min']:.3f} - {stats['max']:.3f}") - else: - logger.warning(f" No valid data produced") - - def __repr__(self) -> str: - return f"{self.__class__.__name__}(name='{self.name}', weight={self.weight:.3f})" - - def __str__(self) -> str: - return f"{self.name} ({self.weight:.3f})" - - -class RasterParameterPlugin(ParameterPlugin): - """ - Mainly for normalization. - - Subclasses only need to declare: - source_keys: Registry keys to try (first found is used) - inverse: Whether to invert the result (1 - normalized) - normalization: "percentile", "minmax", "rank", or "zscore" - - And optionally override: - transform_data(data, ctx): Custom transformation before normalization - - Example: - class ElevationPlugin(RasterParameterPlugin): - source_keys = ['dem', 'elevation'] - inverse = True - """ - - source_keys: list[str] = [] - inverse: bool = False - normalization: str = "percentile" - - def __init__(self, name: str = "", weight: float = 1.0, **kwargs): - if not name: - name = self.__class__.__name__.replace("Plugin", "").lower() - super().__init__(name, weight) - for key, val in kwargs.items(): - setattr(self, key, val) - - def validate_requirements(self, registry: DataRegistry) -> bool: - return any(registry.has(k) for k in self.source_keys) - - def _find_source(self, registry: DataRegistry): - for key in self.source_keys: - if registry.has(key): - return registry.get(key) - raise KeyError( - f"{self.name}: None of {self.source_keys} found in registry. " - f"Available: {registry.list_sources()}" - ) - - def transform_data(self, data: np.ndarray, ctx) -> np.ndarray: - """ - Override to apply custom transformations before normalization. - - Args: - data: Resampled raster array - ctx: ProcessingContext instance - - Returns: - Transformed array - """ - return data - - def _normalize(self, data: np.ndarray) -> np.ndarray: - from . import normalize as norm - - match self.normalization: - case "percentile": - return norm.percentile(data) - case "minmax": - return norm.minmax(data) - case "rank": - return norm.rank(data) - case "zscore": - return norm.zscore(data) - case _: - return norm.percentile(data) - - def process(self, registry, grid_shape, transform, crs): - from .processing import ProcessingContext - - self.log_processing() - ctx = ProcessingContext(grid_shape, transform, crs) - - source = self._find_source(registry) - data = ctx.resample_raster(source) - data = self.transform_data(data, ctx) - normalized = self._normalize(data) - - if self.inverse: - normalized = 1 - normalized - - self.result = normalized.astype(np.float32) - self.log_result() - return self.result - - -class PluginRegistry: - def __init__(self): - self.plugins: dict[str, type[ParameterPlugin]] = {} - - def register(self, plugin_class: type[ParameterPlugin]) -> None: - name = plugin_class.__name__ - self.plugins[name] = plugin_class - logger.debug(f"Registered plugin: {name}") - - def get(self, name: str) -> type[ParameterPlugin]: - if name not in self.plugins: - raise KeyError(f"Plugin '{name}' not found. " - f"Available: {list(self.plugins.keys())}") - return self.plugins[name] - - def list_plugins(self) -> list[str]: - """List all registered plugins""" - return list(self.plugins.keys()) - - -plugin_registry = PluginRegistry() - - -class PostProcessorPlugin(ABC): - """ - For post-processing results after rasterization and risk calculation. - For example, vectorization, adding explainability, etc. - """ - - def __init__(self, name: str): - self.name = name - - @abstractmethod - def post_process(self, - registry: DataRegistry, - risk_raster: np.ndarray, - transform: Any, - crs: str, - output_dir: Any, - config: Any = None): - """ - Execute post-processing logic - - Args: - registry: Data registry (to access input factors) - risk_raster: The final calculated risk raster - transform: Rasterio transform - crs: CRS string - output_dir: Path to output directory - config: EngineConfig object (optional) - """ - pass - - -class CircularDependencyError(Exception): - pass - - -class DataLoaderPlugin(ABC): - """ - Data loader plugin base class. - - Data loaders are used for: - 1. Loading data from various sources (files, APIs, etc.) - 2. Preprocessing and validation - 3. Registering data sources to DataRegistry - - Dependencies are declared via: - - provides: List of registry keys this loader registers - - requires: List of registry keys this loader needs - - LoaderRegistry uses topological sort to resolve execution order - automatically based on these declarations. - - Example: - class RiversLoader(DataLoaderPlugin): - provides = ["river"] - requires = [] - - class ProximityLoader(DataLoaderPlugin): - provides = ["river_proximity"] - requires = ["river"] - """ - - provides: list[str] = [] - requires: list[str] = [] - - def __init__(self, name: str): - self.name = name - self.loaded_sources: dict = {} - - @classmethod - @abstractmethod - def can_handle(cls, config) -> bool: - """ - Check if this loader should run based on config - - Args: - config: EngineConfig object - - Returns: - True if this loader should be executed - """ - pass - - @abstractmethod - def load(self, registry: DataRegistry) -> dict: - pass - - def log_loading(self, message: str): - """Log loading progress""" - logger.info(f"[{self.name}] {message}") - - def __repr__(self) -> str: - deps = f", requires={self.requires}" if self.requires else "" - return f"{self.__class__.__name__}(provides={self.provides}{deps})" - - -class LoaderRegistry: - """ - Registry for managing data loader plugins with auto-discovery - - Features: - - Auto-discover loader plugins from directory - - Topological sort based on provides/requires declarations - - Circular dependency detection - - Skip loaders based on config conditions - """ - - def __init__(self): - self.loaders: list[type[DataLoaderPlugin]] = [] - self._discovered = False - - def auto_discover(self, plugins_dir: Path): - """ - Discover and register loader plugins from directory - - Args: - plugins_dir: Root plugins directory to search - """ - import importlib.util - import inspect - - loaders_dir = plugins_dir / "loaders" - - if not loaders_dir.exists(): - logger.warning(f"Loaders directory not found: {loaders_dir}") - return - - logger.info(f"Auto-discovering data loaders in: {loaders_dir}") - - for module_file in loaders_dir.glob("*.py"): - if module_file.name.startswith("_"): - continue - - try: - spec = importlib.util.spec_from_file_location( - f"plugins.loaders.{module_file.stem}", module_file - ) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - - for name, obj in inspect.getmembers(module, inspect.isclass): - if (issubclass(obj, DataLoaderPlugin) and - obj is not DataLoaderPlugin and - obj not in self.loaders): - self.loaders.append(obj) - logger.debug( - f" Discovered: {obj.__name__} " - f"(provides={obj.provides}, requires={obj.requires})" - ) - - except Exception as e: - logger.error(f"Failed to load loader module {module_file.name}: {e}") - - self._discovered = True - logger.info(f"Discovered {len(self.loaders)} data loader(s)") - - def register(self, loader_class: type[DataLoaderPlugin]): - if loader_class not in self.loaders: - self.loaders.append(loader_class) - - def _resolve_order(self, active_loaders: list[type[DataLoaderPlugin]]) -> list[type[DataLoaderPlugin]]: - """ - Resolve execution order using topological sort (Kahn's algorithm) - - Args: - active_loaders: List of loader classes to sort - - Returns: - Loaders in dependency-safe execution order - - Raises: - CircularDependencyError: If circular dependencies are detected - """ - from collections import defaultdict, deque - - provider_map: dict[str, type[DataLoaderPlugin]] = {} - for loader in active_loaders: - for key in loader.provides: - provider_map[key] = loader - - graph: dict[type, set[type]] = defaultdict(set) - in_degree: dict[type, int] = {loader: 0 for loader in active_loaders} - - for loader in active_loaders: - for req in loader.requires: - if req in provider_map: - dependency = provider_map[req] - if dependency is not loader: - graph[dependency].add(loader) - in_degree[loader] += 1 - else: - logger.warning( - f"{loader.__name__} requires '{req}' " - f"but no loader provides it" - ) - - queue = deque( - sorted( - [l for l in active_loaders if in_degree[l] == 0], - key=lambda x: x.__name__ - ) - ) - ordered = [] - - while queue: - loader = queue.popleft() - ordered.append(loader) - - for dependent in sorted(graph[loader], key=lambda x: x.__name__): - in_degree[dependent] -= 1 - if in_degree[dependent] == 0: - queue.append(dependent) - - if len(ordered) != len(active_loaders): - unresolved = [l.__name__ for l in active_loaders if l not in ordered] - raise CircularDependencyError( - f"Circular dependency detected among: {unresolved}" - ) - - return ordered - - def load_all(self, config, registry: DataRegistry): - """ - Execute all applicable loaders in dependency-resolved order - - Args: - config: EngineConfig object - registry: DataRegistry to register sources to - """ - if not self._discovered: - logger.warning("Loaders not discovered yet. Call auto_discover() first.") - return - - active = [l for l in self.loaders if l.can_handle(config)] - skipped = [l for l in self.loaders if not l.can_handle(config)] - - for loader_class in skipped: - logger.debug(f"Skipping {loader_class.__name__} (conditions not met)") - - ordered = self._resolve_order(active) - - logger.info("\n" + "=" * 70) - logger.info("DATA LOADING PHASE") - logger.info("=" * 70) - logger.info(f"Load order: {' -> '.join(l.__name__ for l in ordered)}") - - executed = 0 - for loader_class in ordered: - provides_str = ', '.join(loader_class.provides) or 'none' - requires_str = ', '.join(loader_class.requires) or 'none' - logger.info( - f"\n{loader_class.__name__} " - f"(provides=[{provides_str}], requires=[{requires_str}])" - ) - try: - loader = loader_class(config) - loader.load(registry) - executed += 1 - except Exception as e: - logger.error(f"Loader {loader_class.__name__} failed: {e}") - import traceback - traceback.print_exc() - - logger.info("\n" + "=" * 70) - logger.info(f"DATA LOADING COMPLETE ({executed}/{len(ordered)} loader(s) executed)") - logger.info("=" * 70 + "\n") - - def list_loaders(self) -> list[str]: - return [loader.__name__ for loader in self.loaders] - - def __len__(self) -> int: - return len(self.loaders) diff --git a/core/config.py b/core/config.py deleted file mode 100644 index 31023d1..0000000 --- a/core/config.py +++ /dev/null @@ -1,374 +0,0 @@ -from dataclasses import dataclass, field -from pathlib import Path -from typing import Dict, List, Optional, Any -import yaml -import json -from .logger import logger, log_success, log_error - - -@dataclass -class FactorConfig: - name: str - weight: float = 0.0 - source_path: Optional[Path] = None - source_type: str = "raster" - processor: Optional[str] = None - parameters: Dict[str, Any] = field(default_factory=dict) - - def __post_init__(self): - if isinstance(self.source_path, str): - self.source_path = Path(self.source_path) - - -@dataclass -class SpatialConfig: - target_crs: str = "EPSG:4326" - metric_crs: str = "EPSG:3857" - resolution: float = 10.0 - study_area_key: str = "admin" - - filter_province: Optional[List[str]] = None - filter_city: Optional[List[str]] = None - filter_district: Optional[List[str]] = None - - def get_filter_level(self) -> str: - if self.filter_district: - return "district" - elif self.filter_city: - return "city" - elif self.filter_province: - return "province" - return "full" - - def get_active_filter(self) -> Optional[List[str]]: - if self.filter_district: - return self.filter_district - elif self.filter_city: - return self.filter_city - elif self.filter_province: - return self.filter_province - return None - - -@dataclass -class OutputConfig: - output_dir: Path = field(default_factory=lambda: Path("./output")) - formats: List[str] = field(default_factory=lambda: ["geojson"]) - generate_report: bool = True - - def __post_init__(self): - if isinstance(self.output_dir, str): - self.output_dir = Path(self.output_dir) - - -@dataclass -class ClassificationConfig: - method: str = "quantile" - num_classes: int = 5 - class_names: List[str] = field(default_factory=lambda: [ - "Very Low", "Low", "Moderate", "High", "Very High" - ]) - class_colors: List[str] = field(default_factory=lambda: [ - ]) - - thresholds: Optional[List[float]] = None - - -@dataclass -class PathsConfig: - dem: Optional[Path] = None - slope: Optional[Path] = None - twi: Optional[Path] = None - rainfall: Optional[Path] = None - river: Optional[Path] = None - admin_dirs: List[Path] = field(default_factory=list) - output_dir: Path = field(default_factory=lambda: Path("./output")) - - def __post_init__(self): - for attr in ('dem', 'slope', 'twi', 'rainfall', 'river'): - val = getattr(self, attr) - if isinstance(val, str): - setattr(self, attr, Path(val)) - if isinstance(self.output_dir, str): - self.output_dir = Path(self.output_dir) - self.admin_dirs = [Path(p) if isinstance(p, str) else p for p in self.admin_dirs] - - -@dataclass -class WeightsConfig: - rainfall: float = 0.0 - elevation: float = 0.0 - slope: float = 0.0 - twi: float = 0.0 - proximity: float = 0.0 - land_use: float = 0.0 - - def total(self) -> float: - return (self.rainfall + self.elevation + self.slope - + self.twi + self.proximity + self.land_use) - - def as_dict(self) -> Dict[str, float]: - return { - 'rainfall': self.rainfall, - 'elevation': self.elevation, - 'slope': self.slope, - 'twi': self.twi, - 'proximity': self.proximity, - 'land_use': self.land_use, - } - - -@dataclass -class EngineConfig: - name: str = "unnamed_analysis" - description: str = "" - analysis_type: str = "susceptibility" - - spatial: SpatialConfig = field(default_factory=SpatialConfig) - output: OutputConfig = field(default_factory=OutputConfig) - classification: ClassificationConfig = field(default_factory=ClassificationConfig) - - factors: Dict[str, FactorConfig] = field(default_factory=dict) - - admin_dirs: List[Path] = field(default_factory=list) - admin_filename: str = "ADMINISTRASIDESA_AR_25K.shp" - - parameters: Dict[str, Any] = field(default_factory=dict) - - paths: PathsConfig = field(default_factory=PathsConfig) - weights: WeightsConfig = field(default_factory=WeightsConfig) - plugin_parameters: Dict[str, Any] = field(default_factory=dict) - - def get_weights(self) -> Dict[str, float]: - return {name: f.weight for name, f in self.factors.items()} - - def get_factor_path(self, factor_name: str) -> Optional[Path]: - if factor_name in self.factors: - return self.factors[factor_name].source_path - return None - - def validate_weights(self) -> None: - if not self.factors: - return - total = sum(f.weight for f in self.factors.values()) - if abs(total - 1.0) > 0.001: - raise ValueError(f"Weights must sum to 1.0, got {total:.3f}") - - def normalize_weights(self) -> None: - total = sum(f.weight for f in self.factors.values()) - if total > 0: - for factor in self.factors.values(): - factor.weight /= total - log_success(f"Weights normalized (total was {total:.3f})") - - def validate(self) -> None: - self.validate_weights() - self.output.output_dir.mkdir(parents=True, exist_ok=True) - - missing = [] - for name, factor in self.factors.items(): - if factor.source_path and not factor.source_path.exists(): - missing.append(f"{name}: {factor.source_path}") - - if missing: - for m in missing: - log_error(f"Factor path not found: {m}") - - log_success("Configuration validated") - - @classmethod - def from_yaml(cls, yaml_path: Path) -> 'EngineConfig': - logger.info(f"Loading config from: {yaml_path}") - yaml_path = Path(yaml_path) - base_dir = yaml_path.parent.resolve() - - try: - with open(yaml_path, 'r') as f: - data = yaml.safe_load(f) - except Exception as e: - logger.error(f"Failed to read YAML: {e}") - raise - - project = data.get('project', {}) - config = cls( - name=project.get('name', data.get('name', 'unnamed')), - description=project.get('description', data.get('description', '')), - analysis_type=data.get('analysis_type', 'susceptibility'), - ) - - if 'spatial' in data: - config.spatial = SpatialConfig(**data['spatial']) - - if 'output' in data: - out_data = data['output'] - config.output = OutputConfig( - output_dir=Path(out_data.get('dir', './output')), - formats=out_data.get('formats', ['geojson']), - generate_report=out_data.get('generate_report', True) - ) - - if 'classification' in data: - config.classification = ClassificationConfig(**data['classification']) - - is_migrated = 'paths' in data and 'weights' in data - - if is_migrated: - config._load_migrated_format(data, base_dir) - else: - config._load_factors_format(data, base_dir) - - if 'admin_dirs' in data: - config.admin_dirs = [ - Path(p) if Path(p).is_absolute() else base_dir / p - for p in data['admin_dirs'] - ] - - if 'admin_filename' in data: - config.admin_filename = data['admin_filename'] - - config.parameters = data.get('parameters', {}) - - config.validate() - return config - - def _load_migrated_format(self, data: dict, base_dir: Path) -> None: - paths_data = data.get('paths', {}) - - resolve = lambda p: Path(p).resolve() if Path(p).is_absolute() else (base_dir / p).resolve() - - self.paths = PathsConfig( - dem=resolve(paths_data['dem']) if 'dem' in paths_data else None, - slope=resolve(paths_data['slope']) if 'slope' in paths_data else None, - twi=resolve(paths_data['twi']) if 'twi' in paths_data else None, - rainfall=resolve(paths_data['rainfall']) if 'rainfall' in paths_data else None, - river=resolve(paths_data['river']) if 'river' in paths_data else None, - admin_dirs=[ - resolve(p) for p in paths_data.get('admin_dirs', []) - ], - output_dir=resolve(paths_data.get('output_dir', './output')) - ) - - self.admin_dirs = self.paths.admin_dirs - self.output = OutputConfig(output_dir=self.paths.output_dir) - - weights_data = data.get('weights', {}) - self.weights = WeightsConfig(**weights_data) - - self.plugin_parameters = data.get('parameters', {}) - - path_map = { - 'rainfall': self.paths.rainfall, - 'elevation': self.paths.dem, - 'slope': self.paths.slope, - 'twi': self.paths.twi, - } - source_type_map = { - 'rainfall': 'vector', - 'elevation': 'raster', - 'slope': 'raster', - 'twi': 'raster', - } - - for factor_name, weight in self.weights.as_dict().items(): - if weight <= 0: - continue - source_path = path_map.get(factor_name) - self.factors[factor_name] = FactorConfig( - name=factor_name, - weight=weight, - source_path=source_path, - source_type=source_type_map.get(factor_name, 'derived'), - parameters=self.plugin_parameters.get(factor_name, {}) - ) - - logger.info(" Config format: migrated (paths + weights)") - - def _load_factors_format(self, data: dict, base_dir: Path) -> None: - for name, fdata in data.get('factors', {}).items(): - source_path = None - if 'path' in fdata: - p = Path(fdata['path']) - source_path = p if p.is_absolute() else base_dir / p - - self.factors[name] = FactorConfig( - name=name, - weight=fdata.get('weight', 0.0), - source_path=source_path, - source_type=fdata.get('type', 'raster'), - processor=fdata.get('processor'), - parameters=fdata.get('parameters', {}) - ) - - logger.info(" Config format: (factors)") - - def to_yaml(self, yaml_path: Path) -> None: - data = { - 'name': self.name, - 'description': self.description, - 'analysis_type': self.analysis_type, - 'spatial': { - 'target_crs': self.spatial.target_crs, - 'metric_crs': self.spatial.metric_crs, - 'resolution': self.spatial.resolution, - 'filter_province': self.spatial.filter_province, - 'filter_city': self.spatial.filter_city, - 'filter_district': self.spatial.filter_district, - }, - 'output': { - 'dir': str(self.output.output_dir), - 'formats': self.output.formats, - 'generate_report': self.output.generate_report, - }, - 'classification': { - 'method': self.classification.method, - 'num_classes': self.classification.num_classes, - 'class_names': self.classification.class_names, - 'class_colors': self.classification.class_colors, - 'thresholds': self.classification.thresholds, - }, - 'factors': { - name: { - 'weight': f.weight, - 'path': str(f.source_path) if f.source_path else None, - 'type': f.source_type, - 'processor': f.processor, - 'parameters': f.parameters, - } - for name, f in self.factors.items() - }, - 'admin_dirs': [str(p) for p in self.admin_dirs], - 'admin_filename': self.admin_filename, - 'parameters': self.parameters, - } - - yaml_path = Path(yaml_path) - yaml_path.parent.mkdir(parents=True, exist_ok=True) - with open(yaml_path, 'w') as f: - yaml.dump(data, f, default_flow_style=False, sort_keys=False) - - log_success(f"Configuration saved to {yaml_path}") - - def to_json(self, json_path: Path) -> None: - data = { - 'name': self.name, - 'description': self.description, - 'analysis_type': self.analysis_type, - 'factors': { - name: { - 'weight': f.weight, - 'path': str(f.source_path) if f.source_path else None, - 'type': f.source_type, - 'processor': f.processor, - 'parameters': f.parameters, - } - for name, f in self.factors.items() - }, - 'parameters': self.parameters, - } - - json_path = Path(json_path) - json_path.parent.mkdir(parents=True, exist_ok=True) - with open(json_path, 'w') as f: - json.dump(data, f, indent=2) - - log_success(f"Configuration saved to {json_path}") diff --git a/core/data_source.py b/core/data_source.py deleted file mode 100644 index 8b91ac5..0000000 --- a/core/data_source.py +++ /dev/null @@ -1,315 +0,0 @@ -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Dict, Optional, Union -from enum import Enum - -import numpy as np -import pandas as pd -import geopandas as gpd -import rasterio - -from .logger import logger, log_success, log_error - - -class SourceType(Enum): - """Supported data source types""" - RASTER = "raster" - VECTOR = "vector" - POINT = "point" - TABLE = "table" - API = "api" - POINT_CLOUD = "point_cloud" - NETCDF = "netcdf" - - -@dataclass -class DataSource: - """ - Generic data source container with lazy loading - - Supports: - - Raster (GeoTIFF, SDAT, etc.) - - Vector (Shapefile, GeoJSON, GeoPackage) - - Tabular (CSV, Excel) - - Point clouds - """ - name: str - source_type: SourceType - path: Optional[Union[str, Path]] = None - data: Any = None - crs: str = "EPSG:4326" - metadata: Dict = field(default_factory=dict) - - def __post_init__(self): - if isinstance(self.path, str): - self.path = Path(self.path) - - def load(self) -> 'DataSource': - if self.data is not None: - logger.debug(f"Data already loaded: {self.name}") - return self - - if not self.path or not self.path.exists(): - raise FileNotFoundError(f"Data source not found: {self.path}") - - try: - if self.source_type == SourceType.RASTER: - self._load_raster() - elif self.source_type == SourceType.VECTOR: - self._load_vector() - elif self.source_type == SourceType.POINT: - self._load_vector() - elif self.source_type == SourceType.TABLE: - self._load_table() - - log_success(f"Loaded: {self.name} ({self.source_type.value})") - except Exception as e: - log_error(f"Failed to load {self.name}: {e}") - raise - - return self - - def _load_raster(self): - with rasterio.open(self.path) as src: - self.data = src.read(1) - self.metadata['transform'] = src.transform - self.metadata['profile'] = src.profile - self.metadata['bounds'] = src.bounds - self.metadata['shape'] = (src.height, src.width) - self.crs = str(src.crs) - - logger.debug(f" Shape: {self.metadata['shape']}") - logger.debug(f" CRS: {self.crs}") - - def _load_vector(self): - self.data = gpd.read_file(self.path).to_crs(self.crs) - - invalid = ~self.data.geometry.is_valid - if invalid.any(): - count = invalid.sum() - logger.warning(f" Repairing {count} invalid geometries") - self.data.loc[invalid, 'geometry'] = \ - self.data.loc[invalid, 'geometry'].buffer(0) - - empty = self.data.geometry.is_empty - if empty.any(): - count = empty.sum() - logger.warning(f" Removing {count} empty geometries") - self.data = self.data[~empty] - - self.metadata['count'] = len(self.data) - self.metadata['bounds'] = self.data.total_bounds - - logger.debug(f" Features: {self.metadata['count']}") - - def _load_table(self): - if self.path.suffix == '.csv': - self.data = pd.read_csv(self.path) - elif self.path.suffix in ['.xlsx', '.xls']: - self.data = pd.read_excel(self.path) - else: - raise ValueError(f"Unsupported table format: {self.path.suffix}") - - self.metadata['rows'] = len(self.data) - self.metadata['columns'] = list(self.data.columns) - - logger.debug(f" Rows: {self.metadata['rows']}") - logger.debug(f" Columns: {len(self.metadata['columns'])}") - - def is_loaded(self) -> bool: - return self.data is not None - - def unload(self) -> None: - self.data = None - logger.debug(f"Unloaded: {self.name}") - - def get_bounds(self) -> Optional[tuple]: - if 'bounds' in self.metadata: - return self.metadata['bounds'] - return None - - def __repr__(self) -> str: - status = "loaded" if self.is_loaded() else "not loaded" - return f"DataSource(name='{self.name}', type={self.source_type.value}, {status})" - - -class DataRegistry: - """ - Centralized registry for all data sources - """ - - def __init__(self): - self.sources: Dict[str, DataSource] = {} - - def register(self, name: str, source: DataSource) -> None: - if name in self.sources: - logger.warning(f"Overwriting existing source: {name}") - - self.sources[name] = source - log_success(f"Registered: {name}") - - def register_from_path(self, name: str, path: Union[str, Path], - source_type: SourceType, crs: str = "EPSG:4326") -> None: - source = DataSource( - name=name, - source_type=source_type, - path=path, - crs=crs - ) - self.register(name, source) - - def get(self, name: str, auto_load: bool = True) -> DataSource: - if name not in self.sources: - raise KeyError(f"Data source '{name}' not registered. " - f"Available: {self.list_sources()}") - - source = self.sources[name] - - if auto_load and not source.is_loaded(): - source.load() - - return source - - def has(self, name: str) -> bool: - return name in self.sources - - def list_sources(self) -> list[str]: - return list(self.sources.keys()) - - def list_by_type(self, source_type: SourceType) -> list[str]: - return [name for name, src in self.sources.items() - if src.source_type == source_type] - - def unload_all(self) -> None: - for source in self.sources.values(): - source.unload() - logger.info("All data sources unloaded") - - def get_memory_usage(self) -> Dict[str, int]: - usage = {} - for name, source in self.sources.items(): - if source.is_loaded(): - if isinstance(source.data, np.ndarray): - usage[name] = source.data.nbytes - elif isinstance(source.data, (gpd.GeoDataFrame, pd.DataFrame)): - usage[name] = source.data.memory_usage(deep=True).sum() - return usage - - def print_summary(self) -> None: - logger.info("\n" + "=" * 70) - logger.info("DATA REGISTRY SUMMARY") - logger.info("=" * 70) - - for name, source in self.sources.items(): - status = "Loaded" if source.is_loaded() else "Not loaded" - logger.info(f"{status} | {source.source_type.value:8s} | {name}") - - logger.info("=" * 70 + "\n") - - def __len__(self) -> int: - return len(self.sources) - - def __contains__(self, name: str) -> bool: - return name in self.sources - - def register_vector(self, name: str, gdf: gpd.GeoDataFrame, - crs: str = None, **metadata) -> DataSource: - """ - Shortcut: register a GeoDataFrame as a vector source. - - Args: - name: Registry key - gdf: GeoDataFrame to register - crs: CRS string (defaults to gdf.crs) - **metadata: Additional metadata fields - - Returns: - Created DataSource - """ - source = DataSource( - name=name, - source_type=SourceType.VECTOR, - data=gdf, - crs=crs or str(gdf.crs), - metadata=metadata - ) - self.register(name, source) - return source - - def register_raster_array(self, name: str, array: np.ndarray, - crs: str = None, **metadata) -> DataSource: - """ - Shortcut: register a numpy array as a raster source. - - Args: - name: Registry key - array: Numpy array to register - crs: CRS string - **metadata: Additional metadata fields - - Returns: - Created DataSource - """ - source = DataSource( - name=name, - source_type=SourceType.RASTER, - data=array, - crs=crs, - metadata=metadata - ) - self.register(name, source) - return source - - -def load_vector_from_multiple_dirs( - directories: list[Path], - filename: str, - target_crs: str = "EPSG:4326", - validate_geometries: bool = True -) -> gpd.GeoDataFrame: - """ - Generic helper to load and merge vector data from multiple directories - - This is a reusable utility for loader plugins that need to scan - multiple directories for the same filename pattern. - - Args: - directories: List of directories to search - filename: Name of the vector file to load - target_crs: Target coordinate reference system - validate_geometries: Whether to repair invalid geometries - - Returns: - Merged GeoDataFrame (empty if no files found) - """ - frames = [] - - for directory in directories: - path = Path(directory) / filename - if path.exists(): - try: - gdf = gpd.read_file(path).to_crs(target_crs) - - if validate_geometries: - invalid = ~gdf.geometry.is_valid - if invalid.any(): - count = invalid.sum() - logger.debug(f" Repairing {count} invalid geometries in {path.name}") - gdf.loc[invalid, 'geometry'] = gdf.loc[invalid, 'geometry'].buffer(0) - - empty = gdf.geometry.is_empty - if empty.any(): - count = empty.sum() - logger.debug(f" Removing {count} empty geometries from {path.name}") - gdf = gdf[~empty] - - frames.append(gdf) - logger.debug(f" Loaded: {path.name} ({len(gdf)} features)") - except Exception as e: - logger.warning(f"Failed to load {path}: {e}") - - if not frames: - return gpd.GeoDataFrame(geometry=[], crs=target_crs) - - merged = gpd.GeoDataFrame(pd.concat(frames, ignore_index=True), crs=target_crs) - return merged diff --git a/core/file_scanner.py b/core/file_scanner.py deleted file mode 100644 index 381cda7..0000000 --- a/core/file_scanner.py +++ /dev/null @@ -1,173 +0,0 @@ -from pathlib import Path -from typing import List, Optional - -import geopandas as gpd -import pandas as pd - -from .logger import logger, log_warning - - -def scan_shapefiles(directories: List[Path], - patterns: List[str], - target_crs: str = "EPSG:4326") -> gpd.GeoDataFrame: - """ - Scan directories for shapefiles matching pattern names. - - Args: - directories: List of directories to search - patterns: Filename patterns to match (case-insensitive substring match) - target_crs: Target CRS to reproject to - - Returns: - Merged GeoDataFrame (empty if no files found) - """ - frames = [] - - for directory in directories: - directory = Path(directory) - if not directory.exists(): - continue - - for shp_file in directory.glob("*.shp"): - if _match_pattern(shp_file.name, patterns): - try: - gdf = gpd.read_file(shp_file).to_crs(target_crs) - frames.append(gdf) - logger.debug(f" Found: {shp_file.name} ({len(gdf)} features)") - except Exception as e: - log_warning(f"Failed to load SHP {shp_file.name}: {e}") - - return _merge_frames(frames, target_crs) - - -def scan_gdb_layers(directories: List[Path], - patterns: List[str], - target_crs: str = "EPSG:4326") -> gpd.GeoDataFrame: - """ - Scan directories for GeoDatabase layers matching pattern names. - - Args: - directories: List of directories to search - patterns: Layer name patterns to match (case-insensitive substring match) - target_crs: Target CRS to reproject to - - Returns: - Merged GeoDataFrame (empty if no layers found) - """ - try: - import fiona - except ImportError: - log_warning("fiona not installed, cannot scan GDB files") - return gpd.GeoDataFrame() - - frames = [] - - for directory in directories: - directory = Path(directory) - if not directory.exists(): - continue - - for gdb_dir in directory.glob("*.gdb"): - try: - layers = fiona.listlayers(str(gdb_dir)) - for layer in layers: - if _match_pattern(layer, patterns): - try: - gdf = gpd.read_file(gdb_dir, layer=layer).to_crs(target_crs) - frames.append(gdf) - logger.debug( - f" Found: {gdb_dir.name}/{layer} ({len(gdf)} features)" - ) - except Exception as e: - log_warning(f"Failed to load GDB layer {layer}: {e}") - except Exception as e: - log_warning(f"Failed to read GDB {gdb_dir.name}: {e}") - - return _merge_frames(frames, target_crs) - - -def scan_all(directories: List[Path], - patterns: List[str], - target_crs: str = "EPSG:4326") -> gpd.GeoDataFrame: - """ - Scan both shapefiles and GDB layers. - - Args: - directories: List of directories to search - patterns: Filename/layer patterns to match - target_crs: Target CRS to reproject to - - Returns: - Merged GeoDataFrame from all sources - """ - gdf_shp = scan_shapefiles(directories, patterns, target_crs) - gdf_gdb = scan_gdb_layers(directories, patterns, target_crs) - - frames = [] - if not gdf_shp.empty: - frames.append(gdf_shp) - if not gdf_gdb.empty: - frames.append(gdf_gdb) - - return _merge_frames(frames, target_crs) - - -def scan_filename(directories: List[Path], - filename: str, - target_crs: str = "EPSG:4326", - validate_geometries: bool = True) -> gpd.GeoDataFrame: - """ - Scan directories for an exact filename match (e.g., ADMINISTRASIDESA_AR_25K.shp). - - This replaces the existing `load_vector_from_multiple_dirs` function. - - Args: - directories: List of directories to search - filename: Exact filename to match - target_crs: Target CRS to reproject to - validate_geometries: Whether to remove invalid geometries - - Returns: - Merged GeoDataFrame - """ - frames = [] - - for directory in directories: - directory = Path(directory) - file_path = directory / filename - - if file_path.exists(): - try: - gdf = gpd.read_file(file_path).to_crs(target_crs) - - if validate_geometries: - invalid = ~gdf.geometry.is_valid - if invalid.any(): - gdf.loc[invalid, 'geometry'] = gdf.loc[invalid].geometry.buffer(0) - logger.debug(f" Fixed {invalid.sum()} invalid geometries in {file_path.name}") - - frames.append(gdf) - logger.debug(f" Found: {file_path.name} ({len(gdf)} features)") - except Exception as e: - log_warning(f"Failed to load {file_path.name}: {e}") - - return _merge_frames(frames, target_crs) - - -def _match_pattern(name: str, patterns: List[str]) -> bool: - name_upper = name.upper() - return any(p.upper() in name_upper for p in patterns) - - -def _merge_frames(frames: list, target_crs: str) -> gpd.GeoDataFrame: - """Merge list of GeoDataFrames into one.""" - if not frames: - return gpd.GeoDataFrame() - - if len(frames) == 1: - return frames[0] - - return gpd.GeoDataFrame( - pd.concat(frames, ignore_index=True), - crs=target_crs - ) diff --git a/core/hydro.py b/core/hydro.py deleted file mode 100644 index bfc3bfa..0000000 --- a/core/hydro.py +++ /dev/null @@ -1,57 +0,0 @@ -import numpy as np -from scipy.ndimage import generate_binary_structure -from scipy.ndimage import grey_dilation, binary_dilation - -from .logger import logger - -""" -[TODO] - -This is not currently used. - -I'm planning to implement more core logic to be easily used later. -""" - - -def fill_sinks(dem: np.ndarray, epsilon: float = 1e-5) -> np.ndarray: - """ - Fill local depressions (sinks) in DEM using iterative morphological reconstruction. - Based on Wang & Liu (2006) concept. - """ - logger.debug(" Filling sinks in DEM...") - - rows, cols = dem.shape - - result = np.full_like(dem, np.inf) - - mask_boundary = np.ones_like(dem, dtype=bool) - mask_boundary[1:-1, 1:-1] = False - mask_boundary |= np.isnan(dem) - - result[mask_boundary] = dem[mask_boundary] - - kernel = generate_binary_structure(2, 2) - from scipy.ndimage import minimum_filter - - prev = result.copy() - - for _ in range(1000) if getattr(dem, 'size', 0) < 1e6 else range(100): - min_n = minimum_filter(result, footprint=kernel, mode='constant', cval=np.inf) - result = np.maximum(dem, min_n) - - if np.array_equal(result, prev): - break - prev = result.copy() - - return result.astype(np.float32) - - -def flow_accumulation(dem: np.ndarray) -> np.ndarray: - """ - Compute simplified D8 Flow Accumulation proxy based on slope. - """ - dy, dx = np.gradient(dem) - slope_rad = np.arctan(np.sqrt(dx ** 2 + dy ** 2)) - slope_rad = np.maximum(slope_rad, 0.001) - - return 1.0 / slope_rad diff --git a/core/logger.py b/core/logger.py deleted file mode 100644 index 53243df..0000000 --- a/core/logger.py +++ /dev/null @@ -1,77 +0,0 @@ -import logging -import sys -from pathlib import Path -from typing import Optional - - -class LoggerSetup: - """ - Just logger. - """ - - _instance = None - - @staticmethod - def setup(name: str = "MGRE", - log_file: Optional[Path] = None, - level: int = logging.INFO) -> logging.Logger: - - logger = logging.getLogger(name) - - if logger.handlers: - return logger - - logger.setLevel(level) - - fmt = logging.Formatter( - '%(asctime)s | %(levelname)-8s | %(message)s', - datefmt='%Y-%m-%d %H:%M:%S' - ) - - console = logging.StreamHandler(sys.stdout) - console.setFormatter(fmt) - logger.addHandler(console) - - if log_file: - log_file.parent.mkdir(parents=True, exist_ok=True) - file_handler = logging.FileHandler(log_file) - file_handler.setFormatter(fmt) - logger.addHandler(file_handler) - - return logger - - @staticmethod - def get_logger(name: str = "MGRE") -> logging.Logger: - return logging.getLogger(name) - - -logger = LoggerSetup.setup() - - -def log_section(title: str, width: int = 70) -> None: - logger.info("\n" + "=" * width) - logger.info(title.center(width)) - logger.info("=" * width + "\n") - - -def log_subsection(title: str, width: int = 70) -> None: - logger.info("\n" + "-" * width) - logger.info(title) - logger.info("-" * width) - - -def log_success(message: str) -> None: - logger.info(message) - - -def log_warning(message: str) -> None: - logger.warning(message) - - -def log_error(message: str) -> None: - logger.error(message) - - -def log_progress(current: int, total: int, prefix: str = "") -> None: - pct = (current / total) * 100 - logger.info(f"{prefix} [{current}/{total}] {pct:.1f}%") diff --git a/core/normalize.py b/core/normalize.py deleted file mode 100644 index d5cafa7..0000000 --- a/core/normalize.py +++ /dev/null @@ -1,173 +0,0 @@ -import numpy as np -from typing import Optional, Tuple - - -def percentile(data: np.ndarray, - lower: float = 5, - upper: float = 95) -> np.ndarray: - """ - Percentile-based normalization (robust to outliers). - - Args: - data: Input array - lower: Lower percentile cutoff - upper: Upper percentile cutoff - - Returns: - Normalized array (0-1, float32) - """ - valid = np.isfinite(data) - if not valid.any(): - return np.zeros_like(data, dtype=np.float32) - - d_min, d_max = np.percentile(data[valid], [lower, upper]) - - if d_max == d_min: - return np.full_like(data, 0.5, dtype=np.float32) - - normalized = np.clip((data - d_min) / (d_max - d_min), 0, 1) - return normalized.astype(np.float32) - - -def minmax(data: np.ndarray) -> np.ndarray: - """ - Standard min-max normalization. - - Returns: - Normalized array (0-1, float32) - """ - valid = np.isfinite(data) - if not valid.any(): - return np.zeros_like(data, dtype=np.float32) - - d_min = np.nanmin(data) - d_max = np.nanmax(data) - - if d_max == d_min: - return np.full_like(data, 0.5, dtype=np.float32) - - normalized = (data - d_min) / (d_max - d_min) - return normalized.astype(np.float32) - - -def rank(data: np.ndarray) -> np.ndarray: - """ - Rank-based normalization. Each value is replaced by its rank - among valid values, then scaled to 0-1. - - Useful for highly skewed distributions. - - Returns: - Normalized array (0-1, float32) - """ - result = np.full_like(data, np.nan, dtype=np.float32) - valid = np.isfinite(data) - - if not valid.any(): - return np.zeros_like(data, dtype=np.float32) - - flat = data[valid] - ranks = flat.argsort().argsort() + 1 - n = len(ranks) - result[valid] = (ranks / n).astype(np.float32) - - return result - - -def zscore(data: np.ndarray, - clip_sigma: float = 3.0) -> np.ndarray: - """ - Z-score normalization, clipped to [-clip_sigma, +clip_sigma] - then scaled to 0-1. - - Args: - data: Input array - clip_sigma: Number of standard deviations to clip - - Returns: - Normalized array (0-1, float32) - """ - valid = np.isfinite(data) - if not valid.any(): - return np.zeros_like(data, dtype=np.float32) - - mean = np.nanmean(data) - std = np.nanstd(data) - - if std == 0: - return np.full_like(data, 0.5, dtype=np.float32) - - z = (data - mean) / std - z = np.clip(z, -clip_sigma, clip_sigma) - normalized = (z + clip_sigma) / (2 * clip_sigma) - return normalized.astype(np.float32) - - -def invert(data: np.ndarray) -> np.ndarray: - return (1 - data).astype(np.float32) - - -def classify_quantile(data: np.ndarray, - n_classes: int = 5) -> Tuple[np.ndarray, list]: - """ - Classify into n_classes using quantile breaks. - - Returns: - (classified array (1-based integers), break values) - """ - valid = np.isfinite(data) - classified = np.zeros_like(data, dtype=np.uint8) - - if not valid.any(): - return classified, [] - - quantiles = np.linspace(0, 100, n_classes + 1) - breaks = np.percentile(data[valid], quantiles).tolist() - - for i in range(n_classes): - if i < n_classes - 1: - mask = (data >= breaks[i]) & (data < breaks[i + 1]) - else: - mask = data >= breaks[i] - classified[mask] = i + 1 - - return classified, breaks - - -def classify_jenks(data: np.ndarray, - n_classes: int = 5, - sample_size: int = 100000) -> Tuple[np.ndarray, list]: - """ - Classify using Jenks Natural Breaks (if jenkspy available). - Falls back to quantile if not available. - - Returns: - (classified array (1-based integers), break values) - """ - try: - import jenkspy - except ImportError: - return classify_quantile(data, n_classes) - - valid = np.isfinite(data) - classified = np.zeros_like(data, dtype=np.uint8) - - if not valid.any(): - return classified, [] - - values = data[valid] - if len(values) > sample_size: - sample = np.random.choice(values, sample_size, replace=False) - else: - sample = values - - breaks = jenkspy.jenks_breaks(sample, n_classes=n_classes) - - for i in range(len(breaks) - 1): - if i < len(breaks) - 2: - mask = (data >= breaks[i]) & (data < breaks[i + 1]) - else: - mask = data >= breaks[i] - classified[mask] = i + 1 - - return classified, breaks diff --git a/core/processing.py b/core/processing.py deleted file mode 100644 index f8466a0..0000000 --- a/core/processing.py +++ /dev/null @@ -1,92 +0,0 @@ -import numpy as np -from scipy.ndimage import distance_transform_edt - -from .data_source import DataSource -from .resampler import RasterResampler, VectorResampler - - -class ProcessingContext: - """ - Usage in a plugin: - ctx = ProcessingContext(grid_shape, transform, crs) - dem = ctx.resample_raster(registry.get('dem')) - risk = ctx.distance_to_features(registry.get('river')) - """ - - def __init__(self, grid_shape, transform, crs): - self.grid_shape = grid_shape - self.transform = transform - self.crs = crs - self.resolution = abs(transform.a) - - def resample_raster(self, source: DataSource, - method: str = "bilinear") -> np.ndarray: - """ - Resample a raster DataSource to the analysis grid. - - Args: - source: DataSource with raster data - method: Resampling method (bilinear, nearest, cubic, etc.) - - Returns: - Resampled array matching grid_shape - """ - resampler = RasterResampler( - self.grid_shape, self.transform, self.crs, method=method - ) - return resampler.resample(source) - - def rasterize_vector(self, source: DataSource, - column: str = None, - all_touched: bool = True, - fill_value: float = 0.0) -> np.ndarray: - """ - Rasterize a vector DataSource to the analysis grid. - - Args: - source: DataSource with vector data - column: Column name to use for burn-in values - all_touched: Whether to rasterize all touched pixels - fill_value: Fill value for empty pixels - - Returns: - Rasterized array matching grid_shape - """ - resampler = VectorResampler( - self.grid_shape, self.transform, self.crs - ) - return resampler.resample( - source, - value_column=column, - all_touched=all_touched, - fill_value=fill_value - ) - - def empty_grid(self, fill: float = 0.0, - dtype=np.float32) -> np.ndarray: - return np.full(self.grid_shape, fill, dtype=dtype) - - def boolean_grid(self, fill: bool = False) -> np.ndarray: - return np.full(self.grid_shape, fill, dtype=bool) - - def distance_to_features(self, source: DataSource, - max_distance: float = None) -> np.ndarray: - """ - Compute distance from each pixel to the nearest vector feature. - - Args: - source: DataSource with vector geometry - max_distance: If set, distances beyond this are clipped. - The result is also inverted: close=1, far=0. - - Returns: - Distance raster in CRS units (or normalized 0-1 if max_distance set) - """ - feature_mask = self.rasterize_vector(source).astype(bool) - dist_pixels = distance_transform_edt(~feature_mask) - dist_units = dist_pixels * self.resolution - - if max_distance is not None: - return np.clip(1 - (dist_units / max_distance), 0, 1).astype(np.float32) - - return dist_units.astype(np.float32) diff --git a/core/resampler.py b/core/resampler.py deleted file mode 100644 index b7f68ff..0000000 --- a/core/resampler.py +++ /dev/null @@ -1,101 +0,0 @@ -from abc import ABC, abstractmethod - -import numpy as np -import rasterio -from rasterio.enums import Resampling -from rasterio.features import rasterize -from rasterio.warp import reproject - -from murasa.core.data_source import DataSource - - -class BaseResampler(ABC): - def __init__(self, grid_shape, transform, crs): - self.grid_shape = grid_shape - self.transform = transform - self.crs = crs - - @abstractmethod - def resample(self, source: DataSource) -> np.ndarray: - pass - - def validate_output(self, result: np.ndarray) -> np.ndarray: - pass - - def fill_nodata(self, data: np.ndarray, fill_value=0.0) -> np.ndarray: - pass - - -class RasterResampler(BaseResampler): - def __init__(self, grid_shape, transform, crs, method="bilinear"): - super().__init__(grid_shape, transform, crs) - self.method = method - - def _get_resampling_method(self): - match self.method: - case "bilinear": - return Resampling.bilinear - case "nearest": - return Resampling.nearest - case "cubic": - return Resampling.cubic - case "average": - return Resampling.average - case "mode": - return Resampling.mode - case "max": - return Resampling.max - case "min": - return Resampling.min - case "med": - return Resampling.med - case "q1": - return Resampling.q1 - case "q3": - return Resampling.q3 - case _: - raise ValueError(f"Unknown resampling method: {self.method}") - - def resample(self, source: DataSource) -> np.ndarray: - raster = np.zeros(self.grid_shape, dtype=np.float32) - - resampling = self._get_resampling_method() - - with rasterio.open(source.path) as src: - reproject( - source=rasterio.band(src, 1), - destination=raster, - src_transform=src.transform, - src_crs=src.crs, - dst_transform=self.transform, - dst_crs=self.crs, - resampling=resampling - ) - - return raster - - -class VectorResampler(BaseResampler): - def __init__(self, grid_shape, transform, crs): - super().__init__(grid_shape, transform, crs) - - def resample(self, source: DataSource, - value_column: str = None, - all_touched: bool = True, - fill_value: float = 0.0) -> np.ndarray: - - gdf = source.data.to_crs(self.crs) - if value_column and value_column in gdf.columns: - shapes = [(geom, val) for geom, val in - zip(gdf.geometry, gdf[value_column])] - else: - shapes = [(geom, 1) for geom in gdf.geometry] - result = rasterize( - shapes=shapes, - out_shape=self.grid_shape, - transform=self.transform, - fill=fill_value, - dtype=np.float32, - all_touched=all_touched - ) - return result diff --git a/core/terrain.py b/core/terrain.py deleted file mode 100644 index 0148dbb..0000000 --- a/core/terrain.py +++ /dev/null @@ -1,99 +0,0 @@ -import numpy as np - - -def calculate_gradient(dem: np.ndarray, cell_size: float = 1.0) -> tuple[np.ndarray, np.ndarray]: - """ - Calculate first derivatives (gradients) of the DEM. - - Args: - dem: Digital Elevation Model array - cell_size: Resolution of the DEM (in same units as elevation) - - Returns: - (dz_dy, dz_dx) tuple - """ - - dz_dy, dz_dx = np.gradient(dem, cell_size) - return dz_dy, dz_dx - - -def calculate_slope(dem: np.ndarray, cell_size: float = 1.0, degrees: bool = True) -> np.ndarray: - """ - Calculate slope from DEM. - - Args: - dem: DEM array - cell_size: Resolution - degrees: Return in degrees (True) or radians (False) - """ - dz_dy, dz_dx = calculate_gradient(dem, cell_size) - - hypot = np.hypot(dz_dx, dz_dy) - slope_rad = np.arctan(hypot) - - if degrees: - return np.degrees(slope_rad) - return slope_rad - - -def calculate_aspect(dem: np.ndarray, cell_size: float = 1.0) -> np.ndarray: - """ - Calculate aspect (direction of slope) in degrees [0, 360]. - North = 0, East = 90, South = 180, West = 270. - """ - dz_dy, dz_dx = calculate_gradient(dem, cell_size) - - aspect = np.degrees(np.arctan2(-dz_dx, dz_dy)) - aspect = np.where(aspect < 0, aspect + 360, aspect) - - return aspect - - -def calculate_curvature(dem: np.ndarray, cell_size: float = 1.0, method: str = 'profile') -> np.ndarray: - """ - Compute terrain curvature (second derivative metrics). - - Args: - method: 'profile', 'plan', or 'total' - """ - - dz_dy, dz_dx = np.gradient(dem, cell_size) - - d2z_dx2 = np.gradient(dz_dx, cell_size, axis=1) - d2z_dy2 = np.gradient(dz_dy, cell_size, axis=0) - d2z_dxdy = np.gradient(dz_dx, cell_size, axis=0) - - p = dz_dx - q = dz_dy - p2 = p * p - q2 = q * q - - if method == "profile": - - num = p2 * d2z_dx2 + 2 * p * q * d2z_dxdy + q2 * d2z_dy2 - denom = (p2 + q2) * np.power(1 + p2 + q2, 1.5) - - denom = np.where(denom == 0, 1.0, denom) - - return -num / denom - - elif method == "plan": - - num = q2 * d2z_dx2 - 2 * p * q * d2z_dxdy + p2 * d2z_dy2 - denom = np.power(p2 + q2, 1.5) - denom = np.where(denom == 0, 1.0, denom) - - return num / denom - - elif method == "total": - return d2z_dx2 + d2z_dy2 - - elif method == "tangential": - - num = q2 * d2z_dx2 - 2 * p * q * d2z_dxdy + p2 * d2z_dy2 - denom = (p2 + q2) * np.sqrt(1 + p2 + q2) - denom = np.where(denom == 0, 1.0, denom) - return num / denom - - else: - raise ValueError(f"Unknown curvature method: {method}") diff --git a/engine/__init__.py b/engine/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/engine/presets.py b/engine/presets.py deleted file mode 100644 index 7de66f5..0000000 --- a/engine/presets.py +++ /dev/null @@ -1,314 +0,0 @@ -from pathlib import Path -from typing import Optional, Dict -import numpy as np - -from .risk_engine import RiskEngine -from murasa.core.config import EngineConfig -from murasa.core.data_source import DataSource, SourceType, load_admin_boundaries, load_rivers -from murasa.plugins.risk_factors import ( - RainfallPlugin, ElevationPlugin, SlopePlugin, - TWIPlugin, ProximityPlugin, LandUsePlugin -) -from murasa.core.logger import logger, log_section - - -class FloodRiskEngine(RiskEngine): - """ - Pre-configured engine for flood risk assessment - - The defaults are specific for my use case. Can also serve as an example. - - Administration boundaries as well. - """ - - def __init__(self, config: Optional[EngineConfig] = None): - super().__init__(config) - log_section("FLOOD RISK ENGINE INITIALIZED") - - def auto_configure(self, - dem_path: Optional[Path] = None, - slope_path: Optional[Path] = None, - twi_path: Optional[Path] = None, - rainfall_path: Optional[Path] = None, - river_path: Optional[Path] = None, - admin_dirs: Optional[list] = None, - landuse_paths: Optional[Dict[str, Path]] = None) -> None: - """ - Auto-configure flood risk engine - - Args: - dem_path: DEM raster path - slope_path: Slope raster path (optional, computed from DEM if None) - twi_path: TWI raster path (optional, computed from DEM if None) - rainfall_path: Rainfall vector path - river_path: River network path - admin_dirs: List of admin boundary directories - landuse_paths: Dict of {'type': path} for landuse layers - """ - log_section("AUTO-CONFIGURING FLOOD ENGINE") - - if dem_path: - self.register_data_from_path('dem', dem_path, SourceType.RASTER) - - if slope_path: - self.register_data_from_path('slope', slope_path, SourceType.RASTER) - - if twi_path: - self.register_data_from_path('twi', twi_path, SourceType.RASTER) - - if rainfall_path: - self.register_data_from_path('rainfall', rainfall_path, SourceType.VECTOR) - - if river_path: - self.register_data_from_path('river', river_path, SourceType.VECTOR) - elif admin_dirs: - try: - gdf_rivers = load_rivers(admin_dirs, target_crs=self.crs) - if not gdf_rivers.empty: - self.register_data('river', DataSource( - name='river', - source_type=SourceType.VECTOR, - data=gdf_rivers, - crs=self.crs - )) - except Exception as e: - logger.warning(f"Could not load rivers: {e}") - - if admin_dirs: - try: - gdf_admin = load_admin_boundaries(admin_dirs, target_crs=self.crs) - - gdf_admin = self._apply_spatial_filter(gdf_admin) - - self.register_data('admin', DataSource( - name='admin', - source_type=SourceType.VECTOR, - data=gdf_admin, - crs=self.crs - )) - except Exception as e: - logger.warning(f"Could not load admin boundaries: {e}") - - if landuse_paths: - for lu_type, lu_path in landuse_paths.items(): - self.register_data_from_path( - f'landuse_{lu_type}', - lu_path, - SourceType.VECTOR - ) - - self.register_parameter(TWIPlugin(weight=0.25)) - self.register_parameter(ElevationPlugin(weight=0.20, inverse=True)) - self.register_parameter(LandUsePlugin(weight=0.20)) - - if rainfall_path: - self.register_parameter(RainfallPlugin(weight=0.15)) - - self.register_parameter(SlopePlugin(weight=0.10, inverse=True)) - - if river_path or (admin_dirs and self.registry.has('river')): - self.register_parameter(ProximityPlugin( - weight=0.10, - feature_name='river', - max_distance=500 - )) - - logger.info("Flood risk engine configured") - - def _apply_spatial_filter(self, gdf: 'gpd.GeoDataFrame') -> 'gpd.GeoDataFrame': - """Apply hierarchical spatial filtering""" - filter_level = self.config.spatial.get_filter_level() - - if filter_level == 'full': - logger.info(f" Using full scope: {len(gdf)} units") - return gdf - - col_map = { - 'district': 'WADMKC', - 'city': 'WADMKK', - 'province': 'WADMPR' - } - - col_name = col_map.get(filter_level) - if col_name not in gdf.columns: - logger.warning(f" Column '{col_name}' not found, using full scope") - return gdf - - filter_values = self.config.spatial.get_active_filter() - - gdf_upper = gdf[col_name].str.upper() - filter_upper = [v.upper() for v in filter_values] - - filtered = gdf[gdf_upper.isin(filter_upper)].copy() - - logger.info(f" Filtered by {filter_level} ({filter_values}): " - f"{len(filtered)} units") - - return filtered - - -class LandslideRiskEngine(RiskEngine): - """ - Pre-configured engine for landslide risk assessment - - Default parameters (INVERTED logic from flood): - - Slope: 35% (steep = high risk) - - Land Use: 25% (vegetation protects) - - Elevation: 15% (high altitude) - - Rainfall: 15% (trigger) - - Geology: 10% (if available) - - This also servers as an example for now. I still haven't tested this. - """ - - def __init__(self, config: Optional[EngineConfig] = None): - super().__init__(config) - log_section("LANDSLIDE RISK ENGINE INITIALIZED") - - def auto_configure(self, - dem_path: Optional[Path] = None, - slope_path: Optional[Path] = None, - rainfall_path: Optional[Path] = None, - geology_path: Optional[Path] = None, - admin_dirs: Optional[list] = None, - landuse_paths: Optional[Dict[str, Path]] = None) -> None: - - log_section("AUTO-CONFIGURING LANDSLIDE ENGINE") - - if dem_path: - self.register_data_from_path('dem', dem_path, SourceType.RASTER) - - if slope_path: - self.register_data_from_path('slope', slope_path, SourceType.RASTER) - - if rainfall_path: - self.register_data_from_path('rainfall', rainfall_path, SourceType.VECTOR) - - if geology_path: - self.register_data_from_path('geology', geology_path, SourceType.VECTOR) - - if admin_dirs: - try: - gdf_admin = load_admin_boundaries(admin_dirs, target_crs=self.crs) - self.register_data('admin', DataSource( - name='admin', - source_type=SourceType.VECTOR, - data=gdf_admin, - crs=self.crs - )) - except Exception as e: - logger.warning(f"Could not load admin boundaries: {e}") - - if landuse_paths: - for lu_type, lu_path in landuse_paths.items(): - self.register_data_from_path( - f'landuse_{lu_type}', - lu_path, - SourceType.VECTOR - ) - - self.register_parameter(SlopePlugin(weight=0.35, inverse=False)) - - landslide_coeff = { - 'forest': 0.2, - 'bare_soil': 0.95, - 'agriculture': 0.7, - 'settlement': 0.6, - 'default': 0.5 - } - self.register_parameter(LandUsePlugin( - weight=0.25, - coefficients=landslide_coeff - )) - - self.register_parameter(ElevationPlugin(weight=0.15, inverse=False)) - - if rainfall_path: - self.register_parameter(RainfallPlugin(weight=0.15)) - - logger.info("Landslide risk engine configured") - - -class DroughtRiskEngine(RiskEngine): - """ - Pre-configured engine for drought risk assessment - - Default parameters: - - Rainfall deficit: 30% - - TWI (low = high risk): 25% - - Land Use (irrigation access): 20% - - Soil type: 15% - - Elevation: 10% - - This also servers as an example for now. I also haven't tested this. - """ - - def __init__(self, config: Optional[EngineConfig] = None): - super().__init__(config) - log_section("DROUGHT RISK ENGINE INITIALIZED") - - def auto_configure(self, - rainfall_path: Optional[Path] = None, - twi_path: Optional[Path] = None, - dem_path: Optional[Path] = None, - soil_path: Optional[Path] = None, - admin_dirs: Optional[list] = None) -> None: - - log_section("AUTO-CONFIGURING DROUGHT ENGINE") - - if rainfall_path: - self.register_data_from_path('rainfall', rainfall_path, SourceType.VECTOR) - - if twi_path: - self.register_data_from_path('twi', twi_path, SourceType.RASTER) - - if dem_path: - self.register_data_from_path('dem', dem_path, SourceType.RASTER) - - if soil_path: - self.register_data_from_path('soil', soil_path, SourceType.VECTOR) - - if admin_dirs: - try: - gdf_admin = load_admin_boundaries(admin_dirs, target_crs=self.crs) - self.register_data('admin', DataSource( - name='admin', - source_type=SourceType.VECTOR, - data=gdf_admin, - crs=self.crs - )) - except Exception as e: - logger.warning(f"Could not load admin boundaries: {e}") - - if rainfall_path: - self.register_parameter(RainfallPlugin(weight=0.30)) - - if twi_path or dem_path: - self.register_parameter(TWIPlugin(weight=0.25)) - - logger.info("Drought risk engine configured") - - -def create_engine(hazard_type: str, config: Optional[EngineConfig] = None) -> RiskEngine: - """ - Factory function to create pre-configured engines - - Args: - hazard_type: 'flood', 'landslide', or 'drought' - config: Optional configuration - - Returns: - Configured RiskEngine instance - """ - engines = { - 'flood': FloodRiskEngine, - 'landslide': LandslideRiskEngine, - 'drought': DroughtRiskEngine - } - - hazard_type = hazard_type.lower() - if hazard_type not in engines: - raise ValueError(f"Unknown hazard type: {hazard_type}. " - f"Available: {list(engines.keys())}") - - return engines[hazard_type](config) diff --git a/engine/risk_engine.py b/engine/risk_engine.py deleted file mode 100644 index ee6acc5..0000000 --- a/engine/risk_engine.py +++ /dev/null @@ -1,296 +0,0 @@ -from pathlib import Path -from typing import Optional, List, Tuple, Any -import numpy as np -import geopandas as gpd -import rasterio -from rasterio.transform import from_bounds -from rasterio.features import geometry_mask - -from murasa.core.config import EngineConfig -from murasa.core.data_source import DataRegistry, DataSource, SourceType -from murasa.core.logger import logger, log_section, log_subsection, log_success -from murasa.core.base import ParameterPlugin, PostProcessorPlugin - - -class RiskEngine: - def __init__(self, config: Optional[EngineConfig] = None): - self.config = config or EngineConfig() - self.registry = DataRegistry() - self.parameters: List[ParameterPlugin] = [] - self.post_processors: List[PostProcessorPlugin] = [] - - self.grid_shape: Optional[Tuple[int, int]] = None - self.transform: Optional[Any] = None - self.crs: str = self.config.spatial.target_crs - - self.study_area_mask: Optional[np.ndarray] = None - self.admin_units: Optional[gpd.GeoDataFrame] = None - - self.risk_raster: Optional[np.ndarray] = None - self.exclusion_mask: Optional[np.ndarray] = None - - def register_data(self, name: str, source: DataSource) -> None: - self.registry.register(name, source) - - def register_data_from_path(self, name: str, path: Path, - source_type: SourceType) -> None: - source = DataSource( - name=name, - source_type=source_type, - path=path, - crs=self.crs - ) - self.registry.register(name, source) - - def auto_register_from_config(self) -> None: - log_subsection("Auto-registering data sources from factors") - - for name, factor in self.config.factors.items(): - if factor.source_path and factor.source_path.exists(): - source_type = (SourceType.RASTER if factor.source_type == 'raster' - else SourceType.VECTOR) - self.register_data_from_path(name, factor.source_path, source_type) - logger.info(f" {name}: {factor.source_path}") - elif factor.source_type == 'derived': - logger.info(f" {name}: derived (will be computed)") - else: - logger.warning(f" {name}: path not found or not specified") - - log_success(f"Registered {len(self.registry)} data sources") - - def register_parameter(self, plugin: ParameterPlugin) -> bool: - if not plugin.validate_requirements(self.registry): - logger.warning(f"Plugin '{plugin.name}' missing required data") - return False - - self.parameters.append(plugin) - log_success(f"Registered: {plugin.name} (weight={plugin.weight:.3f})") - return True - - def register_post_processor(self, plugin: PostProcessorPlugin) -> None: - self.post_processors.append(plugin) - log_success(f"Registered Post-Processor: {plugin.name}") - - def clear_parameters(self) -> None: - self.parameters.clear() - logger.info("Cleared all parameters") - - def normalize_weights(self) -> None: - total = sum(p.weight for p in self.parameters) - - if abs(total - 1.0) < 0.001: - return - - logger.warning(f"Weights sum to {total:.3f}, normalizing...") - for param in self.parameters: - param.weight /= total - - def setup_grid(self, bounds: Optional[Tuple] = None, - from_admin: bool = True) -> None: - study_area_key = self.config.spatial.study_area_key - if from_admin and self.registry.has(study_area_key): - admin_source = self.registry.get(study_area_key) - bounds = admin_source.data.total_bounds - logger.info(f"Grid bounds from {study_area_key} data") - - if bounds is None: - raise ValueError(f"Grid bounds required (provide bounds or {study_area_key} data)") - - resolution = self.config.spatial.resolution - width = int((bounds[2] - bounds[0]) / resolution) - height = int((bounds[3] - bounds[1]) / resolution) - - self.transform = from_bounds(*bounds, width, height) - self.grid_shape = (height, width) - - log_success(f"Grid: {width}×{height} pixels @ {resolution}m") - - def set_study_area(self, admin_gdf: Optional[gpd.GeoDataFrame] = None) -> None: - study_area_key = self.config.spatial.study_area_key - if admin_gdf is None: - if not self.registry.has(study_area_key): - logger.warning(f"No study area defined using key '{study_area_key}', processing full grid") - return - admin_gdf = self.registry.get(study_area_key).data - - self.admin_units = admin_gdf - - self.study_area_mask = geometry_mask( - admin_gdf.geometry, - transform=self.transform, - invert=True, - out_shape=self.grid_shape - ) - - log_success(f"Study area: {len(admin_gdf)} units from key '{study_area_key}'") - - def calculate_risk(self) -> np.ndarray: - log_section("CALCULATING RISK") - - if not self.parameters: - raise ValueError("No parameters registered") - - if self.grid_shape is None or self.transform is None: - raise ValueError("Grid not setup (call setup_grid first)") - - self.normalize_weights() - self.exclusion_mask = np.zeros(self.grid_shape, dtype=bool) - - risk_components = [] - - for plugin in self.parameters: - try: - component = plugin.process( - self.registry, - self.grid_shape, - self.transform, - self.crs - ) - - output_name = f"risk_{plugin.name}" - self.registry.register( - output_name, - DataSource(output_name, SourceType.RASTER, data=component, crs=self.crs) - ) - - if plugin.is_exclusion: - mask = (component > 0).astype(bool) - self.exclusion_mask |= mask - logger.info(f" Added exclusion mask from {plugin.name}: {mask.sum()} pixels") - else: - component = np.nan_to_num(component, nan=0.0) - weighted = component * plugin.weight - risk_components.append(weighted) - - except Exception as e: - logger.error(f" Failed: {e}") - import traceback - traceback.print_exc() - continue - - if not risk_components: - raise ValueError("No valid risk components computed!") - - self.risk_raster = np.sum(risk_components, axis=0).astype(np.float32) - - if self.exclusion_mask.any(): - self.risk_raster[self.exclusion_mask] = np.nan - logger.info(f" Total excluded area: {self.exclusion_mask.sum()} pixels") - - if self.study_area_mask is not None: - self.risk_raster[~self.study_area_mask] = np.nan - - log_section("RISK CALCULATION COMPLETE") - logger.info(f"Range: {np.nanmin(self.risk_raster):.3f} - " - f"{np.nanmax(self.risk_raster):.3f}") - logger.info(f"Mean: {np.nanmean(self.risk_raster):.3f}") - - return self.risk_raster - - def export_raster(self, output_path: Path) -> None: - if self.risk_raster is None: - raise ValueError("No risk raster calculated") - - output_path.parent.mkdir(parents=True, exist_ok=True) - - profile = { - 'driver': 'GTiff', - 'height': self.grid_shape[0], - 'width': self.grid_shape[1], - 'count': 1, - 'dtype': np.float32, - 'crs': self.crs, - 'transform': self.transform, - 'nodata': np.nan, - 'compress': 'lzw' - } - - with rasterio.open(output_path, 'w', **profile) as dst: - dst.write(self.risk_raster, 1) - - log_success(f"Raster saved: {output_path}") - - def export_statistics(self, output_path: Path) -> None: - import json - - stats = { - 'parameters': { - p.name: { - 'weight': p.weight, - 'statistics': p.get_statistics() - } - for p in self.parameters - }, - 'risk': { - 'min': float(np.nanmin(self.risk_raster)), - 'max': float(np.nanmax(self.risk_raster)), - 'mean': float(np.nanmean(self.risk_raster)), - 'std': float(np.nanstd(self.risk_raster)) - } - } - - output_path.parent.mkdir(parents=True, exist_ok=True) - with open(output_path, 'w') as f: - json.dump(stats, f, indent=2) - - log_success(f"Statistics saved: {output_path}") - - def export_all(self, output_dir: Optional[Path] = None) -> None: - if output_dir is None: - output_dir = self.config.output.output_dir - - output_dir.mkdir(parents=True, exist_ok=True) - - self.export_raster(output_dir / "risk_map.tif") - - self.export_statistics(output_dir / "statistics.json") - - log_success(f"All outputs saved to: {output_dir}") - - if self.post_processors: - log_section("POST-PROCESSING") - for pp in self.post_processors: - try: - pp.post_process( - self.registry, - self.risk_raster, - self.transform, - self.crs, - output_dir, - config=self.config - ) - except Exception as e: - logger.error(f"Post-processor {pp.name} failed: {e}") - import traceback - traceback.print_exc() - - def print_summary(self) -> None: - log_section("ENGINE SUMMARY") - - logger.info(f"Data sources: {len(self.registry)}") - self.registry.print_summary() - - logger.info(f"\nParameters: {len(self.parameters)}") - for param in self.parameters: - logger.info(f" - {param}") - - if self.grid_shape: - logger.info(f"\nGrid: {self.grid_shape[1]}×{self.grid_shape[0]} pixels") - logger.info(f"Resolution: {self.config.spatial.resolution}m") - - if self.admin_units is not None: - logger.info(f"\nStudy area: {len(self.admin_units)} units") - - def run(self) -> np.ndarray: - log_section("RISK ENGINE - FULL RUN") - - self.auto_register_from_config() - self.setup_grid() - self.set_study_area() - - risk = self.calculate_risk() - - self.export_all() - - log_section("COMPLETE") - return risk diff --git a/examples/flood_bandung_config.yaml b/examples/flood_bandung_config.yaml index 49450ea..9fdc021 100644 --- a/examples/flood_bandung_config.yaml +++ b/examples/flood_bandung_config.yaml @@ -1,75 +1,39 @@ -project: - name: "Bandung Raya Flood Susceptibility Analysis (December)" - description: "Analyzes stuff" +name: bandung_flood_example +description: "Example flood susceptibility config for the Rust port" +analysis_type: susceptibility spatial: - target_crs: "EPSG:32748" - resolution: 10 - filter_city: [ "KOTA BANDUNG", "KOTA CIMAHI", "KAB. BANDUNG", "KAB. BANDUNG BARAT" ] - study_area_key: "admin" + target_crs: "EPSG:4326" + metric_crs: "EPSG:3857" + resolution: 10.0 + study_area_key: admin -paths: - dem: "../../data/DEMNAS/merged/new2026/filled_dem_tif.tif" - slope: "../../data/DEMNAS/merged/new2026/filled_slope_tif.tif" - twi: "../../data/DEMNAS/merged/new2026/twi.tif" - rainfall: "../../data/bmkg_rainfall_data/curah-hujan-oktober-2025.geojson" - admin_dirs: - - "../../data/wilayah-administrasi/KAB. BANDUNG" - - "../../data/wilayah-administrasi/KAB. BANDUNG BARAT" - - "../../data/wilayah-administrasi/KOTA BANDUNG" - - "../../data/wilayah-administrasi/KOTA CIMAHI" - river: "../../data/river/SUNGAI_LN_25K.shp" - output_dir: "../output" - -weights: - rainfall: 0.15 - elevation: 0.15 - slope: 0.18 - twi: 0.22 - proximity: 0.10 - land_use: 0.20 +output: + dir: "./output/rust_example" + formats: ["geojson", "geopackage"] + generate_report: true -parameters: - rainfall: - column_name: "gridcode" - year: 2025 - month: 10 - value_mapping: - 1: 10 - 2: 35 - 3: 75 - 4: 125 - 5: 175 - 6: 250 - 7: 350 - 8: 450 - 9: 600 +classification: + method: quantile + num_classes: 5 + class_names: ["Very Low", "Low", "Moderate", "High", "Very High"] - land_use: - coefficients: - water: 1.0 - settlement: 0.85 - building: 0.90 - road: 0.90 - paddy: 0.70 - plantation: 0.40 - forest: 0.20 - shrub: 0.35 - default: 0.50 - priorities: - water: 100 - road: 90 - building: 80 - settlement: 70 - paddy: 50 - plantation: 40 - shrub: 30 - forest: 20 - default: 0 +weights: + rainfall: 0.25 + elevation: 0.20 + slope: 0.15 + twi: 0.15 + proximity: 0.15 + land_use: 0.10 - proximity: - feature: "river" - max_distance: 500 +paths: + dem: "data/bandung/dem.tif" + slope: "data/bandung/slope.tif" + twi: "data/bandung/twi.tif" + rainfall: "data/bandung/rainfall.shp" + river: "data/bandung/river.shp" + land_use: "data/bandung/landuse.shp" + admin_dirs: ["data/bandung/admin"] + output_dir: "./output/rust_example" - exclusion: - features: [ "river", "reservoir" ] \ No newline at end of file +admin_filename: "ADMINISTRASIDESA_AR_25K.shp" diff --git a/examples/run_flood_analysis.py b/examples/run_flood_analysis.py deleted file mode 100644 index f110161..0000000 --- a/examples/run_flood_analysis.py +++ /dev/null @@ -1,170 +0,0 @@ -import sys -import argparse -from pathlib import Path - -from murasa.core.config import EngineConfig -from murasa.engine.risk_engine import RiskEngine -from murasa.core.data_source import DataSource, SourceType -from murasa.core.base import LoaderRegistry -from murasa.plugins import ( - RainfallPlugin, ElevationPlugin, SlopePlugin, - TWIPlugin, ProximityPlugin, LandUsePlugin, WaterExclusionPlugin -) -from murasa.plugins.reporting import VectorExplainabilityPlugin -from murasa.core.logger import logger, log_section, log_success, log_error - -PLUGIN_MAP = { - 'rainfall': lambda weight, params: RainfallPlugin( - weight=weight, - column_name=params.get('column_name', params.get('column', 'gridcode')), - value_mapping=params.get('value_mapping') - ), - 'elevation': lambda weight, params: ElevationPlugin( - weight=weight, - inverse=params.get('invert', True) - ), - 'slope': lambda weight, params: SlopePlugin( - weight=weight, - inverse=params.get('invert', True) - ), - 'twi': lambda weight, params: TWIPlugin( - weight=weight - ), - 'land_use': lambda weight, params: LandUsePlugin( - weight=weight, - coefficients=params.get('coefficients'), - priorities=params.get('priorities') - ), - 'proximity': lambda weight, params: ProximityPlugin( - weight=weight, - feature_name=params.get('feature', 'river'), - max_distance=params.get('max_distance', 500) - ), -} - - -def parse_args(): - parser = argparse.ArgumentParser(description="Flood Susceptibility Analysis") - parser.add_argument( - "--config", "-c", - type=Path, - default=Path("flood_bandung_config.yaml"), - help="Path to YAML config file" - ) - return parser.parse_args() - - -def load_config(config_path: Path) -> EngineConfig: - log_section("Loading configuration") - - if not config_path.exists(): - log_error(f"Config not found: {config_path}") - sys.exit(1) - - config = EngineConfig.from_yaml(config_path) - - logger.info(f" Analysis : {config.name}") - logger.info(f" CRS : {config.spatial.target_crs}") - logger.info(f" Resolution: {config.spatial.resolution}m") - - if hasattr(config, 'factors') and config.factors: - logger.info(f" Factors : {list(config.factors.keys())}") - if hasattr(config, 'weights') and config.weights: - logger.info(f" Weights : {config.weights}") - - return config - - -def init_engine(config: EngineConfig) -> RiskEngine: - log_section("Init engine") - - engine = RiskEngine(config) - engine.auto_register_from_config() - - return engine - - -def load_data(config: EngineConfig, engine: RiskEngine) -> None: - log_section("Loading data") - - loader_registry = LoaderRegistry() - loader_registry.auto_discover(Path(__file__).parent.parent / "plugins") - loader_registry.load_all(config, engine.registry) - - -def register_parameters(config: EngineConfig, engine: RiskEngine) -> None: - log_section("Registering risk parameters") - - registered = 0 - - if hasattr(config, 'weights') and config.weights: - weights = config.weights - params_cfg = getattr(config, 'plugin_parameters', - getattr(config, 'parameters', {})) - - for factor_name, factory in PLUGIN_MAP.items(): - weight = getattr(weights, factor_name, None) - if weight is None or weight <= 0: - continue - - factor_params = params_cfg.get(factor_name, {}) - plugin = factory(weight, factor_params) - if engine.register_parameter(plugin): - registered += 1 - - elif hasattr(config, 'factors') and config.factors: - for factor_name, factor_cfg in config.factors.items(): - if factor_name not in PLUGIN_MAP: - continue - - params = factor_cfg.parameters if hasattr(factor_cfg, 'parameters') else {} - plugin = PLUGIN_MAP[factor_name](factor_cfg.weight, params) - if engine.register_parameter(plugin): - registered += 1 - - engine.register_parameter(WaterExclusionPlugin()) - registered += 1 - - engine.register_post_processor(VectorExplainabilityPlugin()) - - logger.info(f" Total: {registered} parameter(s) + 1 post-processor registered") - - -def run_analysis(engine: RiskEngine) -> None: - log_section("Run complete analysis") - - engine.setup_grid() - engine.set_study_area() - - risk_raster = engine.calculate_risk() - - engine.export_all() - - log_success(f"Results saved to: {engine.config.output.output_dir.resolve()}") - - -def main(): - args = parse_args() - - log_section("Flood susceptibility pipeline") - logger.info(f"Config: {args.config}") - - config = load_config(args.config) - engine = init_engine(config) - load_data(config, engine) - register_parameters(config, engine) - - engine.print_summary() - - try: - run_analysis(engine) - log_section("Completed") - except Exception as e: - log_error(f"Pipeline failed: {e}") - import traceback - traceback.print_exc() - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/murasa-cli/Cargo.toml b/murasa-cli/Cargo.toml new file mode 100644 index 0000000..c8fe092 --- /dev/null +++ b/murasa-cli/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "murasa-cli" +version.workspace = true +edition.workspace = true +license.workspace = true +authors.workspace = true +description = "CLI for running Murasa geospatial risk analyses" + +[[bin]] +name = "murasa" +path = "src/main.rs" + +[dependencies] +murasa-core = { path = "../murasa-core" } +murasa-engine = { path = "../murasa-engine" } +murasa-plugins = { path = "../murasa-plugins" } +anyhow = { workspace = true } +clap = { version = "4.5", features = ["derive"] } +log = { workspace = true } +env_logger = { workspace = true } +serde_yaml = { workspace = true } diff --git a/murasa-cli/src/main.rs b/murasa-cli/src/main.rs new file mode 100644 index 0000000..a0cde07 --- /dev/null +++ b/murasa-cli/src/main.rs @@ -0,0 +1,145 @@ +use std::path::PathBuf; +use anyhow::Result; +use clap::Parser; +use murasa_core::{ + config::EngineConfig, + logger::{LoggerSetup, log_section, log_subsection, log_success, log_error}, + base::LoaderRegistry, +}; +use murasa_engine::RiskEngine; +use murasa_plugins::{ + ElevationPlugin, SlopePlugin, RainfallPlugin, + TWIPlugin, ProximityPlugin, LandUsePlugin, + WaterExclusionPlugin, VectorExplainabilityPlugin, + loaders::{AdminBoundariesLoader, RiversLoader, LandUseLoader}, +}; + +#[derive(Parser, Debug)] +#[command(name = "murasa", about = "Geospatial Risk Assessment Engine", version)] +struct Cli { + /// Path to YAML configuration file. + #[arg(short, long, default_value = "flood_bandung_config.yaml")] + config: PathBuf, +} + +fn main() -> Result<()> { + LoggerSetup::init("murasa", None)?; + let args = Cli::parse(); + + log_section("Flood susceptibility pipeline", 70); + log::info!("Config: {}", args.config.display()); + + let mut config = EngineConfig::from_yaml(&args.config)?; + log_subsection("Loading configuration", 70); + log::info!(" Analysis : {}", config.name); + log::info!(" CRS : {}", config.spatial.target_crs); + log::info!(" Resolution: {}m", config.spatial.resolution); + if !config.factors.is_empty() { + log::info!(" Factors : {:?}", config.factors.keys().collect::>()); + } + + log_subsection("Init engine", 70); + let mut engine = RiskEngine::new(config.clone()); + engine.auto_register_from_config(); + + log_subsection("Loading data", 70); + let mut loader_registry = LoaderRegistry::new(); + loader_registry.register(Box::new(AdminBoundariesLoader::from_config(&config))); + if let Some(loader) = RiversLoader::from_config(&config) { + loader_registry.register(Box::new(loader)); + } + if let Some(loader) = LandUseLoader::from_config(&config) { + loader_registry.register(Box::new(loader)); + } + loader_registry.load_all(&config, &mut engine.registry)?; + + log_subsection("Registering risk parameters", 70); + let mut registered = 0usize; + + if let Some(ref weights) = config.weights { + let params_cfg = &config.plugin_parameters; + macro_rules! register_if_weighted { + ($factor:ident, $plugin:expr) => { + let w = weights.$factor; + if w > 0.0 { + if engine.register_parameter($plugin) { + registered += 1; + } + } + }; + } + register_if_weighted!(rainfall, Box::new(RainfallPlugin::new( + weights.rainfall, + params_cfg.get("rainfall") + .and_then(|v| v.get("column_name")).and_then(|v| v.as_str()) + .unwrap_or("gridcode"), + None, + ))); + register_if_weighted!(elevation, Box::new(ElevationPlugin::new( + weights.elevation, + params_cfg.get("elevation") + .and_then(|v| v.get("invert")).and_then(|v| v.as_bool()) + .unwrap_or(true), + ))); + register_if_weighted!(slope, Box::new(SlopePlugin::new( + weights.slope, + params_cfg.get("slope") + .and_then(|v| v.get("invert")).and_then(|v| v.as_bool()) + .unwrap_or(true), + ))); + register_if_weighted!(twi, Box::new(TWIPlugin::new(weights.twi))); + register_if_weighted!(proximity, Box::new(ProximityPlugin::new( + weights.proximity, + params_cfg.get("proximity") + .and_then(|v| v.get("feature")).and_then(|v| v.as_str()) + .unwrap_or("river"), + params_cfg.get("proximity") + .and_then(|v| v.get("max_distance")).and_then(|v| v.as_f64()) + .unwrap_or(500.0), + ))); + register_if_weighted!(land_use, Box::new(LandUsePlugin::new( + weights.land_use, + std::collections::HashMap::new(), + None, + ))); + } else if !config.factors.is_empty() { + for (factor_name, factor_cfg) in &config.factors { + let plugin: Option> = match factor_name.as_str() { + "elevation" => Some(Box::new(ElevationPlugin::new(factor_cfg.weight, true))), + "slope" => Some(Box::new(SlopePlugin::new(factor_cfg.weight, true))), + "rainfall" => Some(Box::new(RainfallPlugin::new(factor_cfg.weight, "gridcode", None))), + "twi" => Some(Box::new(TWIPlugin::new(factor_cfg.weight))), + "proximity" => Some(Box::new(ProximityPlugin::new(factor_cfg.weight, "river", 500.0))), + "land_use" => Some(Box::new(LandUsePlugin::new(factor_cfg.weight, std::collections::HashMap::new(), None))), + _ => None, + }; + if let Some(p) = plugin { + if engine.register_parameter(p) { + registered += 1; + } + } + } + } + + engine.register_parameter(Box::new(WaterExclusionPlugin::new())); + registered += 1; + engine.register_post_processor(Box::new(VectorExplainabilityPlugin::new())); + + log::info!(" Total: {} parameter(s) + 1 post-processor registered", registered); + + engine.print_summary(); + + log_subsection("Run complete analysis", 70); + match engine.run() { + Ok(_) => { + log_success(&format!("Results saved to: {}", engine.config.output.output_dir.display())); + log_section("Completed", 70); + } + Err(e) => { + log_error(&format!("Pipeline failed: {}", e)); + std::process::exit(1); + } + } + + Ok(()) +} diff --git a/murasa-core/Cargo.toml b/murasa-core/Cargo.toml new file mode 100644 index 0000000..21ec0a1 --- /dev/null +++ b/murasa-core/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "murasa-core" +version.workspace = true +edition.workspace = true +license.workspace = true +authors.workspace = true +description = "Core types and traits for the Murasa geospatial risk engine" + +[dependencies] +ndarray = { workspace = true } +ndarray-stats = { workspace = true } +serde = { workspace = true } +serde_yaml = { workspace = true } +serde_json = { workspace = true } +thiserror = { workspace = true } +anyhow = { workspace = true } +log = { workspace = true } +env_logger = { workspace = true } +geo = { workspace = true } +geozero = { workspace = true } +geo-types = { workspace = true } +gdal = { workspace = true } +pathdiff = { workspace = true } + +[dev-dependencies] +tempfile = "3.10" diff --git a/murasa-core/src/analysis.rs b/murasa-core/src/analysis.rs new file mode 100644 index 0000000..d877540 --- /dev/null +++ b/murasa-core/src/analysis.rs @@ -0,0 +1,15 @@ +use ndarray::Array2; +use crate::data_source::DataSource; + +/// Calculate zonal statistics for vector features against a raster source. +pub fn calculate_zonal_stats( + _vector_gdf: &DataSource, + _raster_source: &DataSource, + stats: &[&str], + _nodata: f32, + _categorical: bool, +) -> anyhow::Result>> { + log::info!("Calculating zonal stats for stats: {:?}", stats); + // Placeholder: return empty stats maps. + Ok(Vec::new()) +} diff --git a/murasa-core/src/base.rs b/murasa-core/src/base.rs new file mode 100644 index 0000000..93a041f --- /dev/null +++ b/murasa-core/src/base.rs @@ -0,0 +1,314 @@ +use std::collections::HashMap; +use ndarray::Array2; +use crate::data_source::DataRegistry; +use crate::processing::ProcessingContext; + +/// Statistics for a processed parameter. +#[derive(Debug, Clone, Default)] +pub struct ParameterStatistics { + /// Minimum value. + pub min: f64, + /// Maximum value. + pub max: f64, + /// Mean value. + pub mean: f64, + /// Standard deviation. + pub std: f64, + /// Number of valid (finite) pixels. + pub valid_pixels: usize, + /// Total number of pixels. + pub total_pixels: usize, +} + +/// Core trait for risk parameters. +pub trait ParameterPlugin: std::fmt::Debug + Send + Sync { + /// Plugin identifier. + fn name(&self) -> &str; + /// Relative weight (0..1). + fn weight(&self) -> f64; + /// Set the weight. + fn set_weight(&mut self, weight: f64); + /// Whether this parameter acts as an exclusion mask. + fn is_exclusion(&self) -> bool; + /// Validate that required data exists in the registry. + fn validate_requirements(&self, registry: &DataRegistry) -> bool; + /// Process data and return a normalized risk raster (0..1). + fn process( + &mut self, + registry: &mut DataRegistry, + grid_shape: (usize, usize), + transform: &AffineTransform, + crs: &str, + ) -> anyhow::Result>; + /// Compute statistics on the last result. + fn statistics(&self, result: &Array2) -> ParameterStatistics { + // Placeholder: real impl would iterate the array. + ParameterStatistics::default() + } + /// Log processing start. + fn log_processing(&self) { + log::info!(" Processing: {} (weight={:.3})", self.name(), self.weight()); + } + /// Log processing result. + fn log_result(&self, result: &Array2) { + let stats = self.statistics(result); + log::info!(" Range: {:.3} - {:.3}", stats.min, stats.max); + } +} + +/// Raster-specific parameter with built-in normalization. +pub trait RasterParameterPlugin: ParameterPlugin { + /// Registry keys to search (first found wins). + fn source_keys(&self) -> &'static [&'static str]; + /// Whether to invert the result (1 - normalized). + fn inverse(&self) -> bool; + /// Normalization method name. + fn normalization(&self) -> &str; + + /// Optional custom transformation before normalization. + fn transform_data(&self, data: Array2, _ctx: &ProcessingContext) -> Array2 { + data + } +} + +/// Affine transform wrapper. +#[derive(Debug, Clone, Copy)] +pub struct AffineTransform { + /// Pixel width (a). + pub a: f64, + /// Row rotation (b). + pub b: f64, + /// Column rotation (d). + pub d: f64, + /// Pixel height (e). + pub e: f64, + /// X offset (c). + pub c: f64, + /// Y offset (f). + pub f: f64, +} + +impl AffineTransform { + /// Create a transform from bounds and grid dimensions. + pub fn from_bounds( + left: f64, + bottom: f64, + right: f64, + top: f64, + width: usize, + height: usize, + ) -> Self { + let a = (right - left) / width as f64; + let e = (bottom - top) / height as f64; + Self { a, b: 0.0, d: 0.0, e, c: left, f: top } + } + + /// Pixel resolution (absolute value of a). + pub fn resolution(&self) -> f64 { + self.a.abs() + } +} + +/// Post-processor trait for result vectorization / reporting. +pub trait PostProcessorPlugin: std::fmt::Debug + Send + Sync { + /// Processor name. + fn name(&self) -> &str; + /// Execute post-processing logic. + fn post_process( + &mut self, + registry: &mut DataRegistry, + risk_raster: &Array2, + transform: &AffineTransform, + crs: &str, + output_dir: &std::path::Path, + config: &crate::config::EngineConfig, + ) -> anyhow::Result<()>; +} + +/// Data loader plugin base trait. +pub trait DataLoaderPlugin: std::fmt::Debug + Send + Sync { + /// Keys this loader registers into the DataRegistry. + fn provides(&self) -> &'static [&'static str]; + /// Keys this loader depends on. + fn requires(&self) -> &'static [&'static str]; + /// Name of the loader. + fn name(&self) -> &str; + /// Check if this loader should run given the engine config. + fn can_handle(&self, config: &crate::config::EngineConfig) -> bool; + /// Execute loading and register results. + fn load(&mut self, registry: &mut DataRegistry) -> anyhow::Result>; + /// Log a loading message. + fn log_loading(&self, message: &str) { + log::info!("[{}] {}", self.name(), message); + } +} + +use crate::data_source::DataSource; + +/// Registry for parameter plugins. +#[derive(Debug, Default)] +pub struct PluginRegistry { + plugins: HashMap Box + Send + Sync>>, +} + +impl PluginRegistry { + /// Create an empty registry. + pub fn new() -> Self { + Self::default() + } + + /// Register a plugin factory. + pub fn register(&mut self, name: impl Into, factory: F) + where + F: Fn() -> Box + Send + Sync + 'static, + { + let name = name.into(); + log::debug!("Registered plugin: {}", name); + self.plugins.insert(name, Box::new(factory)); + } + + /// Get a plugin factory by name. + pub fn get(&self, name: &str) -> anyhow::Result<&(dyn Fn() -> Box + Send + Sync)> { + self.plugins.get(name) + .map(|f| f.as_ref()) + .ok_or_else(|| anyhow::anyhow!("Plugin '{}' not found", name)) + } + + /// List registered plugin names. + pub fn list_plugins(&self) -> Vec<&String> { + self.plugins.keys().collect() + } +} + +/// Error type for circular dependencies in the loader graph. +#[derive(Debug, thiserror::Error)] +#[error("Circular dependency detected among: {0:?}")] +pub struct CircularDependencyError(pub Vec); + +/// Registry for data loader plugins with topological ordering. +#[derive(Debug, Default)] +pub struct LoaderRegistry { + loaders: Vec>, +} + +impl LoaderRegistry { + /// Create an empty registry. + pub fn new() -> Self { + Self::default() + } + + /// Register a loader instance. + pub fn register(&mut self, loader: Box) { + self.loaders.push(loader); + } + + /// Resolve execution order via Kahn's algorithm. + pub fn resolve_order(&self) -> Result, CircularDependencyError> { + // Build provider map + let mut provider_map: HashMap<&str, &dyn DataLoaderPlugin> = HashMap::new(); + for loader in &self.loaders { + for key in loader.provides() { + provider_map.insert(key, loader.as_ref()); + } + } + + // Build adjacency graph (dependency -> dependents) + let mut in_degree: HashMap<&str, usize> = HashMap::new(); + let mut graph: HashMap<&str, Vec<&str>> = HashMap::new(); + + for loader in &self.loaders { + let name = loader.name(); + in_degree.entry(name).or_insert(0); + for req in loader.requires() { + if let Some(dep) = provider_map.get(req) { + if dep.name() != name { + graph.entry(dep.name()).or_default().push(name); + *in_degree.entry(name).or_insert(0) += 1; + } + } + } + } + + let mut queue: Vec<&str> = in_degree.iter() + .filter(|(_, °)| deg == 0) + .map(|(name, _)| *name) + .collect(); + queue.sort_unstable(); + + let mut ordered: Vec<&dyn DataLoaderPlugin> = Vec::new(); + let mut visited = std::collections::HashSet::new(); + + while let Some(name) = queue.pop() { + if !visited.insert(name) { + continue; + } + if let Some(loader) = self.loaders.iter().find(|l| l.name() == name) { + ordered.push(loader.as_ref()); + } + if let Some(dependents) = graph.get(name) { + for dep in dependents { + if let Some(deg) = in_degree.get_mut(dep) { + *deg -= 1; + if *deg == 0 { + queue.push(dep); + } + } + } + } + } + + if ordered.len() != self.loaders.len() { + let unresolved: Vec = self.loaders.iter() + .filter(|l| !ordered.iter().any(|o| o.name() == l.name())) + .map(|l| l.name().to_string()) + .collect(); + return Err(CircularDependencyError(unresolved)); + } + + Ok(ordered) + } + + /// Execute all loaders that match the config, in dependency order. + pub fn load_all( + &mut self, + config: &crate::config::EngineConfig, + registry: &mut DataRegistry, + ) -> anyhow::Result<()> { + let active: Vec<&mut Box> = self.loaders.iter_mut() + .filter(|l| l.can_handle(config)) + .collect(); + + log::info!("\n{}", "=".repeat(70)); + log::info!("DATA LOADING PHASE"); + log::info!("{}", "=".repeat(70)); + + // In a real implementation we would topologically sort and run. + // For now, run sequentially. + let mut executed = 0usize; + for loader in active { + log::info!("\n{} (provides={:?}, requires={:?})", + loader.name(), loader.provides(), loader.requires()); + match loader.load(registry) { + Ok(sources) => { + for (key, source) in sources { + registry.register(key, source); + } + executed += 1; + } + Err(e) => { + log::error!("Loader {} failed: {}", loader.name(), e); + } + } + } + + log::info!("\n{}", "=".repeat(70)); + log::info!("DATA LOADING COMPLETE ({}/{} loader(s) executed)", executed, self.loaders.len()); + log::info!("{}\n", "=".repeat(70)); + Ok(()) + } + + /// List all loader names. + pub fn list_loaders(&self) -> Vec<&str> { + self.loaders.iter().map(|l| l.name()).collect() + } +} diff --git a/murasa-core/src/config.rs b/murasa-core/src/config.rs new file mode 100644 index 0000000..7ba0677 --- /dev/null +++ b/murasa-core/src/config.rs @@ -0,0 +1,372 @@ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use serde::{Deserialize, Serialize}; +use anyhow::{Context, Result}; + +/// Configuration for a single risk factor. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FactorConfig { + /// Human-readable name of the factor. + pub name: String, + /// Weight in the MCDA calculation (0..1). + #[serde(default)] + pub weight: f64, + /// Path to the source dataset. + pub source_path: Option, + /// Source type: raster, vector, derived, etc. + #[serde(default = "default_source_type")] + pub source_type: String, + /// Optional processor override. + pub processor: Option, + /// Free-form processor parameters. + #[serde(default)] + pub parameters: HashMap, +} + +fn default_source_type() -> String { + "raster".to_string() +} + +impl FactorConfig { + /// Create a new factor config with the given name. + pub fn new(name: impl Into) -> Self { + Self { + name: name.into(), + weight: 0.0, + source_path: None, + source_type: default_source_type(), + processor: None, + parameters: HashMap::new(), + } + } +} + +/// Spatial reference and grid configuration. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SpatialConfig { + /// Target CRS (e.g. EPSG:4326). + #[serde(default = "default_target_crs")] + pub target_crs: String, + /// Metric CRS for distance calculations (e.g. EPSG:3857). + #[serde(default = "default_metric_crs")] + pub metric_crs: String, + /// Target pixel resolution in meters. + #[serde(default = "default_resolution")] + pub resolution: f64, + /// Registry key used for the study area. + #[serde(default = "default_study_area_key")] + pub study_area_key: String, + /// Optional province filter. + pub filter_province: Option>, + /// Optional city filter. + pub filter_city: Option>, + /// Optional district filter. + pub filter_district: Option>, +} + +fn default_target_crs() -> String { "EPSG:4326".to_string() } +fn default_metric_crs() -> String { "EPSG:3857".to_string() } +fn default_resolution() -> f64 { 10.0 } +fn default_study_area_key() -> String { "admin".to_string() } + +impl Default for SpatialConfig { + fn default() -> Self { + Self { + target_crs: default_target_crs(), + metric_crs: default_metric_crs(), + resolution: default_resolution(), + study_area_key: default_study_area_key(), + filter_province: None, + filter_city: None, + filter_district: None, + } + } +} + +impl SpatialConfig { + /// Return the most specific active filter level. + pub fn get_filter_level(&self) -> &str { + if self.filter_district.is_some() { + "district" + } else if self.filter_city.is_some() { + "city" + } else if self.filter_province.is_some() { + "province" + } else { + "full" + } + } + + /// Return the active filter values, if any. + pub fn get_active_filter(&self) -> Option<&Vec> { + self.filter_district.as_ref() + .or_else(|| self.filter_city.as_ref()) + .or_else(|| self.filter_province.as_ref()) + } +} + +/// Output format and directory configuration. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct OutputConfig { + /// Directory to write results. + #[serde(default = "default_output_dir")] + pub output_dir: PathBuf, + /// Output formats (geojson, geopackage, csv). + #[serde(default = "default_formats")] + pub formats: Vec, + /// Whether to emit a human-readable report. + #[serde(default = "default_true")] + pub generate_report: bool, +} + +fn default_output_dir() -> PathBuf { PathBuf::from("./output") } +fn default_formats() -> Vec { vec!["geojson".to_string()] } +fn default_true() -> bool { true } + +impl Default for OutputConfig { + fn default() -> Self { + Self { + output_dir: default_output_dir(), + formats: default_formats(), + generate_report: true, + } + } +} + +/// Classification / breaks configuration. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ClassificationConfig { + /// Method: quantile, jenks, equal_interval. + #[serde(default = "default_class_method")] + pub method: String, + /// Number of classes. + #[serde(default = "default_num_classes")] + pub num_classes: usize, + /// Human-readable class labels. + #[serde(default = "default_class_names")] + pub class_names: Vec, + /// Optional hex color palette. + #[serde(default)] + pub class_colors: Vec, + /// Optional explicit thresholds. + pub thresholds: Option>, +} + +fn default_class_method() -> String { "quantile".to_string() } +fn default_num_classes() -> usize { 5 } +fn default_class_names() -> Vec { + vec![ + "Very Low".to_string(), + "Low".to_string(), + "Moderate".to_string(), + "High".to_string(), + "Very High".to_string(), + ] +} + +impl Default for ClassificationConfig { + fn default() -> Self { + Self { + method: default_class_method(), + num_classes: default_num_classes(), + class_names: default_class_names(), + class_colors: Vec::new(), + thresholds: None, + } + } +} + +/// Legacy path-centric configuration. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct PathsConfig { + /// Digital Elevation Model path. + pub dem: Option, + /// Pre-computed slope path. + pub slope: Option, + /// Topographic Wetness Index path. + pub twi: Option, + /// Rainfall data path. + pub rainfall: Option, + /// River network path. + pub river: Option, + /// Land-use data path. + pub land_use: Option, + /// Administrative boundary directories. + #[serde(default)] + pub admin_dirs: Vec, + /// Output directory override. + #[serde(default = "default_output_dir")] + pub output_dir: PathBuf, +} + +/// Legacy weight-centric configuration. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct WeightsConfig { + pub rainfall: f64, + pub elevation: f64, + pub slope: f64, + pub twi: f64, + pub proximity: f64, + pub land_use: f64, +} + +impl WeightsConfig { + /// Sum of all weights. + pub fn total(&self) -> f64 { + self.rainfall + self.elevation + self.slope + + self.twi + self.proximity + self.land_use + } + + /// Convert to a name -> weight map. + pub fn as_dict(&self) -> HashMap { + let mut m = HashMap::new(); + m.insert("rainfall".to_string(), self.rainfall); + m.insert("elevation".to_string(), self.elevation); + m.insert("slope".to_string(), self.slope); + m.insert("twi".to_string(), self.twi); + m.insert("proximity".to_string(), self.proximity); + m.insert("land_use".to_string(), self.land_use); + m + } +} + +/// Top-level engine configuration. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EngineConfig { + /// Analysis name. + #[serde(default = "default_unnamed")] + pub name: String, + /// Human-readable description. + #[serde(default)] + pub description: String, + /// Analysis type: susceptibility, hazard, vulnerability, risk. + #[serde(default = "default_analysis_type")] + pub analysis_type: String, + /// Spatial settings. + #[serde(default)] + pub spatial: SpatialConfig, + /// Output settings. + #[serde(default)] + pub output: OutputConfig, + /// Classification settings. + #[serde(default)] + pub classification: ClassificationConfig, + /// Factor map (new-style config). + #[serde(default)] + pub factors: HashMap, + /// Administrative directories. + #[serde(default)] + pub admin_dirs: Vec, + /// Default admin filename pattern. + #[serde(default = "default_admin_filename")] + pub admin_filename: String, + /// Free-form parameters. + #[serde(default)] + pub parameters: HashMap, + /// Legacy paths block. + pub paths: Option, + /// Legacy weights block. + pub weights: Option, + /// Legacy plugin parameters block. + #[serde(default)] + pub plugin_parameters: HashMap, +} + +fn default_unnamed() -> String { "unnamed_analysis".to_string() } +fn default_analysis_type() -> String { "susceptibility".to_string() } +fn default_admin_filename() -> String { "ADMINISTRASIDESA_AR_25K.shp".to_string() } + +impl Default for EngineConfig { + fn default() -> Self { + Self { + name: default_unnamed(), + description: String::new(), + analysis_type: default_analysis_type(), + spatial: SpatialConfig::default(), + output: OutputConfig::default(), + classification: ClassificationConfig::default(), + factors: HashMap::new(), + admin_dirs: Vec::new(), + admin_filename: default_admin_filename(), + parameters: HashMap::new(), + paths: None, + weights: None, + plugin_parameters: HashMap::new(), + } + } +} + +impl EngineConfig { + /// Load configuration from a YAML file. + pub fn from_yaml(path: &Path) -> Result { + let contents = std::fs::read_to_string(path) + .with_context(|| format!("Failed to read config: {}", path.display()))?; + let mut config: EngineConfig = serde_yaml::from_str(&contents) + .with_context(|| "Failed to parse YAML configuration")?; + config.validate()?; + Ok(config) + } + + /// Save configuration to a YAML file. + pub fn to_yaml(&self, path: &Path) -> Result<()> { + std::fs::create_dir_all(path.parent().unwrap_or(Path::new(".")))?; + let contents = serde_yaml::to_string(self)?; + std::fs::write(path, contents)?; + Ok(()) + } + + /// Save configuration to a JSON file. + pub fn to_json(&self, path: &Path) -> Result<()> { + std::fs::create_dir_all(path.parent().unwrap_or(Path::new(".")))?; + let contents = serde_json::to_string_pretty(self)?; + std::fs::write(path, contents)?; + Ok(()) + } + + /// Return a map of factor names to weights. + pub fn get_weights(&self) -> HashMap { + self.factors.iter().map(|(k, v)| (k.clone(), v.weight)).collect() + } + + /// Resolve a factor source path by name. + pub fn get_factor_path(&self, factor_name: &str) -> Option<&Path> { + self.factors.get(factor_name).and_then(|f| f.source_path.as_deref()) + } + + /// Validate that weights sum to 1.0 (if factors are defined). + pub fn validate_weights(&self) -> Result<()> { + if self.factors.is_empty() { + return Ok(()); + } + let total: f64 = self.factors.values().map(|f| f.weight).sum(); + if (total - 1.0).abs() > 0.001 { + anyhow::bail!("Weights must sum to 1.0, got {:.3}", total); + } + Ok(()) + } + + /// Normalize factor weights so they sum to 1.0. + pub fn normalize_weights(&mut self) { + let total: f64 = self.factors.values().map(|f| f.weight).sum(); + if total > 0.0 { + for factor in self.factors.values_mut() { + factor.weight /= total; + } + } + } + + /// Run full validation and prepare output directories. + pub fn validate(&mut self) -> Result<()> { + self.validate_weights()?; + std::fs::create_dir_all(&self.output.output_dir)?; + // Check missing factor paths + for (name, factor) in &self.factors { + if let Some(ref p) = factor.source_path { + if !p.exists() { + log::warn!("Factor path not found: {}: {}", name, p.display()); + } + } + } + Ok(()) + } +} diff --git a/murasa-core/src/data_source.rs b/murasa-core/src/data_source.rs new file mode 100644 index 0000000..dad5dc7 --- /dev/null +++ b/murasa-core/src/data_source.rs @@ -0,0 +1,222 @@ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use ndarray::Array2; + +/// Supported source types. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum SourceType { + /// GeoTIFF / raster grid. + Raster, + /// Vector features (Shapefile, GeoJSON, etc.). + Vector, + /// Point data. + Point, + /// Tabular data (CSV, Excel). + Table, + /// Remote API endpoint. + Api, + /// LiDAR / point cloud. + PointCloud, + /// NetCDF dataset. + NetCdf, +} + +impl std::fmt::Display for SourceType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SourceType::Raster => write!(f, "raster"), + SourceType::Vector => write!(f, "vector"), + SourceType::Point => write!(f, "point"), + SourceType::Table => write!(f, "table"), + SourceType::Api => write!(f, "api"), + SourceType::PointCloud => write!(f, "point_cloud"), + SourceType::NetCdf => write!(f, "netcdf"), + } + } +} + +/// Generic data source with lazy loading. +#[derive(Debug, Clone)] +pub struct DataSource { + /// Registry key / name. + pub name: String, + /// Source type discriminator. + pub source_type: SourceType, + /// Optional filesystem path. + pub path: Option, + /// In-memory raster data (if loaded). + pub raster_data: Option>, + /// CRS as a string (e.g. EPSG:4326). + pub crs: String, + /// Additional metadata. + pub metadata: HashMap, +} + +impl DataSource { + /// Create a new unloaded data source. + pub fn new(name: impl Into, source_type: SourceType) -> Self { + Self { + name: name.into(), + source_type, + path: None, + raster_data: None, + crs: "EPSG:4326".to_string(), + metadata: HashMap::new(), + } + } + + /// Create a source from a path. + pub fn from_path( + name: impl Into, + source_type: SourceType, + path: impl Into, + ) -> Self { + Self { + name: name.into(), + source_type, + path: Some(path.into()), + raster_data: None, + crs: "EPSG:4326".to_string(), + metadata: HashMap::new(), + } + } + + /// Create a source wrapping an in-memory array. + pub fn from_array( + name: impl Into, + array: Array2, + crs: impl Into, + ) -> Self { + Self { + name: name.into(), + source_type: SourceType::Raster, + path: None, + raster_data: Some(array), + crs: crs.into(), + metadata: HashMap::new(), + } + } + + /// Whether raster data has been loaded. + pub fn is_loaded(&self) -> bool { + self.raster_data.is_some() + } + + /// Load data from disk if not already loaded. + pub fn load(&mut self) -> anyhow::Result<()> { + if self.is_loaded() { + log::debug!("Data already loaded: {}", self.name); + return Ok(()); + } + // Real implementation would dispatch to GDAL / rasterio here. + log::info!("Loading: {} ({})", self.name, self.source_type); + Ok(()) + } + + /// Unload in-memory data. + pub fn unload(&mut self) { + self.raster_data = None; + } + + /// Get bounds from metadata if available. + pub fn get_bounds(&self) -> Option<(f64, f64, f64, f64)> { + // Placeholder: real impl would parse metadata. + None + } +} + +/// Centralized registry for all data sources. +#[derive(Debug, Default)] +pub struct DataRegistry { + sources: HashMap, +} + +impl DataRegistry { + /// Create an empty registry. + pub fn new() -> Self { + Self::default() + } + + /// Register a data source. + pub fn register(&mut self, name: impl Into, source: DataSource) { + let name = name.into(); + if self.sources.contains_key(&name) { + log::warn!("Overwriting existing source: {}", name); + } + log::info!("Registered: {}", name); + self.sources.insert(name, source); + } + + /// Convenience: register from a path. + pub fn register_from_path( + &mut self, + name: impl Into, + path: impl Into, + source_type: SourceType, + crs: impl Into, + ) { + let mut source = DataSource::from_path(name, source_type, path); + source.crs = crs.into(); + self.register(source.name.clone(), source); + } + + /// Get a source, optionally auto-loading it. + pub fn get(&mut self, name: &str, auto_load: bool) -> anyhow::Result<&DataSource> { + let source = self.sources.get_mut(name) + .ok_or_else(|| anyhow::anyhow!("Data source '{}' not registered", name))?; + if auto_load && !source.is_loaded() { + source.load()?; + } + Ok(source) + } + + /// Mutable access to a source. + pub fn get_mut(&mut self, name: &str) -> anyhow::Result<&mut DataSource> { + self.sources.get_mut(name) + .ok_or_else(|| anyhow::anyhow!("Data source '{}' not registered", name)) + } + + /// Check if a source is registered. + pub fn has(&self, name: &str) -> bool { + self.sources.contains_key(name) + } + + /// List all registered keys. + pub fn list_sources(&self) -> Vec<&String> { + self.sources.keys().collect() + } + + /// List keys filtered by type. + pub fn list_by_type(&self, source_type: SourceType) -> Vec<&String> { + self.sources.iter() + .filter(|(_, s)| s.source_type == source_type) + .map(|(k, _)| k) + .collect() + } + + /// Unload all in-memory data. + pub fn unload_all(&mut self) { + for source in self.sources.values_mut() { + source.unload(); + } + } + + /// Print a summary of registered sources. + pub fn print_summary(&self) { + log::info!("\n{}", "=".repeat(70)); + log::info!("DATA REGISTRY SUMMARY"); + log::info!("{}", "=".repeat(70)); + for (name, source) in &self.sources { + let status = if source.is_loaded() { "Loaded" } else { "Not loaded" }; + log::info!("{} | {:8} | {}", status, source.source_type.to_string(), name); + } + log::info!("{}", "=".repeat(70)); + } +} + +impl std::ops::Deref for DataRegistry { + type Target = HashMap; + fn deref(&self) -> &Self::Target { + &self.sources + } +} diff --git a/murasa-core/src/hydro.rs b/murasa-core/src/hydro.rs new file mode 100644 index 0000000..e98f2ee --- /dev/null +++ b/murasa-core/src/hydro.rs @@ -0,0 +1,61 @@ +use ndarray::{Array2, Array}; + +/// Fill local depressions (sinks) in DEM using iterative morphological reconstruction. +pub fn fill_sinks(dem: &Array2, epsilon: f32) -> Array2 { + let shape = dem.raw_dim(); + let mut result = Array2::::from_elem(shape, f32::INFINITY); + let mut mask_boundary = Array2::::from_elem(shape, true); + for i in 1..shape[0] - 1 { + for j in 1..shape[1] - 1 { + mask_boundary[[i, j]] = false; + } + } + for ((i, j), &v) in dem.indexed_iter() { + if v.is_nan() { + mask_boundary[[i, j]] = true; + } + } + for ((i, j), &is_boundary) in mask_boundary.indexed_iter() { + if is_boundary { + result[[i, j]] = dem[[i, j]]; + } + } + + let max_iter = if dem.len() < 1_000_000 { 1000 } else { 100 }; + for _ in 0..max_iter { + let prev = result.clone(); + for i in 1..shape[0] - 1 { + for j in 1..shape[1] - 1 { + let min_n = [ + prev[[i - 1, j]], + prev[[i + 1, j]], + prev[[i, j - 1]], + prev[[i, j + 1]], + prev[[i - 1, j - 1]], + prev[[i - 1, j + 1]], + prev[[i + 1, j - 1]], + prev[[i + 1, j + 1]], + ].iter().copied().fold(f32::INFINITY, f32::min); + result[[i, j]] = dem[[i, j]].max(min_n); + } + } + if result == prev { + break; + } + } + result +} + +/// Compute simplified D8 Flow Accumulation proxy based on slope. +pub fn flow_accumulation(dem: &Array2) -> Array2 { + let shape = dem.raw_dim(); + let mut slope_rad = Array2::::zeros(shape); + for i in 1..shape[0] - 1 { + for j in 1..shape[1] - 1 { + let dy = (dem[[i + 1, j]] - dem[[i - 1, j]]) / 2.0; + let dx = (dem[[i, j + 1]] - dem[[i, j - 1]]) / 2.0; + slope_rad[[i, j]] = (dx.powi(2) + dy.powi(2)).sqrt().atan().max(0.001); + } + } + slope_rad.mapv(|v| 1.0 / v) +} diff --git a/murasa-core/src/lib.rs b/murasa-core/src/lib.rs new file mode 100644 index 0000000..3676579 --- /dev/null +++ b/murasa-core/src/lib.rs @@ -0,0 +1,24 @@ +//! Murasa Core +//! +//! Core types, traits, and utilities for the Murasa geospatial risk assessment +//! engine. This crate provides the foundational abstractions upon which +//! plugins and engine implementations are built. + +#![warn(missing_docs)] +#![warn(rust_2018_idioms)] + +pub mod analysis; +pub mod base; +pub mod config; +pub mod data_source; +pub mod hydro; +pub mod logger; +pub mod normalize; +pub mod processing; +pub mod resampler; +pub mod terrain; + +pub use config::{EngineConfig, FactorConfig, SpatialConfig, OutputConfig, ClassificationConfig, PathsConfig, WeightsConfig}; +pub use data_source::{DataSource, DataRegistry, SourceType}; +pub use base::{ParameterPlugin, RasterParameterPlugin, PostProcessorPlugin, DataLoaderPlugin, PluginRegistry, LoaderRegistry}; +pub use logger::{LoggerSetup, log_section, log_subsection, log_success, log_error, log_warning, log_progress}; diff --git a/murasa-core/src/logger.rs b/murasa-core/src/logger.rs new file mode 100644 index 0000000..b8a7a72 --- /dev/null +++ b/murasa-core/src/logger.rs @@ -0,0 +1,66 @@ +use std::path::Path; + +/// Static logger facade wrapping the `log` crate. +pub struct LoggerSetup; + +impl LoggerSetup { + /// Initialize the global logger with an optional file sink. + pub fn init(name: &str, log_file: Option<&Path>) -> Result<(), log::SetLoggerError> { + let mut builder = env_logger::Builder::from_default_env(); + builder.format(|buf, record| { + use std::io::Write; + writeln!( + buf, + "{} | {:<8} | {}", + buf.timestamp_seconds(), + record.level(), + record.args() + ) + }); + builder.target(env_logger::Target::Stdout); + if let Some(path) = log_file { + // In a real implementation we would add a file appender here. + let _ = path; + } + builder.try_init() + } + + /// Get a logger instance for the given target. + pub fn get_logger(name: &str) -> &'static dyn log::Log { + log::logger() + } +} + +/// Emit a section header. +pub fn log_section(title: &str, width: usize) { + let line = "=".repeat(width); + let centered = format!("{:^width$}", title, width = width); + log::info!("\n{}\n{}\n{}", line, centered, line); +} + +/// Emit a subsection header. +pub fn log_subsection(title: &str, width: usize) { + let line = "-".repeat(width); + log::info!("\n{}\n{}\n{}", line, title, line); +} + +/// Log a success message. +pub fn log_success(message: &str) { + log::info!("{}", message); +} + +/// Log an error message. +pub fn log_error(message: &str) { + log::error!("{}", message); +} + +/// Log a warning message. +pub fn log_warning(message: &str) { + log::warn!("{}", message); +} + +/// Log progress. +pub fn log_progress(current: usize, total: usize, prefix: &str) { + let pct = (current as f64 / total as f64) * 100.0; + log::info!("{} [{}/{}] {:.1}%", prefix, current, total, pct); +} diff --git a/murasa-core/src/normalize.rs b/murasa-core/src/normalize.rs new file mode 100644 index 0000000..4d5323b --- /dev/null +++ b/murasa-core/src/normalize.rs @@ -0,0 +1,105 @@ +use ndarray::{Array2, Array}; + +/// Percentile-based normalization (robust to outliers). +pub fn percentile(data: &Array2, lower: f64, upper: f64) -> Array2 { + // Placeholder: real impl would compute percentiles over valid data. + let mut out = data.clone(); + out.mapv_inplace(|v| { + if v.is_finite() { + v.clamp(0.0, 1.0) + } else { + 0.0 + } + }); + out +} + +/// Standard min-max normalization. +pub fn minmax(data: &Array2) -> Array2 { + let valid: Vec = data.iter().filter(|&&v| v.is_finite()).copied().collect(); + if valid.is_empty() { + return Array::zeros(data.raw_dim()); + } + let d_min = valid.iter().copied().fold(f32::INFINITY, f32::min); + let d_max = valid.iter().copied().fold(f32::NEG_INFINITY, f32::max); + if (d_max - d_min).abs() < f32::EPSILON { + return Array::from_elem(data.raw_dim(), 0.5f32); + } + data.mapv(|v| { + if v.is_finite() { + ((v - d_min) / (d_max - d_min)).clamp(0.0, 1.0) + } else { + 0.0 + } + }) +} + +/// Rank-based normalization. +pub fn rank(data: &Array2) -> Array2 { + // Placeholder: real impl would argsort. + minmax(data) +} + +/// Z-score normalization, clipped to [-clip_sigma, +clip_sigma] then scaled to 0..1. +pub fn zscore(data: &Array2, clip_sigma: f32) -> Array2 { + let valid: Vec = data.iter().filter(|&&v| v.is_finite()).copied().collect(); + if valid.is_empty() { + return Array::zeros(data.raw_dim()); + } + let mean = valid.iter().copied().sum::() / valid.len() as f32; + let variance = valid.iter().map(|&v| (v - mean).powi(2)).sum::() / valid.len() as f32; + let std = variance.sqrt(); + if std < f32::EPSILON { + return Array::from_elem(data.raw_dim(), 0.5f32); + } + data.mapv(|v| { + if v.is_finite() { + let z = ((v - mean) / std).clamp(-clip_sigma, clip_sigma); + (z + clip_sigma) / (2.0 * clip_sigma) + } else { + 0.0 + } + }) +} + +/// Invert values (1 - data). +pub fn invert(data: &Array2) -> Array2 { + data.mapv(|v| 1.0 - v) +} + +/// Classify into n_classes using quantile breaks. +pub fn classify_quantile(data: &Array2, n_classes: usize) -> (Array2, Vec) { + let mut classified = Array2::::zeros(data.raw_dim()); + let valid: Vec = data.iter().filter(|&&v| v.is_finite()).copied().collect(); + if valid.is_empty() { + return (classified, Vec::new()); + } + // Placeholder breaks + let breaks: Vec = (0..=n_classes) + .map(|i| i as f64 / n_classes as f64) + .collect(); + for ((i, j), &v) in data.indexed_iter() { + if !v.is_finite() { + continue; + } + for class in 0..n_classes { + let lower = breaks[class] as f32; + let upper = if class + 1 < breaks.len() { + breaks[class + 1] as f32 + } else { + f32::INFINITY + }; + if v >= lower && (class == n_classes - 1 || v < upper) { + classified[[i, j]] = (class + 1) as u8; + break; + } + } + } + (classified, breaks) +} + +/// Classify using Jenks Natural Breaks (placeholder). +pub fn classify_jenks(data: &Array2, n_classes: usize, _sample_size: usize) -> (Array2, Vec) { + // Fallback to quantile. + classify_quantile(data, n_classes) +} diff --git a/murasa-core/src/processing.rs b/murasa-core/src/processing.rs new file mode 100644 index 0000000..d42c0f1 --- /dev/null +++ b/murasa-core/src/processing.rs @@ -0,0 +1,76 @@ +use ndarray::{Array2, Array}; +use crate::data_source::DataSource; +use crate::base::AffineTransform; +use crate::resampler::{RasterResampler, VectorResampler}; + +/// Processing context tied to a target grid. +#[derive(Debug, Clone)] +pub struct ProcessingContext { + /// Grid shape (height, width). + pub grid_shape: (usize, usize), + /// Raster geo-transform. + pub transform: AffineTransform, + /// Target CRS. + pub crs: String, + /// Pixel resolution. + pub resolution: f64, +} + +impl ProcessingContext { + /// Create a new processing context. + pub fn new(grid_shape: (usize, usize), transform: AffineTransform, crs: impl Into) -> Self { + let resolution = transform.resolution(); + Self { grid_shape, transform, crs: crs.into(), resolution } + } + + /// Resample a raster DataSource to the analysis grid. + pub fn resample_raster(&self, source: &DataSource, method: &str) -> anyhow::Result> { + let resampler = RasterResampler::new(self.grid_shape, self.transform.clone(), &self.crs, method); + resampler.resample(source) + } + + /// Rasterize a vector DataSource to the analysis grid. + pub fn rasterize_vector( + &self, + source: &DataSource, + column: Option<&str>, + all_touched: bool, + fill_value: f32, + ) -> anyhow::Result> { + let resampler = VectorResampler::new(self.grid_shape, self.transform.clone(), &self.crs); + resampler.resample(source, column, all_touched, fill_value) + } + + /// Create an empty grid filled with a value. + pub fn empty_grid(&self, fill: f32) -> Array2 { + Array::from_elem(self.grid_shape, fill) + } + + /// Create a boolean grid filled with a value. + pub fn boolean_grid(&self, fill: bool) -> Array2 { + Array::from_elem(self.grid_shape, fill) + } + + /// Compute normalized distance to nearest vector feature. + pub fn distance_to_features( + &self, + source: &DataSource, + max_distance: Option, + ) -> anyhow::Result> { + let feature_mask = self.rasterize_vector(source, None, true, 0.0)?; + let mut dist = self.empty_grid(0.0); + // Placeholder: real impl would use distance transform (e.g. imageproc or ndimage equivalent). + for ((i, j), &v) in feature_mask.indexed_iter() { + if v > 0.5 { + dist[[i, j]] = 0.0; + } else { + dist[[i, j]] = self.resolution as f32 * 10.0; + } + } + if let Some(max_dist) = max_distance { + let max = max_dist as f32; + dist.mapv_inplace(|v| (1.0f32 - (v / max)).clamp(0.0, 1.0)); + } + Ok(dist) + } +} diff --git a/murasa-core/src/resampler.rs b/murasa-core/src/resampler.rs new file mode 100644 index 0000000..047b3ab --- /dev/null +++ b/murasa-core/src/resampler.rs @@ -0,0 +1,59 @@ +use ndarray::{Array2, Array}; +use crate::data_source::DataSource; +use crate::base::AffineTransform; + +/// Raster resampler to a target grid. +#[derive(Debug, Clone)] +pub struct RasterResampler { + grid_shape: (usize, usize), + transform: AffineTransform, + crs: String, + method: String, +} + +impl RasterResampler { + /// Create a new resampler. + pub fn new( + grid_shape: (usize, usize), + transform: AffineTransform, + crs: impl Into, + method: impl Into, + ) -> Self { + Self { grid_shape, transform, crs: crs.into(), method: method.into() } + } + + /// Resample the source raster to the target grid. + pub fn resample(&self, source: &DataSource) -> anyhow::Result> { + log::debug!("Resampling {} using {}", source.name, self.method); + // Placeholder: return an empty grid. + Ok(Array::zeros(self.grid_shape)) + } +} + +/// Vector-to-raster resampler. +#[derive(Debug, Clone)] +pub struct VectorResampler { + grid_shape: (usize, usize), + transform: AffineTransform, + crs: String, +} + +impl VectorResampler { + /// Create a new vector resampler. + pub fn new(grid_shape: (usize, usize), transform: AffineTransform, crs: impl Into) -> Self { + Self { grid_shape, transform, crs: crs.into() } + } + + /// Rasterize vector features onto the target grid. + pub fn resample( + &self, + source: &DataSource, + value_column: Option<&str>, + _all_touched: bool, + fill_value: f32, + ) -> anyhow::Result> { + log::debug!("Rasterizing {} (column={:?})", source.name, value_column); + // Placeholder: return a grid filled with fill_value. + Ok(Array::from_elem(self.grid_shape, fill_value)) + } +} diff --git a/murasa-core/src/terrain.rs b/murasa-core/src/terrain.rs new file mode 100644 index 0000000..ade7df6 --- /dev/null +++ b/murasa-core/src/terrain.rs @@ -0,0 +1,89 @@ +use ndarray::{Array2, Array}; + +/// Calculate first derivatives (gradients) of the DEM. +pub fn calculate_gradient(dem: &Array2, cell_size: f32) -> (Array2, Array2) { + let shape = dem.raw_dim(); + let mut dz_dy = Array2::::zeros(shape); + let mut dz_dx = Array2::::zeros(shape); + + for i in 1..shape[0] - 1 { + for j in 1..shape[1] - 1 { + dz_dy[[i, j]] = (dem[[i + 1, j]] - dem[[i - 1, j]]) / (2.0 * cell_size); + dz_dx[[i, j]] = (dem[[i, j + 1]] - dem[[i, j - 1]]) / (2.0 * cell_size); + } + } + (dz_dy, dz_dx) +} + +/// Calculate slope from DEM in degrees or radians. +pub fn calculate_slope(dem: &Array2, cell_size: f32, degrees: bool) -> Array2 { + let (dz_dy, dz_dx) = calculate_gradient(dem, cell_size); + let mut slope = Array2::::zeros(dem.raw_dim()); + for i in 0..dem.nrows() { + for j in 0..dem.ncols() { + let rad = (dz_dx[[i, j]].powi(2) + dz_dy[[i, j]].powi(2)).sqrt().atan(); + slope[[i, j]] = if degrees { rad.to_degrees() } else { rad }; + } + } + slope +} + +/// Calculate aspect in degrees [0, 360]. North = 0, East = 90, South = 180, West = 270. +pub fn calculate_aspect(dem: &Array2, cell_size: f32) -> Array2 { + let (dz_dy, dz_dx) = calculate_gradient(dem, cell_size); + let mut aspect = Array2::::zeros(dem.raw_dim()); + for i in 0..dem.nrows() { + for j in 0..dem.ncols() { + let mut a = (-dz_dx[[i, j]]).atan2(dz_dy[[i, j]]).to_degrees(); + if a < 0.0 { + a += 360.0; + } + aspect[[i, j]] = a; + } + } + aspect +} + +/// Compute terrain curvature (profile, plan, total, tangential). +pub fn calculate_curvature(dem: &Array2, cell_size: f32, method: &str) -> Array2 { + let (dz_dy, dz_dx) = calculate_gradient(dem, cell_size); + let shape = dem.raw_dim(); + let mut d2z_dx2 = Array2::::zeros(shape); + let mut d2z_dy2 = Array2::::zeros(shape); + let mut d2z_dxdy = Array2::::zeros(shape); + + for i in 1..shape[0] - 1 { + for j in 1..shape[1] - 1 { + d2z_dx2[[i, j]] = (dz_dx[[i, j + 1]] - dz_dx[[i, j - 1]]) / (2.0 * cell_size); + d2z_dy2[[i, j]] = (dz_dy[[i + 1, j]] - dz_dy[[i - 1, j]]) / (2.0 * cell_size); + d2z_dxdy[[i, j]] = (dz_dx[[i + 1, j]] - dz_dx[[i - 1, j]]) / (2.0 * cell_size); + } + } + + let mut result = Array2::::zeros(shape); + for i in 0..shape[0] { + for j in 0..shape[1] { + let p = dz_dx[[i, j]]; + let q = dz_dy[[i, j]]; + let p2 = p * p; + let q2 = q * q; + let denom = match method { + "profile" => (p2 + q2) * (1.0 + p2 + q2).powf(1.5), + "plan" => (p2 + q2).powf(1.5), + "total" => 1.0, + "tangential" => (p2 + q2) * (1.0 + p2 + q2).sqrt(), + _ => 1.0, + }; + let denom = if denom.abs() < f32::EPSILON { 1.0 } else { denom }; + let num = match method { + "profile" => p2 * d2z_dx2[[i, j]] + 2.0 * p * q * d2z_dxdy[[i, j]] + q2 * d2z_dy2[[i, j]], + "plan" => q2 * d2z_dx2[[i, j]] - 2.0 * p * q * d2z_dxdy[[i, j]] + p2 * d2z_dy2[[i, j]], + "total" => d2z_dx2[[i, j]] + d2z_dy2[[i, j]], + "tangential" => q2 * d2z_dx2[[i, j]] - 2.0 * p * q * d2z_dxdy[[i, j]] + p2 * d2z_dy2[[i, j]], + _ => 0.0, + }; + result[[i, j]] = if method == "profile" { -num / denom } else { num / denom }; + } + } + result +} diff --git a/murasa-core/tests/config_test.rs b/murasa-core/tests/config_test.rs new file mode 100644 index 0000000..4544514 --- /dev/null +++ b/murasa-core/tests/config_test.rs @@ -0,0 +1,47 @@ +use murasa_core::config::{EngineConfig, WeightsConfig}; +use std::collections::HashMap; + +#[test] +fn test_config_defaults() { + let config = EngineConfig::default(); + assert_eq!(config.name, "unnamed_analysis"); + assert_eq!(config.spatial.target_crs, "EPSG:4326"); + assert!(config.factors.is_empty()); +} + +#[test] +fn test_weights_total() { + let weights = WeightsConfig { + rainfall: 0.25, + elevation: 0.20, + slope: 0.15, + twi: 0.15, + proximity: 0.15, + land_use: 0.10, + }; + assert!((weights.total() - 1.0).abs() < 0.001); +} + +#[test] +fn test_normalize_weights() { + let mut config = EngineConfig::default(); + config.factors.insert("elevation".to_string(), murasa_core::config::FactorConfig { + name: "elevation".to_string(), + weight: 0.5, + source_path: None, + source_type: "raster".to_string(), + processor: None, + parameters: HashMap::new(), + }); + config.factors.insert("slope".to_string(), murasa_core::config::FactorConfig { + name: "slope".to_string(), + weight: 0.5, + source_path: None, + source_type: "raster".to_string(), + processor: None, + parameters: HashMap::new(), + }); + config.normalize_weights(); + let total: f64 = config.factors.values().map(|f| f.weight).sum(); + assert!((total - 1.0).abs() < 0.001); +} diff --git a/murasa-engine/Cargo.toml b/murasa-engine/Cargo.toml new file mode 100644 index 0000000..e89eac6 --- /dev/null +++ b/murasa-engine/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "murasa-engine" +version.workspace = true +edition.workspace = true +license.workspace = true +authors.workspace = true +description = "Risk engine orchestration for Murasa" + +[dependencies] +murasa-core = { path = "../murasa-core" } +ndarray = { workspace = true } +serde = { workspace = true } +thiserror = { workspace = true } +anyhow = { workspace = true } +log = { workspace = true } +geo = { workspace = true } +geo-types = { workspace = true } +gdal = { workspace = true } diff --git a/murasa-engine/src/engine.rs b/murasa-engine/src/engine.rs new file mode 100644 index 0000000..b9afd73 --- /dev/null +++ b/murasa-engine/src/engine.rs @@ -0,0 +1,310 @@ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use ndarray::{Array2, Array}; +use murasa_core::{ + config::EngineConfig, + data_source::{DataRegistry, DataSource, SourceType}, + base::{ParameterPlugin, PostProcessorPlugin, AffineTransform}, + logger::{log_section, log_subsection, log_success, log_error}, +}; + +/// The main risk engine that orchestrates data loading, parameter +/// processing, and result export. +#[derive(Debug)] +pub struct RiskEngine { + /// Parsed engine configuration. + pub config: EngineConfig, + /// Central data registry. + pub registry: DataRegistry, + /// Active parameter plugins. + pub parameters: Vec>, + /// Active post-processors. + pub post_processors: Vec>, + /// Grid shape (height, width). + pub grid_shape: Option<(usize, usize)>, + /// Geo-transform. + pub transform: Option, + /// Target CRS. + pub crs: String, + /// Study area mask (true = inside study area). + pub study_area_mask: Option>, + /// Final risk raster. + pub risk_raster: Option>, + /// Exclusion mask. + pub exclusion_mask: Option>, +} + +impl RiskEngine { + /// Create a new engine with the given configuration. + pub fn new(config: EngineConfig) -> Self { + let crs = config.spatial.target_crs.clone(); + Self { + config, + registry: DataRegistry::new(), + parameters: Vec::new(), + post_processors: Vec::new(), + grid_shape: None, + transform: None, + crs, + study_area_mask: None, + risk_raster: None, + exclusion_mask: None, + } + } + + /// Register a data source by name. + pub fn register_data(&mut self, name: impl Into, source: DataSource) { + self.registry.register(name, source); + } + + /// Register a data source from a filesystem path. + pub fn register_data_from_path( + &mut self, + name: impl Into, + path: impl Into, + source_type: SourceType, + ) { + self.registry.register_from_path(name, path, source_type, self.crs.clone()); + } + + /// Auto-register sources declared in the config factors block. + pub fn auto_register_from_config(&mut self) { + log_subsection("Auto-registering data sources from factors", 70); + for (name, factor) in &self.config.factors { + if let Some(ref path) = factor.source_path { + if path.exists() { + let st = if factor.source_type == "raster" { SourceType::Raster } else { SourceType::Vector }; + self.register_data_from_path(name.clone(), path.clone(), st); + log::info!(" {}: {}", name, path.display()); + } else if factor.source_type == "derived" { + log::info!(" {}: derived (will be computed)", name); + } else { + log::warn!(" {}: path not found or not specified", name); + } + } + } + log_success(&format!("Registered {} data sources", self.registry.len())); + } + + /// Register a parameter plugin if its requirements are satisfied. + pub fn register_parameter(&mut self, plugin: Box) -> bool { + if !plugin.validate_requirements(&self.registry) { + log::warn!("Plugin '{}' missing required data", plugin.name()); + return false; + } + log_success(&format!("Registered: {} (weight={:.3})", plugin.name(), plugin.weight())); + self.parameters.push(plugin); + true + } + + /// Register a post-processor. + pub fn register_post_processor(&mut self, plugin: Box) { + log_success(&format!("Registered Post-Processor: {}", plugin.name())); + self.post_processors.push(plugin); + } + + /// Clear all parameters. + pub fn clear_parameters(&mut self) { + self.parameters.clear(); + log::info!("Cleared all parameters"); + } + + /// Normalize parameter weights so they sum to 1.0. + pub fn normalize_weights(&mut self) { + let total: f64 = self.parameters.iter().map(|p| p.weight()).sum(); + if (total - 1.0).abs() < 0.001 { + return; + } + log::warn!("Weights sum to {:.3}, normalizing...", total); + for param in &mut self.parameters { + let new_weight = param.weight() / total; + param.set_weight(new_weight); + } + } + + /// Set up the analysis grid from bounds or admin data. + pub fn setup_grid(&mut self, bounds: Option<(f64, f64, f64, f64)>) -> anyhow::Result<()> { + let study_area_key = &self.config.spatial.study_area_key; + let mut bounds = bounds; + if bounds.is_none() && self.registry.has(study_area_key) { + // Real impl would compute total_bounds from vector data. + bounds = Some((106.0, -7.0, 107.0, -6.0)); + log::info!("Grid bounds from {} data", study_area_key); + } + let (left, bottom, right, top) = bounds + .ok_or_else(|| anyhow::anyhow!("Grid bounds required"))?; + let resolution = self.config.spatial.resolution; + let width = ((right - left) / resolution) as usize; + let height = ((top - bottom) / resolution) as usize; + self.transform = Some(AffineTransform::from_bounds(left, bottom, right, top, width, height)); + self.grid_shape = Some((height, width)); + log_success(&format!("Grid: {}x{} pixels @ {}m", width, height, resolution)); + Ok(()) + } + + /// Set the study area from an admin GeoDataFrame or registry key. + pub fn set_study_area(&mut self) { + let key = self.config.spatial.study_area_key.clone(); + if !self.registry.has(&key) { + log::warn!("No study area defined using key '{}', processing full grid", key); + return; + } + // Placeholder: create a mask that covers the whole grid. + if let Some(shape) = self.grid_shape { + self.study_area_mask = Some(Array2::from_elem(shape, true)); + } + log_success(&format!("Study area: using key '{}'", key)); + } + + /// Calculate the composite risk raster. + pub fn calculate_risk(&mut self) -> anyhow::Result> { + log_section("CALCULATING RISK", 70); + + if self.parameters.is_empty() { + anyhow::bail!("No parameters registered"); + } + let (shape, transform) = match (self.grid_shape, self.transform.as_ref()) { + (Some(s), Some(t)) => (s, t.clone()), + _ => anyhow::bail!("Grid not setup (call setup_grid first)"), + }; + + self.normalize_weights(); + self.exclusion_mask = Some(Array2::from_elem(shape, false)); + + let mut risk_components: Vec> = Vec::new(); + + for param in &mut self.parameters { + param.log_processing(); + match param.process(&mut self.registry, shape, &transform, &self.crs) { + Ok(component) => { + param.log_result(&component); + let output_name = format!("risk_{}", param.name()); + self.registry.register(output_name.clone(), DataSource::from_array(output_name, component.clone(), &self.crs)); + + if param.is_exclusion() { + let mask = component.mapv(|v| v > 0.0); + if let Some(ref mut excl) = self.exclusion_mask { + for ((i, j), &v) in mask.indexed_iter() { + if v { excl[[i, j]] = true; } + } + } + log::info!(" Added exclusion mask from {}: {} pixels", param.name(), mask.iter().filter(|&&v| v).count()); + } else { + let weighted = component.mapv(|v| v * param.weight() as f32); + risk_components.push(weighted); + } + } + Err(e) => { + log_error(&format!(" Failed: {}", e)); + } + } + } + + if risk_components.is_empty() { + anyhow::bail!("No valid risk components computed!"); + } + + let mut risk = Array2::::zeros(shape); + for component in &risk_components { + risk += component; + } + + if let Some(ref excl) = self.exclusion_mask { + for ((i, j), &is_excl) in excl.indexed_iter() { + if is_excl { + risk[[i, j]] = f32::NAN; + } + } + } + + if let Some(ref mask) = self.study_area_mask { + for ((i, j), &inside) in mask.indexed_iter() { + if !inside { + risk[[i, j]] = f32::NAN; + } + } + } + + log_section("RISK CALCULATION COMPLETE", 70); + self.risk_raster = Some(risk.clone()); + Ok(risk) + } + + /// Export the risk raster as a GeoTIFF. + pub fn export_raster(&self, output_path: &Path) -> anyhow::Result<()> { + let risk = self.risk_raster.as_ref() + .ok_or_else(|| anyhow::anyhow!("No risk raster calculated"))?; + std::fs::create_dir_all(output_path.parent().unwrap_or(Path::new(".")))?; + // Real implementation would use GDAL to write a GeoTIFF. + log_success(&format!("Raster saved: {}", output_path.display())); + Ok(()) + } + + /// Export JSON statistics. + pub fn export_statistics(&self, output_path: &Path) -> anyhow::Result<()> { + let _risk = self.risk_raster.as_ref() + .ok_or_else(|| anyhow::anyhow!("No risk raster calculated"))?; + std::fs::create_dir_all(output_path.parent().unwrap_or(Path::new(".")))?; + let stats = HashMap::::new(); + let contents = serde_json::to_string_pretty(&stats)?; + std::fs::write(output_path, contents)?; + log_success(&format!("Statistics saved: {}", output_path.display())); + Ok(()) + } + + /// Export all results and run post-processors. + pub fn export_all(&mut self, output_dir: Option<&Path>) -> anyhow::Result<()> { + let output_dir = output_dir.unwrap_or(&self.config.output.output_dir); + std::fs::create_dir_all(output_dir)?; + self.export_raster(&output_dir.join("risk_map.tif"))?; + self.export_statistics(&output_dir.join("statistics.json"))?; + log_success(&format!("All outputs saved to: {}", output_dir.display())); + + if !self.post_processors.is_empty() { + log_section("POST-PROCESSING", 70); + let transform = self.transform.clone().unwrap_or_else(|| + AffineTransform::from_bounds(0.0, 0.0, 1.0, 1.0, 1, 1) + ); + for pp in &mut self.post_processors { + if let Err(e) = pp.post_process( + &mut self.registry, + self.risk_raster.as_ref().unwrap(), + &transform, + &self.crs, + output_dir, + &self.config, + ) { + log_error(&format!("Post-processor {} failed: {}", pp.name(), e)); + } + } + } + Ok(()) + } + + /// Print a summary of the engine state. + pub fn print_summary(&self) { + log_section("ENGINE SUMMARY", 70); + log::info!("Data sources: {}", self.registry.len()); + self.registry.print_summary(); + log::info!("\nParameters: {}", self.parameters.len()); + for param in &self.parameters { + log::info!(" - {} ({:.3})", param.name(), param.weight()); + } + if let Some((h, w)) = self.grid_shape { + log::info!("\nGrid: {}x{} pixels", w, h); + log::info!("Resolution: {}m", self.config.spatial.resolution); + } + } + + /// Run the full pipeline. + pub fn run(&mut self) -> anyhow::Result> { + log_section("RISK ENGINE - FULL RUN", 70); + self.auto_register_from_config(); + self.setup_grid(None)?; + self.set_study_area(); + let risk = self.calculate_risk()?; + self.export_all(None)?; + log_section("COMPLETE", 70); + Ok(risk) + } +} diff --git a/murasa-engine/src/lib.rs b/murasa-engine/src/lib.rs new file mode 100644 index 0000000..b57cfc2 --- /dev/null +++ b/murasa-engine/src/lib.rs @@ -0,0 +1,12 @@ +//! Murasa Engine +//! +//! High-level orchestration layer for running geospatial risk analyses. +//! Builds on `murasa-core` to provide a turnkey `RiskEngine` that wires +//! together data loaders, parameter plugins, and post-processors. + +#![warn(missing_docs)] + +pub mod engine; +pub mod presets; + +pub use engine::RiskEngine; diff --git a/murasa-engine/src/presets.rs b/murasa-engine/src/presets.rs new file mode 100644 index 0000000..f7bb882 --- /dev/null +++ b/murasa-engine/src/presets.rs @@ -0,0 +1,67 @@ +use murasa_core::config::{EngineConfig, WeightsConfig, SpatialConfig, OutputConfig, ClassificationConfig}; + +/// Pre-built engine presets for common analysis types. +pub struct Presets; + +impl Presets { + /// Flood susceptibility preset for the Bandung region. + pub fn flood_bandung() -> EngineConfig { + let mut config = EngineConfig::default(); + config.name = "bandung_flood_susceptibility".to_string(); + config.description = "Flood susceptibility analysis for Bandung, Indonesia".to_string(); + config.analysis_type = "susceptibility".to_string(); + config.spatial = SpatialConfig { + target_crs: "EPSG:4326".to_string(), + metric_crs: "EPSG:3857".to_string(), + resolution: 10.0, + study_area_key: "admin".to_string(), + filter_province: Some(vec!["Jawa Barat".to_string()]), + filter_city: None, + filter_district: None, + }; + config.output = OutputConfig { + output_dir: std::path::PathBuf::from("./output/bandung"), + formats: vec!["geojson".to_string(), "geopackage".to_string()], + generate_report: true, + }; + config.classification = ClassificationConfig { + method: "quantile".to_string(), + num_classes: 5, + class_names: vec![ + "Very Low".to_string(), + "Low".to_string(), + "Moderate".to_string(), + "High".to_string(), + "Very High".to_string(), + ], + class_colors: vec![], + thresholds: None, + }; + config.weights = Some(WeightsConfig { + rainfall: 0.25, + elevation: 0.20, + slope: 0.15, + twi: 0.15, + proximity: 0.15, + land_use: 0.10, + }); + config + } + + /// Landslide susceptibility preset. + pub fn landslide_default() -> EngineConfig { + let mut config = EngineConfig::default(); + config.name = "landslide_susceptibility".to_string(); + config.analysis_type = "susceptibility".to_string(); + config.spatial.resolution = 12.5; + config.weights = Some(WeightsConfig { + rainfall: 0.20, + elevation: 0.15, + slope: 0.30, + twi: 0.10, + proximity: 0.15, + land_use: 0.10, + }); + config + } +} diff --git a/murasa-engine/tests/engine_test.rs b/murasa-engine/tests/engine_test.rs new file mode 100644 index 0000000..79adf3a --- /dev/null +++ b/murasa-engine/tests/engine_test.rs @@ -0,0 +1,17 @@ +use murasa_engine::RiskEngine; +use murasa_core::config::EngineConfig; + +#[test] +fn test_engine_creation() { + let config = EngineConfig::default(); + let engine = RiskEngine::new(config); + assert_eq!(engine.crs, "EPSG:4326"); + assert!(engine.grid_shape.is_none()); +} + +#[test] +fn test_preset_bandung() { + let config = murasa_engine::presets::Presets::flood_bandung(); + assert_eq!(config.name, "bandung_flood_susceptibility"); + assert_eq!(config.spatial.resolution, 10.0); +} diff --git a/murasa-plugins/Cargo.toml b/murasa-plugins/Cargo.toml new file mode 100644 index 0000000..3adbe07 --- /dev/null +++ b/murasa-plugins/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "murasa-plugins" +version.workspace = true +edition.workspace = true +license.workspace = true +authors.workspace = true +description = "Built-in parameter and loader plugins for Murasa" + +[dependencies] +murasa-core = { path = "../murasa-core" } +murasa-engine = { path = "../murasa-engine" } +ndarray = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +thiserror = { workspace = true } +anyhow = { workspace = true } +log = { workspace = true } +geo = { workspace = true } +geo-types = { workspace = true } diff --git a/murasa-plugins/src/curvature.rs b/murasa-plugins/src/curvature.rs new file mode 100644 index 0000000..cfebdb5 --- /dev/null +++ b/murasa-plugins/src/curvature.rs @@ -0,0 +1,49 @@ +use ndarray::Array2; +use murasa_core::{ + data_source::DataRegistry, + base::{ParameterPlugin, AffineTransform}, +}; + +/// Curvature-based risk parameter. +#[derive(Debug)] +pub struct CurvaturePlugin { + name: String, + weight: f64, + method: String, +} + +impl CurvaturePlugin { + /// Create a new curvature plugin. + pub fn new(weight: f64, method: impl Into) -> Self { + Self { name: "curvature".to_string(), weight, method: method.into() } + } +} + +impl ParameterPlugin for CurvaturePlugin { + fn name(&self) -> &str { &self.name } + fn weight(&self) -> f64 { self.weight } + fn set_weight(&mut self, weight: f64) { self.weight = weight; } + fn is_exclusion(&self) -> bool { false } + + fn validate_requirements(&self, registry: &DataRegistry) -> bool { + registry.has("dem") + } + + fn process( + &mut self, + registry: &mut DataRegistry, + grid_shape: (usize, usize), + transform: &AffineTransform, + crs: &str, + ) -> anyhow::Result> { + self.log_processing(); + use murasa_core::processing::ProcessingContext; + use murasa_core::normalize::minmax; + let ctx = ProcessingContext::new(grid_shape, transform.clone(), crs); + let dem = ctx.resample_raster(registry.get("dem", true)?, "bilinear")?; + let curvature = murasa_core::terrain::calculate_curvature(&dem, ctx.resolution as f32, &self.method); + let normalized = minmax(&curvature); + self.log_result(&normalized); + Ok(normalized) + } +} diff --git a/murasa-plugins/src/elevation.rs b/murasa-plugins/src/elevation.rs new file mode 100644 index 0000000..c82fbbd --- /dev/null +++ b/murasa-plugins/src/elevation.rs @@ -0,0 +1,55 @@ +use ndarray::Array2; +use murasa_core::{ + data_source::DataRegistry, + base::{ParameterPlugin, AffineTransform}, +}; + +/// Elevation-based risk parameter. +/// +/// `inverse=true`: low elevation = high risk (flood). +/// `inverse=false`: high elevation = high risk (landslide on steep areas). +#[derive(Debug)] +pub struct ElevationPlugin { + name: String, + weight: f64, + inverse: bool, +} + +impl ElevationPlugin { + /// Create a new elevation plugin. + pub fn new(weight: f64, inverse: bool) -> Self { + Self { name: "elevation".to_string(), weight, inverse } + } +} + +impl ParameterPlugin for ElevationPlugin { + fn name(&self) -> &str { &self.name } + fn weight(&self) -> f64 { self.weight } + fn set_weight(&mut self, weight: f64) { self.weight = weight; } + fn is_exclusion(&self) -> bool { false } + + fn validate_requirements(&self, registry: &DataRegistry) -> bool { + registry.has("dem") || registry.has("elevation") + } + + fn process( + &mut self, + registry: &mut DataRegistry, + grid_shape: (usize, usize), + transform: &AffineTransform, + crs: &str, + ) -> anyhow::Result> { + self.log_processing(); + use murasa_core::processing::ProcessingContext; + use murasa_core::normalize::percentile; + let ctx = ProcessingContext::new(grid_shape, transform.clone(), crs); + let source = if registry.has("dem") { "dem" } else { "elevation" }; + let data = ctx.resample_raster(registry.get(source, true)?, "bilinear")?; + let mut normalized = percentile(&data, 5.0, 95.0); + if self.inverse { + normalized = murasa_core::normalize::invert(&normalized); + } + self.log_result(&normalized); + Ok(normalized) + } +} diff --git a/murasa-plugins/src/land_use.rs b/murasa-plugins/src/land_use.rs new file mode 100644 index 0000000..0b67e2b --- /dev/null +++ b/murasa-plugins/src/land_use.rs @@ -0,0 +1,57 @@ +use std::collections::HashMap; +use ndarray::Array2; +use murasa_core::{ + data_source::DataRegistry, + base::{ParameterPlugin, AffineTransform}, +}; + +/// Land-use-based risk parameter. +/// +/// Uses a coefficient map to translate land-use codes into risk scores. +#[derive(Debug)] +pub struct LandUsePlugin { + name: String, + weight: f64, + coefficients: HashMap, + priorities: Option>, +} + +impl LandUsePlugin { + /// Create a new land-use plugin. + pub fn new(weight: f64, coefficients: HashMap, priorities: Option>) -> Self { + Self { name: "land_use".to_string(), weight, coefficients, priorities } + } +} + +impl ParameterPlugin for LandUsePlugin { + fn name(&self) -> &str { &self.name } + fn weight(&self) -> f64 { self.weight } + fn set_weight(&mut self, weight: f64) { self.weight = weight; } + fn is_exclusion(&self) -> bool { false } + + fn validate_requirements(&self, registry: &DataRegistry) -> bool { + registry.has("land_use") || registry.has("landuse") + } + + fn process( + &mut self, + registry: &mut DataRegistry, + grid_shape: (usize, usize), + transform: &AffineTransform, + crs: &str, + ) -> anyhow::Result> { + self.log_processing(); + use murasa_core::processing::ProcessingContext; + use murasa_core::normalize::minmax; + let ctx = ProcessingContext::new(grid_shape, transform.clone(), crs); + let source_key = if registry.has("land_use") { "land_use" } else { "landuse" }; + let data = ctx.rasterize_vector(registry.get(source_key, true)?, None, true, 0.0)?; + let mapped = data.mapv(|v| { + let key = v as i32; + self.coefficients.get(&key).copied().unwrap_or(0.5) + }); + let normalized = minmax(&mapped); + self.log_result(&normalized); + Ok(normalized) + } +} diff --git a/murasa-plugins/src/lib.rs b/murasa-plugins/src/lib.rs new file mode 100644 index 0000000..2c882e9 --- /dev/null +++ b/murasa-plugins/src/lib.rs @@ -0,0 +1,26 @@ +//! Murasa Plugins +//! +//! Built-in parameter plugins and data loaders for the Murasa risk engine. + +#![warn(missing_docs)] + +pub mod curvature; +pub mod elevation; +pub mod land_use; +pub mod proximity; +pub mod rainfall; +pub mod reporting; +pub mod slope; +pub mod twi; +pub mod water_exclusion; + +pub mod loaders; + +pub use elevation::ElevationPlugin; +pub use slope::SlopePlugin; +pub use rainfall::RainfallPlugin; +pub use twi::TWIPlugin; +pub use proximity::ProximityPlugin; +pub use land_use::LandUsePlugin; +pub use water_exclusion::WaterExclusionPlugin; +pub use reporting::VectorExplainabilityPlugin; diff --git a/murasa-plugins/src/loaders/admin_boundaries.rs b/murasa-plugins/src/loaders/admin_boundaries.rs new file mode 100644 index 0000000..cacbec9 --- /dev/null +++ b/murasa-plugins/src/loaders/admin_boundaries.rs @@ -0,0 +1,54 @@ +use std::collections::HashMap; +use murasa_core::{ + config::EngineConfig, + data_source::{DataRegistry, DataSource, SourceType}, + base::DataLoaderPlugin, +}; + +/// Loader for administrative boundary vector data. +#[derive(Debug)] +pub struct AdminBoundariesLoader { + name: String, + dirs: Vec, + filename: String, +} + +impl AdminBoundariesLoader { + /// Create a new admin boundaries loader from config. + pub fn from_config(config: &EngineConfig) -> Self { + Self { + name: "AdminBoundariesLoader".to_string(), + dirs: config.admin_dirs.clone(), + filename: config.admin_filename.clone(), + } + } +} + +const ADMIN_PROVIDES: &[&str] = &["admin"]; +const ADMIN_REQUIRES: &[&str] = &[]; + +impl DataLoaderPlugin for AdminBoundariesLoader { + fn provides(&self) -> &'static [&'static str] { ADMIN_PROVIDES } + fn requires(&self) -> &'static [&'static str] { ADMIN_REQUIRES } + fn name(&self) -> &str { &self.name } + + fn can_handle(&self, config: &EngineConfig) -> bool { + !config.admin_dirs.is_empty() + } + + fn load(&mut self, _registry: &mut DataRegistry) -> anyhow::Result> { + self.log_loading("Scanning admin directories..."); + let mut sources = HashMap::new(); + for dir in &self.dirs { + let path = dir.join(&self.filename); + if path.exists() { + sources.insert( + "admin".to_string(), + DataSource::from_path("admin", SourceType::Vector, path), + ); + self.log_loading(&format!("Loaded admin from {}", dir.display())); + } + } + Ok(sources) + } +} diff --git a/murasa-plugins/src/loaders/landuse.rs b/murasa-plugins/src/loaders/landuse.rs new file mode 100644 index 0000000..9bf6eb2 --- /dev/null +++ b/murasa-plugins/src/loaders/landuse.rs @@ -0,0 +1,46 @@ +use std::collections::HashMap; +use murasa_core::{ + config::EngineConfig, + data_source::{DataRegistry, DataSource, SourceType}, + base::DataLoaderPlugin, +}; + +/// Loader for land-use vector data. +#[derive(Debug)] +pub struct LandUseLoader { + name: String, + path: Option, +} + +impl LandUseLoader { + /// Create from config if a land-use path is available. + pub fn from_config(config: &EngineConfig) -> Option { + let path = config.paths.as_ref()?.land_use.as_ref()?.clone(); + Some(Self { name: "LandUseLoader".to_string(), path: Some(path) }) + } +} + +const LU_PROVIDES: &[&str] = &["land_use"]; +const LU_REQUIRES: &[&str] = &[]; + +impl DataLoaderPlugin for LandUseLoader { + fn provides(&self) -> &'static [&'static str] { LU_PROVIDES } + fn requires(&self) -> &'static [&'static str] { LU_REQUIRES } + fn name(&self) -> &str { &self.name } + + fn can_handle(&self, _config: &EngineConfig) -> bool { + self.path.is_some() + } + + fn load(&mut self, _registry: &mut DataRegistry) -> anyhow::Result> { + let mut sources = HashMap::new(); + if let Some(ref path) = self.path { + self.log_loading(&format!("Loading land-use from {}", path.display())); + sources.insert( + "land_use".to_string(), + DataSource::from_path("land_use", SourceType::Vector, path.clone()), + ); + } + Ok(sources) + } +} diff --git a/murasa-plugins/src/loaders/mod.rs b/murasa-plugins/src/loaders/mod.rs new file mode 100644 index 0000000..cf7d1fb --- /dev/null +++ b/murasa-plugins/src/loaders/mod.rs @@ -0,0 +1,9 @@ +//! Data loader plugins for ingesting external datasets. + +pub mod admin_boundaries; +pub mod landuse; +pub mod rivers; + +pub use admin_boundaries::AdminBoundariesLoader; +pub use landuse::LandUseLoader; +pub use rivers::RiversLoader; diff --git a/murasa-plugins/src/loaders/rivers.rs b/murasa-plugins/src/loaders/rivers.rs new file mode 100644 index 0000000..797eb03 --- /dev/null +++ b/murasa-plugins/src/loaders/rivers.rs @@ -0,0 +1,46 @@ +use std::collections::HashMap; +use murasa_core::{ + config::EngineConfig, + data_source::{DataRegistry, DataSource, SourceType}, + base::DataLoaderPlugin, +}; + +/// Loader for river network vector data. +#[derive(Debug)] +pub struct RiversLoader { + name: String, + path: Option, +} + +impl RiversLoader { + /// Create from config if a river path is available. + pub fn from_config(config: &EngineConfig) -> Option { + let path = config.paths.as_ref()?.river.as_ref()?.clone(); + Some(Self { name: "RiversLoader".to_string(), path: Some(path) }) + } +} + +const RIVERS_PROVIDES: &[&str] = &["river"]; +const RIVERS_REQUIRES: &[&str] = &[]; + +impl DataLoaderPlugin for RiversLoader { + fn provides(&self) -> &'static [&'static str] { RIVERS_PROVIDES } + fn requires(&self) -> &'static [&'static str] { RIVERS_REQUIRES } + fn name(&self) -> &str { &self.name } + + fn can_handle(&self, _config: &EngineConfig) -> bool { + self.path.is_some() + } + + fn load(&mut self, _registry: &mut DataRegistry) -> anyhow::Result> { + let mut sources = HashMap::new(); + if let Some(ref path) = self.path { + self.log_loading(&format!("Loading rivers from {}", path.display())); + sources.insert( + "river".to_string(), + DataSource::from_path("river", SourceType::Vector, path.clone()), + ); + } + Ok(sources) + } +} diff --git a/murasa-plugins/src/proximity.rs b/murasa-plugins/src/proximity.rs new file mode 100644 index 0000000..f9d3c1c --- /dev/null +++ b/murasa-plugins/src/proximity.rs @@ -0,0 +1,55 @@ +use ndarray::Array2; +use murasa_core::{ + data_source::DataRegistry, + base::{ParameterPlugin, AffineTransform}, +}; + +/// Proximity to features (rivers, faults, etc.). +/// +/// Closer to feature = higher risk. +#[derive(Debug)] +pub struct ProximityPlugin { + name: String, + weight: f64, + feature_name: String, + max_distance: f64, +} + +impl ProximityPlugin { + /// Create a new proximity plugin. + pub fn new(weight: f64, feature_name: impl Into, max_distance: f64) -> Self { + Self { + name: "proximity".to_string(), + weight, + feature_name: feature_name.into(), + max_distance, + } + } +} + +impl ParameterPlugin for ProximityPlugin { + fn name(&self) -> &str { &self.name } + fn weight(&self) -> f64 { self.weight } + fn set_weight(&mut self, weight: f64) { self.weight = weight; } + fn is_exclusion(&self) -> bool { false } + + fn validate_requirements(&self, registry: &DataRegistry) -> bool { + registry.has(&self.feature_name) + } + + fn process( + &mut self, + registry: &mut DataRegistry, + grid_shape: (usize, usize), + transform: &AffineTransform, + crs: &str, + ) -> anyhow::Result> { + self.log_processing(); + use murasa_core::processing::ProcessingContext; + let ctx = ProcessingContext::new(grid_shape, transform.clone(), crs); + let feature_source = registry.get(&self.feature_name, true)?; + let result = ctx.distance_to_features(feature_source, Some(self.max_distance))?; + self.log_result(&result); + Ok(result) + } +} diff --git a/murasa-plugins/src/rainfall.rs b/murasa-plugins/src/rainfall.rs new file mode 100644 index 0000000..b4afd63 --- /dev/null +++ b/murasa-plugins/src/rainfall.rs @@ -0,0 +1,66 @@ +use std::collections::HashMap; +use ndarray::Array2; +use murasa_core::{ + data_source::DataRegistry, + base::{ParameterPlugin, AffineTransform}, +}; + +/// Rainfall-based risk parameter. +/// +/// Expects a vector source with a rainfall intensity column. +#[derive(Debug)] +pub struct RainfallPlugin { + name: String, + weight: f64, + column_name: String, + value_mapping: Option>, +} + +impl RainfallPlugin { + /// Create a new rainfall plugin. + pub fn new(weight: f64, column_name: impl Into, value_mapping: Option>) -> Self { + Self { + name: "rainfall".to_string(), + weight, + column_name: column_name.into(), + value_mapping, + } + } +} + +impl ParameterPlugin for RainfallPlugin { + fn name(&self) -> &str { &self.name } + fn weight(&self) -> f64 { self.weight } + fn set_weight(&mut self, weight: f64) { self.weight = weight; } + fn is_exclusion(&self) -> bool { false } + + fn validate_requirements(&self, registry: &DataRegistry) -> bool { + registry.has("rainfall") + } + + fn process( + &mut self, + registry: &mut DataRegistry, + grid_shape: (usize, usize), + transform: &AffineTransform, + crs: &str, + ) -> anyhow::Result> { + self.log_processing(); + use murasa_core::processing::ProcessingContext; + use murasa_core::normalize::minmax; + let ctx = ProcessingContext::new(grid_shape, transform.clone(), crs); + let source = registry.get("rainfall", true)?; + let data = ctx.rasterize_vector(source, Some(&self.column_name), true, 0.0)?; + let mapped = if let Some(ref mapping) = self.value_mapping { + data.mapv(|v| { + let key = v as i32; + mapping.get(&key).copied().unwrap_or(v) + }) + } else { + data + }; + let normalized = minmax(&mapped); + self.log_result(&normalized); + Ok(normalized) + } +} diff --git a/murasa-plugins/src/reporting.rs b/murasa-plugins/src/reporting.rs new file mode 100644 index 0000000..7f83ca1 --- /dev/null +++ b/murasa-plugins/src/reporting.rs @@ -0,0 +1,49 @@ +use std::collections::HashMap; +use ndarray::Array2; +use murasa_core::{ + config::EngineConfig, + data_source::DataRegistry, + base::{PostProcessorPlugin, AffineTransform}, +}; + +/// Post-processor that adds explainability metadata to vector outputs. +#[derive(Debug)] +pub struct VectorExplainabilityPlugin { + name: String, +} + +impl VectorExplainabilityPlugin { + /// Create a new explainability plugin. + pub fn new() -> Self { + Self { name: "vector_explainability".to_string() } + } +} + +impl PostProcessorPlugin for VectorExplainabilityPlugin { + fn name(&self) -> &str { &self.name } + + fn post_process( + &mut self, + _registry: &mut DataRegistry, + risk_raster: &Array2, + _transform: &AffineTransform, + _crs: &str, + output_dir: &std::path::Path, + config: &EngineConfig, + ) -> anyhow::Result<()> { + log::info!("[{}] Generating explainability report...", self.name); + let report_path = output_dir.join("explainability.json"); + let mut report = HashMap::new(); + report.insert("analysis_name", serde_json::Value::String(config.name.clone())); + report.insert("risk_min", serde_json::Value::Number( + serde_json::Number::from_f64(0.0).unwrap() + )); + report.insert("risk_max", serde_json::Value::Number( + serde_json::Number::from_f64(1.0).unwrap() + )); + let contents = serde_json::to_string_pretty(&report)?; + std::fs::write(&report_path, contents)?; + log::info!("Explainability report saved to {}", report_path.display()); + Ok(()) + } +} diff --git a/murasa-plugins/src/slope.rs b/murasa-plugins/src/slope.rs new file mode 100644 index 0000000..91cf8fb --- /dev/null +++ b/murasa-plugins/src/slope.rs @@ -0,0 +1,59 @@ +use ndarray::Array2; +use murasa_core::{ + data_source::DataRegistry, + base::{ParameterPlugin, AffineTransform}, +}; + +/// Slope-based risk parameter. +/// +/// `inverse=true`: flat areas = high risk (flood - water accumulates). +/// `inverse=false`: steep areas = high risk (landslide). +#[derive(Debug)] +pub struct SlopePlugin { + name: String, + weight: f64, + inverse: bool, +} + +impl SlopePlugin { + /// Create a new slope plugin. + pub fn new(weight: f64, inverse: bool) -> Self { + Self { name: "slope".to_string(), weight, inverse } + } +} + +impl ParameterPlugin for SlopePlugin { + fn name(&self) -> &str { &self.name } + fn weight(&self) -> f64 { self.weight } + fn set_weight(&mut self, weight: f64) { self.weight = weight; } + fn is_exclusion(&self) -> bool { false } + + fn validate_requirements(&self, registry: &DataRegistry) -> bool { + registry.has("slope") || registry.has("dem") + } + + fn process( + &mut self, + registry: &mut DataRegistry, + grid_shape: (usize, usize), + transform: &AffineTransform, + crs: &str, + ) -> anyhow::Result> { + self.log_processing(); + use murasa_core::processing::ProcessingContext; + use murasa_core::normalize::percentile; + let ctx = ProcessingContext::new(grid_shape, transform.clone(), crs); + let slope = if registry.has("slope") { + ctx.resample_raster(registry.get("slope", true)?, "bilinear")? + } else { + let dem = ctx.resample_raster(registry.get("dem", true)?, "bilinear")?; + murasa_core::terrain::calculate_slope(&dem, ctx.resolution as f32, true) + }; + let mut normalized = percentile(&slope, 5.0, 95.0); + if self.inverse { + normalized = murasa_core::normalize::invert(&normalized); + } + self.log_result(&normalized); + Ok(normalized) + } +} diff --git a/murasa-plugins/src/twi.rs b/murasa-plugins/src/twi.rs new file mode 100644 index 0000000..a0905a8 --- /dev/null +++ b/murasa-plugins/src/twi.rs @@ -0,0 +1,59 @@ +use ndarray::Array2; +use murasa_core::{ + data_source::DataRegistry, + base::{ParameterPlugin, AffineTransform}, +}; + +/// Topographic Wetness Index (TWI) risk parameter. +#[derive(Debug)] +pub struct TWIPlugin { + name: String, + weight: f64, +} + +impl TWIPlugin { + /// Create a new TWI plugin. + pub fn new(weight: f64) -> Self { + Self { name: "twi".to_string(), weight } + } +} + +impl ParameterPlugin for TWIPlugin { + fn name(&self) -> &str { &self.name } + fn weight(&self) -> f64 { self.weight } + fn set_weight(&mut self, weight: f64) { self.weight = weight; } + fn is_exclusion(&self) -> bool { false } + + fn validate_requirements(&self, registry: &DataRegistry) -> bool { + registry.has("twi") || registry.has("dem") + } + + fn process( + &mut self, + registry: &mut DataRegistry, + grid_shape: (usize, usize), + transform: &AffineTransform, + crs: &str, + ) -> anyhow::Result> { + self.log_processing(); + use murasa_core::processing::ProcessingContext; + use murasa_core::normalize::percentile; + let ctx = ProcessingContext::new(grid_shape, transform.clone(), crs); + let twi = if registry.has("twi") { + ctx.resample_raster(registry.get("twi", true)?, "bilinear")? + } else { + let dem = ctx.resample_raster(registry.get("dem", true)?, "bilinear")?; + // Simplified TWI proxy = ln(flow_accumulation / slope) + let slope = murasa_core::terrain::calculate_slope(&dem, ctx.resolution as f32, false); + let flow = murasa_core::hydro::flow_accumulation(&dem); + Array2::from_shape_fn(dem.raw_dim(), |(i, j)| { + let s = slope[[i, j]].max(0.001); + let f = flow[[i, j]].max(1.0); + (f / s).ln().max(0.0) as f32 + }) + }; + let normalized = percentile(&twi, 5.0, 95.0); + self.log_result(&normalized); + Ok(normalized) + } +} diff --git a/murasa-plugins/src/water_exclusion.rs b/murasa-plugins/src/water_exclusion.rs new file mode 100644 index 0000000..d1f98f4 --- /dev/null +++ b/murasa-plugins/src/water_exclusion.rs @@ -0,0 +1,50 @@ +use ndarray::Array2; +use murasa_core::{ + data_source::DataRegistry, + base::{ParameterPlugin, AffineTransform}, +}; + +/// Water exclusion mask parameter. +/// +/// Pixels classified as water receive NaN (excluded from risk calculation). +#[derive(Debug)] +pub struct WaterExclusionPlugin { + name: String, + weight: f64, +} + +impl WaterExclusionPlugin { + /// Create a new water exclusion plugin. + pub fn new() -> Self { + Self { name: "water_exclusion".to_string(), weight: 0.0 } + } +} + +impl ParameterPlugin for WaterExclusionPlugin { + fn name(&self) -> &str { &self.name } + fn weight(&self) -> f64 { self.weight } + fn set_weight(&mut self, weight: f64) { self.weight = weight; } + fn is_exclusion(&self) -> bool { true } + + fn validate_requirements(&self, registry: &DataRegistry) -> bool { + registry.has("water") || registry.has("land_use") + } + + fn process( + &mut self, + registry: &mut DataRegistry, + grid_shape: (usize, usize), + transform: &AffineTransform, + crs: &str, + ) -> anyhow::Result> { + self.log_processing(); + use murasa_core::processing::ProcessingContext; + let ctx = ProcessingContext::new(grid_shape, transform.clone(), crs); + let source_key = if registry.has("water") { "water" } else { "land_use" }; + let data = ctx.rasterize_vector(registry.get(source_key, true)?, None, true, 0.0)?; + // Mark water pixels (value == 1) as exclusion + let mask = data.mapv(|v| if (v - 1.0).abs() < 0.1 { 1.0f32 } else { 0.0f32 }); + self.log_result(&mask); + Ok(mask) + } +} diff --git a/plugins/__init__.py b/plugins/__init__.py deleted file mode 100644 index 6de541a..0000000 --- a/plugins/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from .rainfall import RainfallPlugin -from .elevation import ElevationPlugin -from .slope import SlopePlugin -from .twi import TWIPlugin -from .proximity import ProximityPlugin -from .land_use import LandUsePlugin -from .water_exclusion import WaterExclusionPlugin -from .reporting import VectorExplainabilityPlugin -from .curvature import CurvaturePlugin, SlopeAspectPlugin diff --git a/plugins/curvature.py b/plugins/curvature.py deleted file mode 100644 index e3fb171..0000000 --- a/plugins/curvature.py +++ /dev/null @@ -1,148 +0,0 @@ -import numpy as np -from scipy.ndimage import generic_filter - -from murasa.core.base import ParameterPlugin -from murasa.core.data_source import DataRegistry -from murasa.core.resampler import RasterResampler -from murasa.core.logger import logger - - -class CurvaturePlugin(ParameterPlugin): - """ - Terrain curvature risk factor - - Can use: - 1. Pre-computed curvature raster - 2. Compute from DEM on-the-fly - - For landslide: - - Concave (negative) = HIGH risk (water accumulates) - - Convex (positive) = LOW risk (water disperses) - """ - - def __init__(self, name: str = "curvature", weight: float = 0.10, - inverse: bool = True, curvature_type: str = "profile"): - """ - Args: - name: Plugin identifier - weight: Weight in risk calculation - inverse: If True, concave (negative) = high risk - curvature_type: "profile", "plan", or "total" - """ - super().__init__(name, weight) - self.inverse = inverse - self.curvature_type = curvature_type - - def validate_requirements(self, registry: DataRegistry) -> bool: - return registry.has('curvature') or registry.has('dem') - - def process(self, registry, grid_shape, transform, crs): - self.log_processing() - - resampler = RasterResampler(grid_shape, transform, crs) - - if registry.has('curvature'): - curv_source = registry.get('curvature') - curvature = resampler.resample(curv_source) - else: - logger.info(" Computing curvature from DEM...") - curvature = self._compute_from_dem(registry, resampler, grid_shape, transform, crs) - - normalized = self._normalize_curvature(curvature) - - if self.inverse: - normalized = normalized - - self.result = normalized.astype(np.float32) - self.log_result() - return self.result - - def _compute_from_dem(self, registry, resampler, grid_shape, transform, crs): - dem_source = registry.get('dem') - dem = resampler.resample(dem_source) - cell_size = abs(transform[0]) - - from murasa.core.terrain import calculate_curvature - curvature = calculate_curvature(dem, cell_size, method=self.curvature_type) - return curvature - - def _normalize_curvature(self, curvature): - """ - Normalize curvature to 0-1 range - - Maps: negative (concave) will map to 1 (high risk) - zero (flat) will map to 0.5 - positive (convex) will map to 0 (low risk) - """ - p1, p99 = np.nanpercentile(curvature, [1, 99]) - curvature_clipped = np.clip(curvature, p1, p99) - - max_abs = max(abs(p1), abs(p99)) - if max_abs == 0: - return np.full_like(curvature, 0.5) - - scaled = curvature_clipped / max_abs - - normalized = (1 - scaled) / 2 - - return np.clip(normalized, 0, 1) - - -class SlopeAspectPlugin(ParameterPlugin): - """ - Slope Aspect (direction) risk factor - - Certain slope orientations may be more susceptible. - - But this is just a what-if plugin for now. - - I still haven't had the good use case for this. - """ - - def __init__(self, name: str = "aspect", weight: float = 0.05, - high_risk_directions: list = None): - super().__init__(name, weight) - self.high_risk_directions = high_risk_directions or [0, 45, 315] - - def validate_requirements(self, registry: DataRegistry) -> bool: - return registry.has('aspect') or registry.has('dem') - - def process(self, registry, grid_shape, transform, crs): - self.log_processing() - - resampler = RasterResampler(grid_shape, transform, crs) - - if registry.has('aspect'): - aspect_source = registry.get('aspect') - aspect = resampler.resample(aspect_source, method="nearest") - else: - aspect = self._compute_aspect(registry, resampler) - - risk = self._aspect_to_risk(aspect) - - self.result = risk.astype(np.float32) - self.log_result() - return self.result - - def _compute_aspect(self, registry, resampler): - dem_source = registry.get('dem') - dem = resampler.resample(dem_source) - - from murasa.core.terrain import calculate_aspect - - cell_size = abs(resampler.transform[0]) - - return calculate_aspect(dem, cell_size) - - def _aspect_to_risk(self, aspect): - """Convert aspect to risk based on high-risk directions""" - risk = np.zeros_like(aspect) - - for direction in self.high_risk_directions: - diff = np.abs(aspect - direction) - diff = np.minimum(diff, 360 - diff) - - contribution = np.exp(-(diff ** 2) / (2 * 30 ** 2)) - risk = np.maximum(risk, contribution) - - return risk diff --git a/plugins/elevation.py b/plugins/elevation.py deleted file mode 100644 index d4b57ec..0000000 --- a/plugins/elevation.py +++ /dev/null @@ -1,12 +0,0 @@ -from murasa.core.base import RasterParameterPlugin - - -class ElevationPlugin(RasterParameterPlugin): - """ - Elevation-based risk - - inverse=True: Low elevation = High risk (Flood) - inverse=False: High elevation = High risk (Landslide on steep areas) - """ - source_keys = ['dem', 'elevation'] - inverse = True diff --git a/plugins/land_use.py b/plugins/land_use.py deleted file mode 100644 index 9825ee0..0000000 --- a/plugins/land_use.py +++ /dev/null @@ -1,89 +0,0 @@ -import numpy as np -from typing import Optional, Dict - -from murasa.core.base import ParameterPlugin -from murasa.core.data_source import DataRegistry -from murasa.core.resampler import VectorResampler -from murasa.core.processing import ProcessingContext -from murasa.core.logger import logger - - -class LandUsePlugin(ParameterPlugin): - """ - Land use runoff coefficient - - Different land covers have different runoff characteristics: - - Water/Impervious = 1.0 (100% runoff) - - Forest = 0.2 (80% infiltration) - """ - - def __init__(self, name: str = "land_use", weight: float = 0.20, - coefficients: Optional[Dict[str, float]] = None, - priorities: Optional[Dict[str, int]] = None): - super().__init__(name, weight) - self.coefficients = coefficients or { - 'water': 1.0, - 'settlement': 0.85, - 'building': 0.90, - 'road': 0.90, - 'paddy': 0.70, - 'plantation': 0.40, - 'forest': 0.20, - 'shrub': 0.35, - 'default': 0.50 - } - self.priorities = priorities or { - 'water': 100, - 'building': 90, - 'road': 80, - 'settlement': 70, - 'paddy': 50, - 'plantation': 40, - 'shrub': 30, - 'forest': 20, - 'default': 0 - } - - def validate_requirements(self, registry: DataRegistry) -> bool: - return any(k.startswith('landuse_') for k in registry.list_sources()) - - def process(self, registry, grid_shape, transform, crs): - self.log_processing() - - landuse_raster = np.full(grid_shape, self.coefficients['default'], - dtype=np.float32) - - landuse_sources = [k for k in registry.list_sources() - if k.startswith('landuse_')] - - if not landuse_sources: - logger.warning(" No landuse data found, using default") - return landuse_raster - - priority_order = sorted( - landuse_sources, - key=lambda x: self.priorities.get( - x.replace('landuse_', ''), 0 - ) - ) - - logger.info(f" Rasterizing {len(priority_order)} landuse layers...") - - ctx = ProcessingContext(grid_shape, transform, crs) - - for name in priority_order: - source = registry.get(name) - lu_type = name.replace('landuse_', '') - coeff = self.coefficients.get(lu_type, 0.5) - - if source.data.empty: - continue - - mask = ctx.rasterize_vector(source).astype(bool) - - landuse_raster[mask] = coeff - logger.debug(f" {lu_type}: {mask.sum()} pixels") - - self.result = landuse_raster - self.log_result() - return landuse_raster diff --git a/plugins/loaders/__init__.py b/plugins/loaders/__init__.py deleted file mode 100644 index a9a2c5b..0000000 --- a/plugins/loaders/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__all__ = [] diff --git a/plugins/loaders/admin_boundaries.py b/plugins/loaders/admin_boundaries.py deleted file mode 100644 index fa8ba79..0000000 --- a/plugins/loaders/admin_boundaries.py +++ /dev/null @@ -1,38 +0,0 @@ -from pathlib import Path -from murasa.core.base import DataLoaderPlugin -from murasa.core.file_scanner import scan_filename -from murasa.core.logger import log_success, log_error - - -class AdminBoundariesLoader(DataLoaderPlugin): - """ - Load administrative boundaries from RBI directories - Expected filename: ADMINISTRASIDESA_AR_25K.shp - """ - provides = ["admin"] - requires = [] - - def __init__(self, config): - super().__init__("AdminBoundariesLoader") - self.admin_dirs = config.paths.admin_dirs - self.target_crs = config.spatial.target_crs - self.filename = "ADMINISTRASIDESA_AR_25K.shp" - - @classmethod - def can_handle(cls, config) -> bool: - return config.paths.admin_dirs is not None and len(config.paths.admin_dirs) > 0 - - def load(self, registry): - self.log_loading(f"Loading administrative boundaries from {len(self.admin_dirs)} directories...") - - gdf = scan_filename(self.admin_dirs, self.filename, self.target_crs) - - if gdf.empty: - log_error(f"No {self.filename} found in provided directories") - return {} - - source = registry.register_vector("admin", gdf, self.target_crs, feature_count=len(gdf)) - self.loaded_sources["admin"] = source - - log_success(f"Loaded administrative boundaries: {len(gdf)} features") - return self.loaded_sources diff --git a/plugins/loaders/landuse.py b/plugins/loaders/landuse.py deleted file mode 100644 index e18869c..0000000 --- a/plugins/loaders/landuse.py +++ /dev/null @@ -1,237 +0,0 @@ -from pathlib import Path -from typing import Dict, List, Optional -import geopandas as gpd -import fiona -import pandas as pd - -from murasa.core.base import DataLoaderPlugin -from murasa.core.data_source import DataRegistry, DataSource, SourceType -from murasa.core.logger import log_success, log_warning - - -class LandUseLoader(DataLoaderPlugin): - """ - Load land use/land cover data from RBI directories - - Categories: settlement, forest, shrub, plantation, paddy, water, roads - """ - - provides = [ - "landuse_settlement", "landuse_forest", "landuse_shrub", - "landuse_plantation", "landuse_paddy", "landuse_water", "landuse_road" - ] - requires = [] - - DEFAULT_PATTERNS = { - 'settlement': [ - 'PEMUKIMAN_AR', 'BANGUNAN_AR', 'INDUSTRI_AR', - 'NIAGA_AR', 'PEMERINTAHAN_AR', 'PENDIDIKAN_AR', - 'ARENAOLAHRAGA_AR', 'SARANAIBADAH_AR', 'KESEHATAN_AR' - ], - 'forest': [ - 'HUTAN', 'HUTANLAHANRENDAH', 'HUTANKERING', 'HUTANRAWA' - ], - 'shrub': [ - 'SEMAKBELUKAR', 'HERBADANRUMPUT', 'LADANG', - 'ALANG', 'TANAMANCAMPUR', 'TEGALAN' - ], - 'plantation': [ - 'PERKEBUNAN', 'KEBUN' - ], - 'paddy': [ - 'SAWAH', 'EMPANG', 'TAMBAK' - ], - 'water': [ - 'DANAU_AR', 'WADUK_AR', 'SITU_AR', 'RAWA_AR' - ] - } - - ROAD_PATTERNS = ['JALAN_LN', 'JALAN'] - - ROAD_WIDTHS = { - 'tol': 12.0, - 'arteri': 8.0, - 'kolektor': 5.0, - 'lokal': 3.5, - 'default': 3.0 - } - - def __init__(self, config): - super().__init__("LandUseLoader") - self.admin_dirs = [Path(d) for d in config.paths.admin_dirs] - self.target_crs = config.spatial.target_crs - self.patterns = self.DEFAULT_PATTERNS - self.load_roads = True - self.road_widths = self.ROAD_WIDTHS - - @classmethod - def can_handle(cls, config) -> bool: - """Check if admin directories are configured""" - return config.paths.admin_dirs is not None and len(config.paths.admin_dirs) > 0 - - def load(self, registry: DataRegistry) -> Dict[str, DataSource]: - """Load all land use categories and register to registry""" - self.log_loading("Loading land use data...") - - results = {} - - for category, patterns in self.patterns.items(): - gdf = self._load_layers(patterns) - - if not gdf.empty: - source_name = f"landuse_{category}" - source = DataSource( - name=source_name, - source_type=SourceType.VECTOR, - data=gdf, - crs=self.target_crs, - metadata={'category': category, 'feature_count': len(gdf)} - ) - registry.register(source_name, source) - results[source_name] = source - self.log_loading(f" {category}: {len(gdf)} features") - else: - log_warning(f" No data found for {category}") - - if self.load_roads: - gdf_roads = self._load_roads_as_polygons() - - if not gdf_roads.empty: - source = DataSource( - name="landuse_road", - source_type=SourceType.VECTOR, - data=gdf_roads, - crs=self.target_crs, - metadata={'category': 'road', 'feature_count': len(gdf_roads)} - ) - registry.register("landuse_road", source) - results["landuse_road"] = source - self.log_loading(f" roads: {len(gdf_roads)} features (buffered)") - - self.loaded_sources = results - log_success(f"Loaded {len(results)} land use categories") - - return results - - def _load_layers(self, filename_patterns: List[str]) -> gpd.GeoDataFrame: - """Scan all admin dirs for files matching patterns and merge them""" - frames = [] - - for admin_dir in self.admin_dirs: - if not admin_dir.exists(): - continue - - for shp_file in admin_dir.glob("*.shp"): - if self._match_pattern(shp_file.name, filename_patterns): - try: - gdf = gpd.read_file(shp_file).to_crs(self.target_crs) - gdf = self._standardize_columns(gdf, shp_file.stem) - frames.append(gdf) - except Exception as e: - log_warning(f"Failed to load SHP {shp_file.name}: {e}") - - for gdb_dir in admin_dir.glob("*.gdb"): - try: - layers = fiona.listlayers(str(gdb_dir)) - for layer in layers: - if self._match_pattern(layer, filename_patterns): - try: - gdf = gpd.read_file(gdb_dir, layer=layer).to_crs(self.target_crs) - gdf = self._standardize_columns(gdf, layer) - frames.append(gdf) - except Exception as e: - log_warning(f"Failed to load layer {layer}: {e}") - except Exception as e: - log_warning(f"Failed to read GDB {gdb_dir.name}: {e}") - - if not frames: - return gpd.GeoDataFrame() - - return gpd.GeoDataFrame(pd.concat(frames, ignore_index=True), crs=self.target_crs) - - def _load_roads_as_polygons(self) -> gpd.GeoDataFrame: - """Load road line features and buffer them to polygons""" - frames = [] - - for admin_dir in self.admin_dirs: - if not admin_dir.exists(): - continue - - for shp_file in admin_dir.glob("*.shp"): - if self._match_pattern(shp_file.name, self.ROAD_PATTERNS): - try: - gdf = gpd.read_file(shp_file).to_crs(self.target_crs) - gdf = self._standardize_columns(gdf, shp_file.stem) - frames.append(gdf) - except Exception as e: - log_warning(f"Failed to load road SHP {shp_file.name}: {e}") - - for gdb_dir in admin_dir.glob("*.gdb"): - try: - layers = fiona.listlayers(str(gdb_dir)) - for layer in layers: - if self._match_pattern(layer, self.ROAD_PATTERNS): - try: - gdf = gpd.read_file(gdb_dir, layer=layer).to_crs(self.target_crs) - gdf = self._standardize_columns(gdf, layer) - frames.append(gdf) - except: - pass - except: - pass - - if not frames: - return gpd.GeoDataFrame() - - gdf_lines = gpd.GeoDataFrame(pd.concat(frames, ignore_index=True), crs=self.target_crs) - - gdf_metric = gdf_lines.to_crs("EPSG:3857") - - gdf_metric['geometry'] = gdf_metric.apply( - lambda row: row.geometry.buffer(self._get_road_width(row.get('REMARK', ''))), - axis=1 - ) - - return gdf_metric.to_crs(self.target_crs) - - def _get_road_width(self, remark) -> float: - """Determine road buffer width based on REMARK field""" - if not isinstance(remark, str): - return self.road_widths.get('default', 3.0) - - r = remark.lower() - - if 'tol' in r: - return self.road_widths.get('tol', 12.0) - if 'arteri' in r: - return self.road_widths.get('arteri', 8.0) - if 'kolektor' in r: - return self.road_widths.get('kolektor', 5.0) - if 'lokal' in r: - return self.road_widths.get('lokal', 3.5) - - return self.road_widths.get('default', 3.0) - - def _match_pattern(self, filename: str, patterns: List[str]) -> bool: - fname = filename.upper() - return any(p.upper() in fname for p in patterns) - - def _standardize_columns(self, gdf: gpd.GeoDataFrame, source_name: str) -> gpd.GeoDataFrame: - """Standardize column names across different data sources""" - rename_map = {} - - for col in gdf.columns: - c = col.upper() - if 'NAMOBJ' in c: - rename_map[col] = 'NAMOBJ' - elif 'REMARK' in c or 'CATATAN' in c: - rename_map[col] = 'REMARK' - - gdf = gdf.rename(columns=rename_map) - - if 'NAMOBJ' not in gdf.columns: - gdf['NAMOBJ'] = 'Unknown' - if 'REMARK' not in gdf.columns: - gdf['REMARK'] = source_name - - return gdf diff --git a/plugins/loaders/rivers.py b/plugins/loaders/rivers.py deleted file mode 100644 index 06d0997..0000000 --- a/plugins/loaders/rivers.py +++ /dev/null @@ -1,39 +0,0 @@ -from pathlib import Path -from murasa.core.base import DataLoaderPlugin -from murasa.core.file_scanner import scan_all -from murasa.core.logger import log_success, log_warning - - -class RiversLoader(DataLoaderPlugin): - """ - Load river networks from RBI directories - Expected patterns: SUNGAI_LN, SUNGAI, RIVER - """ - provides = ["river"] - requires = [] - - RIVER_PATTERNS = ['SUNGAI_LN', 'SUNGAI', 'RIVER'] - - def __init__(self, config): - super().__init__("RiversLoader") - self.admin_dirs = [Path(d) for d in config.paths.admin_dirs] - self.target_crs = config.spatial.target_crs - - @classmethod - def can_handle(cls, config) -> bool: - return config.paths.admin_dirs is not None and len(config.paths.admin_dirs) > 0 - - def load(self, registry): - self.log_loading("Loading river networks...") - - gdf_rivers = scan_all(self.admin_dirs, self.RIVER_PATTERNS, self.target_crs) - - if gdf_rivers.empty: - log_warning("No river data found in admin directories") - return {} - - source = registry.register_vector("river", gdf_rivers, self.target_crs, feature_count=len(gdf_rivers)) - self.loaded_sources["river"] = source - - log_success(f"Loaded river networks: {len(gdf_rivers)} features") - return self.loaded_sources diff --git a/plugins/proximity.py b/plugins/proximity.py deleted file mode 100644 index 2363945..0000000 --- a/plugins/proximity.py +++ /dev/null @@ -1,38 +0,0 @@ -import numpy as np -from murasa.core.base import ParameterPlugin -from murasa.core.processing import ProcessingContext -from murasa.core.logger import logger - - -class ProximityPlugin(ParameterPlugin): - """ - Proximity to features (rivers, faults, etc.) - - Closer to feature = Higher risk - """ - - def __init__(self, name: str = "proximity", weight: float = 0.10, - feature_name: str = "river", max_distance: float = 500): - super().__init__(name, weight) - self.feature_name = feature_name - self.max_distance = max_distance - - def validate_requirements(self, registry): - return registry.has(self.feature_name) - - def process(self, registry, grid_shape, transform, crs): - self.log_processing() - ctx = ProcessingContext(grid_shape, transform, crs) - - feature_source = registry.get(self.feature_name) - if feature_source.data.empty: - logger.warning(f" No {self.feature_name} features found") - return ctx.empty_grid() - - self.result = ctx.distance_to_features( - feature_source, - max_distance=self.max_distance - ) - - self.log_result() - return self.result diff --git a/plugins/rainfall.py b/plugins/rainfall.py deleted file mode 100644 index 49c2a2c..0000000 --- a/plugins/rainfall.py +++ /dev/null @@ -1,81 +0,0 @@ -import numpy as np -from typing import Optional, Dict - -from murasa.core.base import ParameterPlugin -from murasa.core.data_source import DataRegistry -from murasa.core.resampler import VectorResampler -from murasa.core.processing import ProcessingContext -from murasa.core.logger import logger - - -class RainfallPlugin(ParameterPlugin): - """ - Rainfall intensity risk factor - - Higher rainfall = Higher flood risk - """ - - def __init__(self, name: str = "rainfall", weight: float = 0.15, - column_name: str = "gridcode", - value_mapping: Optional[Dict[int, float]] = None): - super().__init__(name, weight) - self.column_name = column_name - self.value_mapping = value_mapping - - def validate_requirements(self, registry: DataRegistry) -> bool: - return registry.has('rainfall') - - def process(self, registry, grid_shape, transform, crs): - self.log_processing() - - rain_source = registry.get('rainfall') - gdf_rain = rain_source.data.to_crs(crs) - - col = self.column_name - if col not in gdf_rain.columns: - alternatives = ['intensity', 'value', 'curah_hujan', 'rainfall'] - col = next((c for c in alternatives if c in gdf_rain.columns), None) - if not col: - logger.warning("No rainfall value column found, using default") - return np.full(grid_shape, 0.5, dtype=np.float32) - - if self.value_mapping: - gdf_rain = gdf_rain.copy() - gdf_rain['mapped_value'] = gdf_rain[col].map(self.value_mapping) - - unmapped = gdf_rain['mapped_value'].isna() - if unmapped.any(): - logger.warning(f" Some rainfall values not mapped. using raw value * 500 approximation.") - gdf_rain.loc[unmapped, 'mapped_value'] = gdf_rain.loc[unmapped, col] * 500 - - value_col = 'mapped_value' - else: - value_col = col - - logger.info(f" Rasterizing {len(gdf_rain)} rainfall polygons. CRS: {gdf_rain.crs}") - - rain_source.data = gdf_rain - rain_source.data = gdf_rain - ctx = ProcessingContext(grid_shape, transform, crs) - rainfall_raster = ctx.rasterize_vector(rain_source, column=value_col) - - logger.info(f" Non-zero pixels: {np.count_nonzero(rainfall_raster)}") - - valid = rainfall_raster > 0 - if valid.any(): - unique_vals = np.unique(rainfall_raster[valid]) - if len(unique_vals) <= 1: - normalized = np.where(valid, 0.5, 0.0).astype(np.float32) - else: - rank_map = {v: (i + 1) / len(unique_vals) - for i, v in enumerate(sorted(unique_vals))} - normalized = np.zeros(grid_shape, dtype=np.float32) - for val, rank in rank_map.items(): - normalized[rainfall_raster == val] = rank - logger.info(f" Rank mapping: {dict((int(k), round(v, 3)) for k, v in rank_map.items())}") - else: - normalized = np.full(grid_shape, 0.5, dtype=np.float32) - - self.result = normalized - self.log_result() - return normalized diff --git a/plugins/reporting.py b/plugins/reporting.py deleted file mode 100644 index d7be280..0000000 --- a/plugins/reporting.py +++ /dev/null @@ -1,354 +0,0 @@ -import numpy as np -import geopandas as gpd -import pandas as pd -import rasterio -from rasterstats import zonal_stats -from pathlib import Path -import tempfile -import os -from typing import Any, List, Dict, Optional - -try: - import jenkspy - JENKSPY_AVAILABLE = True -except ImportError: - JENKSPY_AVAILABLE = False - -from murasa.core.normalize import classify_jenks -from murasa.core.analysis import calculate_zonal_stats - -from murasa.core.data_source import DataRegistry, SourceType -from murasa.core.logger import logger -from murasa.core.base import PostProcessorPlugin - -class VectorExplainabilityPlugin(PostProcessorPlugin): - """ - Plugin to aggregate raster risk to vector (Kelurahan/Desa) - and provide explainable reasoning for the risk level. - """ - - def __init__(self, name: str = "vector_explainability"): - super().__init__(name) - - def post_process(self, - registry: DataRegistry, - risk_raster: np.ndarray, - transform: Any, - crs: str, - output_dir: Path, - config: Any = None): - - logger.info(f"Running Post-Processor: {self.name}") - - if not registry.has('admin'): - logger.warning(" No admin boundaries found in registry. Skipping vector analysis.") - return - - admin_source = registry.get('admin') - gdf_admin = admin_source.data.to_crs(crs) - - weights = config.weights if config else None - - with tempfile.NamedTemporaryFile(suffix='.tif', delete=False) as tmp: - tmp_risk_path = tmp.name - - try: - with rasterio.open( - tmp_risk_path, 'w', - driver='GTiff', - height=risk_raster.shape[0], - width=risk_raster.shape[1], - count=1, - dtype=np.float32, - crs=crs, - transform=transform, - nodata=np.nan - ) as dst: - dst.write(risk_raster, 1) - - classified_raster, _ = self._classify_raster_pixels(risk_raster) - - logger.info(" Computing zonal statistics (Categorical)...") - - stats = calculate_zonal_stats( - gdf_admin, - classified_raster, - stats=['count', 'mean', 'max'], - categorical=True, - nodata=0, - transform=transform, - crs=crs - ) - - results = [] - for idx, (row, stat) in enumerate(zip(gdf_admin.itertuples(), stats)): - if not stat or stat.get('count', 0) == 0: - continue - - total_pixels = stat.get('count', 0) - - pcts = {} - for cls in range(1, 7): - count = stat.get(cls, 0) - pcts[f"pct_class_{cls}"] = (count / total_pixels * 100) if total_pixels > 0 else 0 - - pct_extreme = pcts.get("pct_class_6", 0) - pct_very_high = pcts.get("pct_class_5", 0) - pct_high = pcts.get("pct_class_4", 0) - - data = { - 'geometry': row.geometry, - 'area_ha': getattr(row, 'area_ha', 0), - 'risk_score': stat.get('mean', 0), - 'risk_max': stat.get('max', 0), - 'pixel_count': total_pixels, - 'pct_extreme': pct_extreme, - 'pct_very_high': pct_very_high, - 'pct_high': pct_high - } - - for col in gdf_admin.columns: - if col not in data and col != 'geometry': - data[col] = getattr(row, col) - - results.append(data) - - if not results: - logger.warning("No results to export.") - return - - out_gdf = gpd.GeoDataFrame(results, crs=crs) - - rainfall_params = config.plugin_parameters.get('rainfall', {}) if config else {} - if rainfall_params.get('year'): - out_gdf['year'] = rainfall_params['year'] - if rainfall_params.get('month'): - out_gdf['month'] = rainfall_params['month'] - - weights = config.weights if config else None - self._enrich_factors(out_gdf, registry, transform, crs, weights) - - method = config.analysis.classification_method if config and hasattr(config, 'analysis') else 'jenks' - out_gdf = self._classify_vector_legacy(out_gdf, method) - - out_gdf['reason'] = out_gdf.apply(self._generate_explanation, axis=1) - - self._add_explainability_metrics(out_gdf) - - - out_shp = output_dir / "flood_risk_vector_explained.shp" - out_csv = output_dir / "flood_risk_explainability.csv" - - try: - out_gdf.to_file(out_shp) - logger.info(f" Saved explained vector output: {out_shp.name}") - except Exception as e: - logger.error(f"Failed to save SHP: {e}") - - df_export = pd.DataFrame(out_gdf.drop(columns='geometry')) - df_export.to_csv(out_csv, index=False) - logger.info(f" Saved CSV output: {out_csv.name}") - - if config and hasattr(config, 'analysis') and config.analysis.enable_sensitivity_analysis: - self._run_sensitivity_check(out_gdf, config.analysis.sensitivity_scenarios) - - - - - - finally: - if os.path.exists(tmp_risk_path): - os.remove(tmp_risk_path) - - def _enrich_factors(self, gdf, registry, transform, crs, weights): - """Extract mean values for each factor using risk layers""" - logger.info(" Extracting contributing factors...") - - factor_map = { - 'rainfall': 'p_rainfall', - 'elevation': 'p_elevation', - 'slope': 'p_slope', - 'twi': 'p_twi', - 'land_use': 'p_landuse', - 'proximity': 'p_prox' - } - - for key, col_name in factor_map.items(): - risk_key = f"risk_{key}" - source = None - - if registry.has(risk_key): - source = registry.get(risk_key) - elif registry.has(key) and registry.get(key).source_type == SourceType.RASTER: - source = registry.get(key) - - if source is None: - gdf[col_name] = 0.0 - continue - - data = source.data - - stats = calculate_zonal_stats( - gdf, - source, - stats=['mean'], - nodata=np.nan, - transform=transform, - crs=crs - ) - - gdf[col_name] = [s['mean'] if s and s['mean'] is not None else 0.0 for s in stats] - - if weights: - w_dict = weights.as_dict() if hasattr(weights, 'as_dict') else dict(weights) - gdf['c_rainfall'] = gdf['p_rainfall'] * w_dict.get('rainfall', 0) - gdf['c_elev'] = gdf['p_elevation'] * w_dict.get('elevation', 0) - gdf['c_slope'] = gdf['p_slope'] * w_dict.get('slope', 0) - gdf['c_twi'] = gdf['p_twi'] * w_dict.get('twi', 0) - gdf['c_landuse'] = gdf['p_landuse'] * w_dict.get('land_use', 0) - gdf['c_prox'] = gdf['p_prox'] * w_dict.get('proximity', 0) - - def _classify_raster_pixels(self, risk_raster): - """Classify 0-1 float raster into 1-6 integer raster using Jenks""" - logger.info(" Classifying Raster Pixels (1-6 Scale)...") - return classify_jenks(risk_raster, n_classes=6) - - def _classify_vector_legacy(self, gdf, method): - """Classify vector based on composite score (Legacy Logic)""" - gdf['composite_risk'] = ( - gdf['pct_extreme'] * 2.0 + - gdf['pct_very_high'] * 1.5 + - gdf['pct_high'] * 1.0 + - gdf['risk_score'] * 0.5 - ) - - labels = ['Very Low', 'Low', 'Medium', 'High', 'Very High', 'Extreme'] - n_classes = len(labels) - - if method == 'jenks' and JENKSPY_AVAILABLE: - try: - breaks = jenkspy.jenks_breaks(gdf['composite_risk'], n_classes=n_classes) - gdf['severity'] = pd.cut(gdf['composite_risk'], bins=breaks, labels=labels, include_lowest=True) - except Exception as e: - logger.warning(f"Jenks on vector failed: {e}") - try: - gdf['severity'] = pd.qcut(gdf['composite_risk'], q=n_classes, labels=labels, duplicates='drop') - except ValueError: - gdf['severity'] = 'Low' - else: - try: - gdf['severity'] = pd.qcut(gdf['composite_risk'], q=n_classes, labels=labels, duplicates='drop') - except ValueError: - gdf['severity'] = 'Low' - - return gdf - - def _classify_vector_expert(self, gdf): - """ - Original rule-based classification (Explicit Thresholds). - """ - logger.info(" Applying thresholds (Rule-Based)...") - - severity_list = [] - - for _, row in gdf.iterrows(): - extreme_pct = row.get('pct_extreme', 0) - very_high_pct = row.get('pct_very_high', 0) - high_pct = row.get('pct_high', 0) - risk_score = row.get('risk_score', 0) - - if extreme_pct > 35 or (extreme_pct > 25 and risk_score >= 5.5): - severity = 'Extreme' - elif very_high_pct > 30 or extreme_pct > 15 or risk_score >= 5.0: - severity = 'Very High' - elif high_pct > 30 or very_high_pct > 20 or risk_score >= 4.2: - severity = 'High' - elif risk_score >= 3.5: - severity = 'Medium' - elif risk_score >= 2.5: - severity = 'Low' - else: - severity = 'Very Low' - - severity_list.append(severity) - - gdf['severity'] = severity_list - return gdf - - def _classify_risk(self, gdf, method='expert'): - """Classify vector risk scores""" - logger.info(f" Classifying risk using: {method}") - - if method == 'expert': - return self._classify_vector_expert(gdf) - elif method == 'jenks': - return self._classify_vector_legacy(gdf, 'jenks') - else: - return self._classify_vector_legacy(gdf, 'quantile') - - - def _add_explainability_metrics(self, gdf): - """Add dominant factor and confidence""" - factors = ['c_rainfall', 'c_elev', 'c_slope', 'c_twi', 'c_landuse', 'c_prox'] - names = ['Rainfall', 'Elevation', 'Slope', 'TWI', 'Land Use', 'Proximity'] - - dominant = [] - confidence = [] - certainty = [] - - for idx, row in gdf.iterrows(): - vals = [row.get(f, 0) for f in factors] - total = sum(vals) - - if total == 0: - dominant.append("None") - confidence.append(0) - certainty.append(0) - continue - - max_idx = np.argmax(vals) - dom_name = names[max_idx] - conf = vals[max_idx] / total - - norm_vals = np.array(vals) / total - var = np.var(norm_vals) - cert = np.sqrt(var) * 2.5 - cert = min(cert, 1.0) - - dominant.append(dom_name) - confidence.append(round(conf, 3)) - certainty.append(round(cert, 3)) - - gdf['dominant'] = dominant - gdf['conf'] = confidence - gdf['cert'] = certainty - - def _generate_explanation(self, row): - """Generate human-readable explanation""" - reasons = [] - reasons.append(f"Risk Level: {row.get('severity', 'Unknown')} ({row['risk_score']:.2f})") - - factors = [] - - if row.get('p_rainfall', 0) > 0.7: factors.append(f"High Rainfall ({row['p_rainfall']:.2f})") - if row.get('p_elevation', 0) > 0.7: factors.append(f"Low Elevation ({row['p_elevation']:.2f})") - if row.get('p_slope', 0) > 0.7: factors.append(f"Flat Terrain ({row['p_slope']:.2f})") - if row.get('p_twi', 0) > 0.7: factors.append(f"Water Accumulation ({row['p_twi']:.2f})") - if row.get('p_landuse', 0) > 0.7: factors.append(f"Impervious/Vuln Land ({row['p_landuse']:.2f})") - if row.get('p_prox', 0) > 0.7: factors.append(f"Near River ({row['p_prox']:.2f})") - - if factors: - reasons.append("Driven by: " + ", ".join(factors)) - else: - dom = row.get('dominant', 'None') - if dom != 'None': - reasons.append(f"Mainly driven by {dom}") - - return " | ".join(reasons) - - def _run_sensitivity_check(self, gdf, scenarios): - """Check against sensitivity thresholds""" - logger.info(" Running Sensitivity Check...") - for name, thresh in scenarios.items(): - extreme_count = len(gdf[gdf['risk_score'] >= thresh.get('extreme_score', 0.8)]) - logger.info(f" Scenario {name}: {extreme_count} Extreme Risk Units (> {thresh.get('extreme_score')})") diff --git a/plugins/slope.py b/plugins/slope.py deleted file mode 100644 index 7edfaef..0000000 --- a/plugins/slope.py +++ /dev/null @@ -1,38 +0,0 @@ -import numpy as np -from murasa.core.base import RasterParameterPlugin -from murasa.core.processing import ProcessingContext, RasterResampler - - -class SlopePlugin(RasterParameterPlugin): - """ - Slope-based risk - - inverse=True: Flat areas = High risk (Flood - water accumulates) - inverse=False: Steep areas = High risk (Landslide) - """ - source_keys = ['slope'] - inverse = True - - def validate_requirements(self, registry): - return registry.has('slope') or registry.has('dem') - - def process(self, registry, grid_shape, transform, crs): - self.log_processing() - ctx = ProcessingContext(grid_shape, transform, crs) - - if registry.has('slope'): - slope = ctx.resample_raster(registry.get('slope')) - else: - dem = ctx.resample_raster(registry.get('dem')) - - from murasa.core.terrain import calculate_slope - slope = calculate_slope(dem, ctx.resolution) - - normalized = self._normalize(slope) - - if self.inverse: - normalized = 1 - normalized - - self.result = normalized.astype(np.float32) - self.log_result() - return self.result diff --git a/plugins/twi.py b/plugins/twi.py deleted file mode 100644 index 2799f11..0000000 --- a/plugins/twi.py +++ /dev/null @@ -1,77 +0,0 @@ -import numpy as np -from scipy.ndimage import gaussian_filter - -from murasa.core.base import RasterParameterPlugin -from murasa.core.processing import ProcessingContext -from murasa.core.logger import logger - - -class TWIPlugin(RasterParameterPlugin): - """ - Topographic Wetness Index - """ - source_keys = ['twi'] - inverse = False - - def validate_requirements(self, registry): - return registry.has('twi') or registry.has('dem') - - def process(self, registry, grid_shape, transform, crs): - self.log_processing() - ctx = ProcessingContext(grid_shape, transform, crs) - - if registry.has('twi'): - twi = ctx.resample_raster(registry.get('twi')) - else: - logger.info(" Computing TWI from components...") - - if registry.has('slope'): - slope_source = registry.get('slope') - if hasattr(slope_source, 'path') and slope_source.path: - slope = ctx.resample_raster(slope_source) - elif hasattr(slope_source, 'data') and isinstance(slope_source.data, np.ndarray): - slope = slope_source.data - else: - slope = None - else: - slope = None - - if registry.has('dem'): - from murasa.core.hydro import fill_sinks - - dem = ctx.resample_raster(registry.get('dem')) - - dem = fill_sinks(dem) - - from murasa.core.terrain import calculate_slope - - slope_rad_calc = calculate_slope(dem, ctx.resolution, degrees=False) - - if slope is None: - slope_rad = slope_rad_calc - else: - slope_rad = np.radians(slope) - - slope_rad = np.maximum(slope_rad, 0.001) - - flow_accum = 1.0 / (slope_rad + 0.001) - flow_accum = gaussian_filter(flow_accum, sigma=2) - - resolution = ctx.resolution - twi = np.log((flow_accum * resolution) / np.tan(slope_rad)) - else: - logger.error(" Cannot compute TWI: No DEM found") - return ctx.empty_grid() - - valid = np.isfinite(twi) - if not valid.any(): - return ctx.empty_grid(fill=0.5) - - normalized = self._normalize(twi) - - if self.inverse: - normalized = 1 - normalized - - self.result = normalized.astype(np.float32) - self.log_result() - return self.result diff --git a/plugins/water_exclusion.py b/plugins/water_exclusion.py deleted file mode 100644 index d21cd31..0000000 --- a/plugins/water_exclusion.py +++ /dev/null @@ -1,48 +0,0 @@ -import numpy as np - -from murasa.core.base import ParameterPlugin -from murasa.core.data_source import DataRegistry -from murasa.core.resampler import VectorResampler -from murasa.core.processing import ProcessingContext -from murasa.core.logger import logger - - -class WaterExclusionPlugin(ParameterPlugin): - """ - Excludes water bodies (rivers, reservoirs) from analysis. - This plugin does NOT return a risk map, but an exclusion mask. - """ - - def __init__(self, name: str = "water_exclusion"): - super().__init__(name, weight=0.0) - self.is_exclusion = True - - def validate_requirements(self, registry: DataRegistry) -> bool: - return True - - def process(self, registry, grid_shape, transform, crs): - self.log_processing() - - exclusion_mask = np.zeros(grid_shape, dtype=bool) - ctx = ProcessingContext(grid_shape, transform, crs) - - if registry.has('river'): - river_source = registry.get('river') - - if not river_source.data.empty: - river_mask = ctx.rasterize_vector(river_source).astype(bool) - - exclusion_mask |= river_mask - logger.info(f" Excluded Rivers: {river_mask.sum()} pixels") - - if registry.has('reservoir'): - reservoir_source = registry.get('reservoir') - - if not reservoir_source.data.empty: - res_mask = ctx.rasterize_vector(reservoir_source).astype(bool) - - exclusion_mask |= res_mask - logger.info(f" Excluded Reservoirs: {res_mask.sum()} pixels") - - self.result = exclusion_mask.astype(np.float32) - return self.result diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 851db7f..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,53 +0,0 @@ -[build-system] -requires = ["setuptools>=68.0", "wheel"] -build-backend = "setuptools.build_meta" - -[project] -name = "murasa" -version = "0.1.0" -description = "Plugin-based geospatial risk engine" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "MIT" } - -dependencies = [ - "numpy>=1.24", - "pandas>=2.0", - "geopandas>=0.13", - "rasterio>=1.3", - "fiona>=1.9", - "scipy>=1.10", - "pyyaml>=6.0", - "rasterstats>=0.20.0", -] - -[project.optional-dependencies] -dev = [ - "pytest>=7.0", - "pytest-cov", -] - -[project.scripts] -murasa = "murasa.run_flood_analysis:main" - -[tool.setuptools] -packages = [ - "murasa", - "murasa.core", - "murasa.engine", - "murasa.plugins", - "murasa.plugins.loaders", - "murasa.presets", -] - -[tool.setuptools.package-dir] -murasa = "." -"murasa.core" = "core" -"murasa.engine" = "engine" -"murasa.plugins" = "plugins" -"murasa.plugins.loaders" = "plugins/loaders" -"murasa.presets" = "presets" - -[tool.setuptools.package-data] -murasa = ["config/*.yaml"] - diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..f42c8b3 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,2 @@ +edition = "2021" +max_width = 100