diff --git a/.gitignore b/.gitignore index 8ab8f39..5d98a29 100644 --- a/.gitignore +++ b/.gitignore @@ -221,3 +221,5 @@ __marimo__/ # aim data/ datatest/ + +tmp/ diff --git a/README.md b/README.md index 9966c6a..dbc96bf 100644 --- a/README.md +++ b/README.md @@ -28,18 +28,26 @@ uv add aimx pip install aimx ``` -### Install the agent skill +### Install the agent skills -This repository also includes an `aimx` skill for agent workflows such as -`autoresearch` `log_experiment`, where an LLM needs to collect run parameters, -metric summaries, traces, and image evidence from a local Aim repository. +This repository also includes agent skills for Aimx AutoResearch workflows. +The existing `aimx` skill is the Observe subsystem for `log_experiment`, where +an LLM collects run parameters, metric summaries, traces, and image evidence +from a local Aim repository. The build skills help create or audit repositories +that are ready for that loop. ```bash npx skills install blizhan/aimx ``` -After installation, invoke the skill as `$aimx`. The skill assumes the `aimx` -CLI is available in the environment that performs the experiment inspection. +After installation, invoke: + +- `$aimx` for read-only experiment evidence collection. +- `$aimx-hydra-lightning-builder` for Hydra + Lightning + Aim scaffold and + migration-audit workflows. + +The skills assume the `aimx` CLI is available in the environment that performs +experiment inspection. ### Check your environment diff --git a/skills/aimx-hydra-lightning-builder/SKILL.md b/skills/aimx-hydra-lightning-builder/SKILL.md new file mode 100644 index 0000000..cf0e3b0 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/SKILL.md @@ -0,0 +1,75 @@ +--- +name: aimx-hydra-lightning-builder +description: Use when creating, auditing, or planning migration for Hydra + PyTorch Lightning ML repositories that should log to Aim, expose Aimx-readable experiment evidence, or use configurable datamodules/models/plmodules/callbacks/loggers. 
+--- + +# Aimx Hydra Lightning Builder + +## Overview + +Build or migrate Hydra + Lightning repositories that satisfy the Aimx AutoResearch contract. + +The pattern is distilled from production Hydra + Lightning research repositories: Hydra composes the experiment, Lightning owns the training lifecycle, and Aim/Aimx preserve evidence for agentic iteration. + +## Workflows + +### Scaffold a new repository + +Run: + +```bash +python skills/aimx-hydra-lightning-builder/scripts/scaffold_repo.py \ + --stack hydra-lightning \ + --name <project-name> \ + --package <package_name> \ + --preset classification \ + --output <output-dir> +``` + +Then validate inside the generated repository: + +```bash +uv sync +uv run pytest +uv run python src/train.py trainer.fast_dev_run=true trainer.logger=false +``` + +### Audit an existing repository + +Run read-only: + +```bash +python skills/aimx-hydra-lightning-builder/scripts/audit_repo.py \ + --stack hydra-lightning \ + --repo <repo> \ + --format json > audit.json +python skills/aimx-hydra-lightning-builder/scripts/plan_migration.py \ + --audit audit.json \ + --target-stack hydra-lightning +``` + +Never edit, format, sync dependencies, generate files, or run mutation/codegen commands inside audit targets unless the user separately approves migration execution. + +## Architecture Pattern + +Read `references/architecture.md` before scaffold or migration work. + +- `configs/<primary>.yaml` composes `data`, `datamodule`, `model`, `plmodule`, `trainer`, `callbacks`, `logger`, `paths`, `accelerate`, and optional `experiment`. +- `src/train.py` seeds, instantiates configured objects, logs hyperparameters, and calls `trainer.fit/validate/test`. +- `BaseLitModule` owns `cfg`, `cfg.model` instantiation, optimizer/scheduler, compile/SDPA options, and shared trace helpers. +- Task modules own batch parsing, loss, metrics, and prediction/evaluation outputs. +- DataModules own splits, dataloaders, sampler/collate policy, and data preparation boundaries.
+- Aim trace uses Lightning loggers for scalars and explicit `experiment.track(...)` for images/distributions. + +## References + +- `references/architecture.md`: core relationships and file layout. +- `references/aim-trace.md`: evidence naming, context, and Aimx query conventions. +- `references/migration-audit.md`: read-only audit checklist and migration staging. + +## Scripts And Assets + +- `scripts/scaffold_repo.py`: copies `assets/template-repo`. +- `scripts/audit_repo.py`: scans a repository without writing to it. +- `scripts/plan_migration.py`: generates a staged migration plan from audit JSON. +- `assets/template-repo`: minimal runnable Hydra + Lightning + Aim template. diff --git a/skills/aimx-hydra-lightning-builder/agents/openai.yaml b/skills/aimx-hydra-lightning-builder/agents/openai.yaml new file mode 100644 index 0000000..738bb20 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "Aimx Hydra Lightning Builder" + short_description: "Build Hydra Lightning Aim repositories" + default_prompt: "Use $aimx-hydra-lightning-builder to scaffold or audit a Hydra Lightning repository for Aimx AutoResearch." 
diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/.gitignore b/skills/aimx-hydra-lightning-builder/assets/template-repo/.gitignore new file mode 100644 index 0000000..17bbf12 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/.gitignore @@ -0,0 +1,7 @@ +.venv/ +__pycache__/ +.pytest_cache/ +.ruff_cache/ +outputs/ +multirun/ +.aim/ diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/.project-root b/skills/aimx-hydra-lightning-builder/assets/template-repo/.project-root new file mode 100644 index 0000000..62471ee --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/.project-root @@ -0,0 +1 @@ +root marker for rootutils diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/README.md b/skills/aimx-hydra-lightning-builder/assets/template-repo/README.md new file mode 100644 index 0000000..d537ab6 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/README.md @@ -0,0 +1,27 @@ +# {{ project_name }} + +Hydra + Lightning + Aim template for Aimx AutoResearch. + +## Quick Start + +```bash +uv sync +uv run python src/train.py trainer.fast_dev_run=true trainer.logger=false +uv run pytest +``` + +Enable Aim logging by leaving `trainer.logger=true` and using `logger=aim`. + +```bash +uv run python src/train.py +aimx query params "run.hash != ''" --repo . +aimx query metrics "metric.name != ''" --repo . +aimx query metrics "metric.name == 'acc'" --repo . --json +``` + +System parameters are not logged by default to avoid storing environment +variables in experiment evidence. 
Opt in only for safe environments: + +```bash +uv run python src/train.py logger.aim.log_system_params=true +``` diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/accelerate/default.yaml b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/accelerate/default.yaml new file mode 100644 index 0000000..01b54c5 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/accelerate/default.yaml @@ -0,0 +1,3 @@ +compile: false +precision: "32-true" +fp32_matmul_precision: "highest" diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/autoresearch/default.yaml b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/autoresearch/default.yaml new file mode 100644 index 0000000..c6eed98 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/autoresearch/default.yaml @@ -0,0 +1,17 @@ +stack: "{{ stack }}" +train_command: "uv run python src/train.py" +fast_dev_command: "uv run python src/train.py trainer.fast_dev_run=true trainer.logger=false" +config_entrypoint: "configs/train.yaml" +aim_repo: ${paths.root_dir} +experiment_name: "{{ project_name }}" +objective: + metric: "acc" + direction: "maximize" + context: + subset: "val" +evidence: + params: true + metrics: true + traces: true + images: false + distributions: false diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/callbacks/default.yaml b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/callbacks/default.yaml new file mode 100644 index 0000000..1f2093b --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/callbacks/default.yaml @@ -0,0 +1,2 @@ +autoresearch_marker: + _target_: {{ package_name }}.callbacks.autoresearch.AutoResearchMarker diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/datamodule/dummy.yaml 
b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/datamodule/dummy.yaml new file mode 100644 index 0000000..ca461c4 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/datamodule/dummy.yaml @@ -0,0 +1,7 @@ +_target_: {{ package_name }}.datamodules.dummy.RandomClassificationDataModule +num_samples: 64 +num_features: 8 +num_classes: 2 +batch_size: 16 +num_workers: 0 +seed: ${seed} diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/logger/aim.yaml b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/logger/aim.yaml new file mode 100644 index 0000000..346ba41 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/logger/aim.yaml @@ -0,0 +1,10 @@ +aim: + _target_: aim.pytorch_lightning.AimLogger + repo: ${paths.root_dir} + experiment: ${autoresearch.experiment_name} + train_metric_prefix: "train/" + val_metric_prefix: "val/" + test_metric_prefix: "test/" + system_tracking_interval: 10 + log_system_params: false + capture_terminal_logs: false diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/model/mlp.yaml b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/model/mlp.yaml new file mode 100644 index 0000000..3719d5e --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/model/mlp.yaml @@ -0,0 +1,4 @@ +_target_: {{ package_name }}.models.mlp.MLP +in_dim: ${datamodule.num_features} +hidden_dim: 16 +out_dim: ${datamodule.num_classes} diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/opt/default.yaml b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/opt/default.yaml new file mode 100644 index 0000000..f079338 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/opt/default.yaml @@ -0,0 +1,4 @@ +optimizer: + _target_: torch.optim.AdamW + lr: 0.001 + weight_decay: 0.0 diff --git 
a/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/paths/default.yaml b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/paths/default.yaml new file mode 100644 index 0000000..35152d3 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/paths/default.yaml @@ -0,0 +1,3 @@ +root_dir: ${oc.env:PROJECT_ROOT} +output_dir: ${hydra:runtime.output_dir} +work_dir: ${hydra:runtime.cwd} diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/plmodule/classifier.yaml b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/plmodule/classifier.yaml new file mode 100644 index 0000000..537b5ed --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/plmodule/classifier.yaml @@ -0,0 +1,2 @@ +_target_: {{ package_name }}.plmodules.classifier.ClassificationModule +_partial_: true diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/train.yaml b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/train.yaml new file mode 100644 index 0000000..39acb75 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/train.yaml @@ -0,0 +1,25 @@ +defaults: + - _self_ + - autoresearch: default + - paths: default + - datamodule: dummy + - model: mlp + - plmodule: classifier + - trainer: default + - callbacks: default + - logger: aim + - opt: default + - accelerate: default + +task_name: train +tags: ["dev", "{{ preset }}"] +seed: 42 + +hydra: + run: + dir: outputs/${task_name}/${now:%Y-%m-%d}/${now:%H-%M-%S} + sweep: + dir: multirun/${task_name}/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra:job.num} + job: + chdir: false diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/trainer/default.yaml b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/trainer/default.yaml new file mode 100644 index 0000000..6f37c09 --- /dev/null +++ 
b/skills/aimx-hydra-lightning-builder/assets/template-repo/configs/trainer/default.yaml @@ -0,0 +1,11 @@ +_target_: lightning.Trainer +default_root_dir: ${paths.output_dir} +accelerator: cpu +devices: 1 +max_epochs: 1 +fast_dev_run: false +logger: true +enable_checkpointing: false +enable_progress_bar: true +log_every_n_steps: 1 +precision: ${accelerate.precision} diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/pyproject.toml b/skills/aimx-hydra-lightning-builder/assets/template-repo/pyproject.toml new file mode 100644 index 0000000..742c2aa --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/pyproject.toml @@ -0,0 +1,39 @@ +[project] +name = "{{ project_name }}" +version = "0.1.0" +description = "Aimx AutoResearch {{ stack }} repository" +readme = "README.md" +requires-python = ">=3.10,<3.13" +dependencies = [ + "aim==3.27.0", + "aimx>=0.3.3", + "hydra-core>=1.3", + "lightning>=2.3", + "numpy>=1.24,<2.0.0", + "omegaconf>=2.3", + "rich>=13.7", + "rootutils>=1.0.7", + "sqlalchemy==1.4.49", + "torch>=2.1", +] + +[dependency-groups] +dev = [ + "pytest>=8.0", +] + +[[tool.uv.index]] +name = "pypi" +url = "https://pypi.org/simple" +default = true + +[tool.pytest.ini_options] +pythonpath = ["src"] +testpaths = ["tests"] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/{{ package_name }}"] diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/__init__.py b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/__init__.py new file mode 100644 index 0000000..dd6592b --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/__init__.py @@ -0,0 +1 @@ +"""Aimx AutoResearch template package.""" diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/callbacks/__init__.py 
b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/callbacks/__init__.py new file mode 100644 index 0000000..f707d36 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/callbacks/__init__.py @@ -0,0 +1,3 @@ +from {{ package_name }}.callbacks.autoresearch import AutoResearchMarker + +__all__ = ["AutoResearchMarker"] diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/callbacks/autoresearch.py b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/callbacks/autoresearch.py new file mode 100644 index 0000000..4d7d37b --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/callbacks/autoresearch.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +import lightning as L + + +class AutoResearchMarker(L.Callback): + """Log a small completion marker before Lightning finalizes loggers.""" + + def on_train_end(self, trainer: L.Trainer, pl_module: L.LightningModule) -> None: + if not trainer.logger: + return + for logger in trainer.loggers: + logger.log_metrics({"autoresearch/complete": 1.0}, step=int(trainer.global_step)) diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/datamodules/__init__.py b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/datamodules/__init__.py new file mode 100644 index 0000000..a55f901 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/datamodules/__init__.py @@ -0,0 +1,3 @@ +from {{ package_name }}.datamodules.dummy import RandomClassificationDataModule + +__all__ = ["RandomClassificationDataModule"] diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/datamodules/dummy.py b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/datamodules/dummy.py new file mode 100644 index 0000000..5681e85 --- /dev/null +++ 
b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/datamodules/dummy.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import torch +from lightning import LightningDataModule +from torch.utils.data import DataLoader, TensorDataset, random_split + + +class RandomClassificationDataModule(LightningDataModule): + def __init__( + self, + num_samples: int = 64, + num_features: int = 8, + num_classes: int = 2, + batch_size: int = 16, + num_workers: int = 0, + seed: int = 42, + ) -> None: + super().__init__() + self.save_hyperparameters() + self.train_dataset = None + self.val_dataset = None + + def setup(self, stage: str | None = None) -> None: + generator = torch.Generator().manual_seed(int(self.hparams.seed)) + x = torch.randn(int(self.hparams.num_samples), int(self.hparams.num_features), generator=generator) + weights = torch.randn(int(self.hparams.num_features), int(self.hparams.num_classes), generator=generator) + y = torch.argmax(x @ weights, dim=1) + dataset = TensorDataset(x, y) + train_len = max(1, int(0.8 * len(dataset))) + val_len = len(dataset) - train_len + self.train_dataset, self.val_dataset = random_split(dataset, [train_len, val_len], generator=generator) + + def train_dataloader(self) -> DataLoader: + return DataLoader( + self.train_dataset, + batch_size=int(self.hparams.batch_size), + num_workers=int(self.hparams.num_workers), + shuffle=True, + ) + + def val_dataloader(self) -> DataLoader: + return DataLoader( + self.val_dataset, + batch_size=int(self.hparams.batch_size), + num_workers=int(self.hparams.num_workers), + shuffle=False, + ) diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/models/__init__.py b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/models/__init__.py new file mode 100644 index 0000000..874ca7c --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/models/__init__.py @@ -0,0 +1,3 @@ +from {{ 
package_name }}.models.mlp import MLP + +__all__ = ["MLP"] diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/models/mlp.py b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/models/mlp.py new file mode 100644 index 0000000..db16120 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/models/mlp.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from torch import nn + + +class MLP(nn.Module): + def __init__(self, in_dim: int, hidden_dim: int, out_dim: int) -> None: + super().__init__() + self.net = nn.Sequential( + nn.Linear(in_dim, hidden_dim), + nn.ReLU(), + nn.Linear(hidden_dim, out_dim), + ) + + def forward(self, x): + return self.net(x) diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/plmodules/__init__.py b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/plmodules/__init__.py new file mode 100644 index 0000000..fbb5a18 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/plmodules/__init__.py @@ -0,0 +1,3 @@ +from {{ package_name }}.plmodules.classifier import ClassificationModule + +__all__ = ["ClassificationModule"] diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/plmodules/classifier.py b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/plmodules/classifier.py new file mode 100644 index 0000000..88fbef8 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/plmodules/classifier.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +import hydra +import lightning as L +import torch +import torch.nn.functional as F +from omegaconf import DictConfig + + +class ClassificationModule(L.LightningModule): + def __init__(self, cfg: DictConfig) -> None: + super().__init__() + self.save_hyperparameters(logger=False) + self.cfg = cfg + self.net = 
hydra.utils.instantiate(cfg.model) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.net(x) + + def _shared_step(self, batch, mode: str) -> torch.Tensor: + x, y = batch + logits = self(x) + loss = F.cross_entropy(logits, y) + preds = torch.argmax(logits, dim=1) + acc = (preds == y).float().mean() + on_step = mode == "train" + self.log(f"{mode}/loss", loss, on_step=on_step, on_epoch=True, prog_bar=True) + self.log(f"{mode}/acc", acc, on_step=on_step, on_epoch=True, prog_bar=True) + return loss + + def training_step(self, batch, batch_idx: int) -> torch.Tensor: + return self._shared_step(batch, "train") + + def validation_step(self, batch, batch_idx: int) -> None: + self._shared_step(batch, "val") + + def configure_optimizers(self): + return hydra.utils.instantiate(self.cfg.opt.optimizer, params=self.parameters()) diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/utils/__init__.py b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/utils/__init__.py new file mode 100644 index 0000000..e9b9a78 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/utils/__init__.py @@ -0,0 +1 @@ +"""Utility helpers.""" diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/utils/instantiators.py b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/utils/instantiators.py new file mode 100644 index 0000000..822bede --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/utils/instantiators.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +import hydra +from lightning import Callback +from lightning.pytorch.loggers import Logger +from omegaconf import DictConfig + + +def instantiate_callbacks(callbacks_cfg: DictConfig | None) -> list[Callback]: + callbacks: list[Callback] = [] + if not callbacks_cfg: + return callbacks + if not isinstance(callbacks_cfg, DictConfig): + raise 
TypeError("Callbacks config must be a DictConfig.") + for _, cb_conf in callbacks_cfg.items(): + if isinstance(cb_conf, DictConfig) and "_target_" in cb_conf: + callbacks.append(hydra.utils.instantiate(cb_conf)) + return callbacks + + +def instantiate_loggers(logger_cfg: DictConfig | None) -> list[Logger]: + loggers: list[Logger] = [] + if not logger_cfg: + return loggers + if not isinstance(logger_cfg, DictConfig): + raise TypeError("Logger config must be a DictConfig.") + for _, lg_conf in logger_cfg.items(): + if isinstance(lg_conf, DictConfig) and "_target_" in lg_conf: + loggers.append(hydra.utils.instantiate(lg_conf)) + return loggers diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/utils/logging.py b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/utils/logging.py new file mode 100644 index 0000000..0b4ae8c --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/__package__/utils/logging.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from typing import Any + +from omegaconf import OmegaConf + + +def log_hyperparameters(objects: dict[str, Any]) -> None: + cfg = objects["cfg"] + model = objects["model"] + trainer = objects["trainer"] + if not trainer.logger: + return + + hparams = { + "model": OmegaConf.to_container(cfg.model, resolve=True), + "datamodule": OmegaConf.to_container(cfg.datamodule, resolve=True), + "trainer": OmegaConf.to_container(cfg.trainer, resolve=True), + "opt": OmegaConf.to_container(cfg.opt, resolve=True), + "plmodule": OmegaConf.to_container(cfg.plmodule, resolve=True), + "autoresearch": OmegaConf.to_container(cfg.autoresearch, resolve=True), + "task_name": cfg.get("task_name"), + "tags": list(cfg.get("tags") or []), + "model/params/total": sum(p.numel() for p in model.parameters()), + "model/params/trainable": sum(p.numel() for p in model.parameters() if p.requires_grad), + } + + for logger in trainer.loggers: + 
logger.log_hyperparams(hparams) diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/src/train.py b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/train.py new file mode 100644 index 0000000..aee0037 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/src/train.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +from typing import Any + +import hydra +import rootutils +from hydra.utils import instantiate +from lightning import Callback, LightningDataModule, LightningModule, Trainer, seed_everything +from lightning.pytorch.loggers import Logger +from omegaconf import DictConfig + +from {{ package_name }}.utils.instantiators import instantiate_callbacks, instantiate_loggers +from {{ package_name }}.utils.logging import log_hyperparameters + +rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True) + + +def instantiate_runtime(cfg: DictConfig) -> dict[str, Any]: + datamodule: LightningDataModule = instantiate(cfg.datamodule) + model: LightningModule = instantiate(cfg.plmodule)(cfg=cfg) + callbacks: list[Callback] = instantiate_callbacks(cfg.get("callbacks")) + logger: list[Logger] | bool = False + if cfg.trainer.get("logger") is not False: + logger = instantiate_loggers(cfg.get("logger")) + trainer: Trainer = instantiate(cfg.trainer, callbacks=callbacks, logger=logger) + return { + "cfg": cfg, + "datamodule": datamodule, + "model": model, + "callbacks": callbacks, + "logger": logger, + "trainer": trainer, + } + + +@hydra.main(version_base=None, config_path="../configs", config_name="train") +def main(cfg: DictConfig) -> dict[str, float]: + if cfg.get("seed") is not None: + seed_everything(cfg.seed, workers=True) + + objects = instantiate_runtime(cfg) + trainer: Trainer = objects["trainer"] + model: LightningModule = objects["model"] + datamodule: LightningDataModule = objects["datamodule"] + + if objects["logger"]: + log_hyperparameters(objects) + + trainer.fit(model=model, 
datamodule=datamodule) + return { + key: float(value) + for key, value in trainer.callback_metrics.items() + if hasattr(value, "numel") and value.numel() == 1 + } + + +if __name__ == "__main__": + main() diff --git a/skills/aimx-hydra-lightning-builder/assets/template-repo/tests/test_fast_dev_run.py b/skills/aimx-hydra-lightning-builder/assets/template-repo/tests/test_fast_dev_run.py new file mode 100644 index 0000000..d127214 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/assets/template-repo/tests/test_fast_dev_run.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +import subprocess +import sys + + +def test_fast_dev_run() -> None: + result = subprocess.run( + [ + sys.executable, + "src/train.py", + "trainer.fast_dev_run=true", + "trainer.logger=false", + "trainer.enable_progress_bar=false", + ], + check=False, + capture_output=True, + text=True, + ) + assert result.returncode == 0, result.stderr diff --git a/skills/aimx-hydra-lightning-builder/references/aim-trace.md b/skills/aimx-hydra-lightning-builder/references/aim-trace.md new file mode 100644 index 0000000..4e276c5 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/references/aim-trace.md @@ -0,0 +1,66 @@ +# Aim Trace Conventions + +Aim is the evidence store for AutoResearch. `aimx` is the read-only query surface. + +## Scalars + +Use slash-separated names in Lightning code: + +- `train/loss` +- `val/loss` +- `val/acc` +- `test/loss` + +Prefer validation or test metrics for objective ranking. + +Aim's Lightning logger may store the slash prefix as context, so a logged +`val/acc` can be queried as metric `acc` with context `{"subset": "val"}`. +Keep that mapping in `configs/autoresearch/default.yaml`. + +## Images + +Track qualitative outputs with context: + +```python +aim_run.track( + Image(fig), + name="prediction", + step=self.global_step, + context={"mode": "val", "batch_idx": batch_idx}, +) +``` + +Use config switches for image frequency and selected batches. 
Do not log every batch by default. + +## Distributions + +Track distributions only when useful: + +- classifier head weights/gradients; +- activation ranges; +- feature histograms; +- residual/error distributions. + +Use names such as `head/gradients/weight` and context such as `{"module": "head", "kind": "gradients"}`. + +## Hyperparameters + +Log these minimum sections: + +- model config; +- datamodule/data config; +- trainer config; +- optimizer/scheduler config; +- plmodule config; +- task name, tags, objective, and paths. + +## Aimx Queries + +Use explicit repos: + +```bash +aimx query params "run.hash != ''" --repo <repo> --json +aimx query metrics "metric.name == 'loss'" --repo <repo> --json +aimx trace "metric.name == 'loss'" --repo <repo> --json --tail 200 +aimx query images "images.name == 'prediction'" --repo <repo> --json --head 20 +``` diff --git a/skills/aimx-hydra-lightning-builder/references/architecture.md b/skills/aimx-hydra-lightning-builder/references/architecture.md new file mode 100644 index 0000000..230e984 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/references/architecture.md @@ -0,0 +1,52 @@ +# Hydra Lightning Aim Architecture + +The core relationship is: + +`Hydra config -> instantiated Lightning runtime -> Aim evidence -> aimx analysis` + +## Config Layer + +Use a primary config such as `configs/train.yaml` with defaults for: + +- `datamodule` +- `model` +- `plmodule` +- `trainer` +- `callbacks` +- `logger` +- `paths` +- `accelerate` +- optional `experiment` + +Experiment configs should override choices and hyperparameters, not duplicate the whole tree.
+ +## Runtime Layer + +`src/train.py` should: + +- call `rootutils.setup_root` or otherwise make local imports stable; +- seed through Lightning; +- instantiate datamodule, plmodule, callbacks, loggers, and trainer from config; +- log hyperparameters before training when loggers exist; +- run `trainer.fit`, `trainer.validate`, or `trainer.test` based on config; +- return compact metrics when used programmatically. + +## Module Layer + +`BaseLitModule` should own common behavior: + +- store the full `cfg`; +- instantiate `cfg.model`; +- configure optimizer and scheduler from `cfg.opt`; +- apply compile, precision, or SDPA settings from `cfg.accelerate`; +- provide helper methods for Aim experiments when explicit traces are needed. + +Task subclasses should own only domain logic: batch parsing, forward call, loss, metrics, prediction outputs, and optional qualitative trace artifacts. + +## Data Layer + +`LightningDataModule` classes own data preparation and dataloaders. Keep user data paths in config. Use dummy/random data in templates so fast validation does not depend on private datasets. + +## Evidence Layer + +Use Lightning `self.log` for scalar metrics. Use Aim `experiment.track(...)` for images and distributions. Keep evidence names stable and context-rich so `aimx` can query them later. diff --git a/skills/aimx-hydra-lightning-builder/references/migration-audit.md b/skills/aimx-hydra-lightning-builder/references/migration-audit.md new file mode 100644 index 0000000..cc1cf5c --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/references/migration-audit.md @@ -0,0 +1,29 @@ +# Migration Audit + +Migration starts with a read-only audit. Do not edit the target repository during audit. 
+ +## Audit Checklist + +Inspect: + +- dependency manager and Python version; +- training entrypoints; +- Hydra config root and defaults composition; +- model, datamodule, plmodule, trainer, callback, logger config groups; +- LightningModule and LightningDataModule classes; +- metric logging through `self.log`; +- AimLogger config and direct `experiment.track(...)` traces; +- hyperparameter logging; +- fast validation command; +- project-local Aim repo path. + +## Migration Stages + +1. Establish the AutoResearch contract. +2. Add or normalize Hydra config groups. +3. Move runtime orchestration into `src/train.py`. +4. Adapt model/data/task code into Lightning boundaries. +5. Add Aim/Aimx evidence conventions. +6. Add fast validation tests. + +Keep migration patches small and reversible. diff --git a/skills/aimx-hydra-lightning-builder/scripts/audit_repo.py b/skills/aimx-hydra-lightning-builder/scripts/audit_repo.py new file mode 100644 index 0000000..31a8627 --- /dev/null +++ b/skills/aimx-hydra-lightning-builder/scripts/audit_repo.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python3 +"""Read-only Hydra + Lightning + Aim repository audit.""" + +from __future__ import annotations + +import argparse +import json +import re +from pathlib import Path +from typing import Any + +try: + import tomllib +except ModuleNotFoundError: # pragma: no cover - Python < 3.11 fallback + tomllib = None # type: ignore[assignment] + + +TEXT_EXTENSIONS = {".md", ".py", ".toml", ".txt", ".yaml", ".yml"} +IGNORED_PARTS = { + ".git", + ".venv", + "__pycache__", + ".ipynb_checkpoints", + ".mypy_cache", + ".pytest_cache", + ".ruff_cache", + "multirun", + "outputs", +} +CONFIG_GROUPS = { + "accelerate", + "callbacks", + "data", + "datamodule", + "experiment", + "logger", + "loss", + "metrics", + "model", + "opt", + "paths", + "plmodule", + "trainer", +} +STACK = "hydra-lightning" + + +def read_text(path: Path, limit: int = 300_000) -> str: + try: + return path.read_text(encoding="utf-8", 
import argparse
import json
import os
import re
from pathlib import Path
from typing import Any, Optional

try:
    import tomllib
except ModuleNotFoundError:  # pragma: no cover - Python < 3.11 fallback
    tomllib = None  # type: ignore[assignment]


# Module-level context the functions below rely on; values match the script header.
TEXT_EXTENSIONS = {".md", ".py", ".toml", ".txt", ".yaml", ".yml"}
IGNORED_PARTS = {
    ".git",
    ".venv",
    "__pycache__",
    ".ipynb_checkpoints",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    "multirun",
    "outputs",
}
STACK = "hydra-lightning"

# A distribution name: letters/digits, with ".", "_" and "-" allowed inside.
_DIST_NAME = re.compile(r"[A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?")
# First token that ends the name part of a requirement string.
_SPEC_SEPARATOR = re.compile(r"\s*(?:\[|\(|@|==|~=|!=|<=|>=|<|>|=|;|\s)")


def read_text(path: Path, limit: int = 300_000) -> str:
    """Return at most ``limit`` characters of ``path`` decoded as UTF-8.

    Undecodable bytes are ignored and an unreadable file yields ``""``.
    Only the requested prefix is read instead of loading the whole file.
    """
    try:
        with path.open(encoding="utf-8", errors="ignore") as handle:
            return handle.read(limit)
    except OSError:
        return ""


def iter_text_files(repo: Path) -> list[Path]:
    """Return the text files below ``repo`` in a stable, sorted walk order.

    Directories named in ``IGNORED_PARTS`` are pruned relative to ``repo``, so
    a repository that itself lives under e.g. ``outputs/`` is still scanned,
    and ignored trees such as ``.git`` or ``.venv`` are never descended into.
    """
    files: list[Path] = []
    for root, dirnames, filenames in os.walk(repo):
        # In-place assignment is what makes os.walk skip the pruned directories.
        dirnames[:] = sorted(name for name in dirnames if name not in IGNORED_PARTS)
        for filename in sorted(filenames):
            path = Path(root, filename)
            if path.suffix in TEXT_EXTENSIONS and path.is_file():
                files.append(path)
    return files


def rel(repo: Path, path: Path) -> str:
    """Return ``path`` relative to ``repo`` using forward slashes."""
    return path.relative_to(repo).as_posix()


def grep(files: list[Path], pattern: str, cache: Optional[dict[Path, str]] = None) -> list[Path]:
    """Return the files whose text matches the regular expression ``pattern``.

    Args:
        files: Candidate files, searched in the given order.
        pattern: Regular expression applied with ``re.search``.
        cache: Optional mapping reused across calls so each file is read once.
    """
    regex = re.compile(pattern)
    matches: list[Path] = []
    for path in files:
        if cache is None:
            text = read_text(path)
        else:
            text = cache.get(path)
            if text is None:
                text = cache[path] = read_text(path)
        if regex.search(text):
            matches.append(path)
    return matches


def status(condition: bool) -> str:
    """Map a boolean check result to ``"pass"`` or ``"missing"``."""
    return "pass" if condition else "missing"


def normalize_dist_name(requirement: str) -> str:
    """Return the normalized distribution name of a requirement string.

    Extras, version specifiers, markers and direct-reference URLs are cut off;
    runs of ``-``, ``_`` and ``.`` collapse to ``-`` and the result is
    lower-cased. Strings that do not start with a valid name (for example a
    bare ``git+https://...`` URL) yield ``""``.
    """
    head = _SPEC_SEPARATOR.split(requirement.strip(), maxsplit=1)[0]
    if not _DIST_NAME.fullmatch(head):
        return ""
    return re.sub(r"[-_.]+", "-", head).lower()


def normalized_dependency_set(requirements: list[str]) -> set[str]:
    """Normalize requirement strings into a set, dropping non-strings and blanks."""
    return {
        name
        for item in requirements
        if isinstance(item, str) and (name := normalize_dist_name(item))
    }


def collect_quoted_dependency_specs(text: str) -> set[str]:
    """Heuristic fallback: collect quoted strings that carry a version operator."""
    quoted = re.findall(r"[\"']([^\"']+)[\"']", text)
    return normalized_dependency_set([value for value in quoted if re.search(r"[<>=~!]", value)])


def _table(value: Any) -> dict[str, Any]:
    """Return ``value`` if it is a mapping, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _string_items(value: Any) -> list[str]:
    """Return the string entries of ``value`` if it is a list, otherwise ``[]``."""
    return [item for item in value if isinstance(item, str)] if isinstance(value, list) else []


def collect_pyproject_dependencies(repo: Path) -> set[str]:
    """Collect normalized dependency names declared in ``pyproject.toml``.

    Reads ``project.dependencies``, ``project.optional-dependencies``,
    ``dependency-groups`` and the Poetry tables (main, ``dev-dependencies`` and
    ``group.*.dependencies``). Falls back to the quoted-string heuristic when
    ``tomllib`` is unavailable or the file does not parse. Tables with an
    unexpected shape are skipped rather than raising.
    """
    path = repo / "pyproject.toml"
    if not path.exists():
        return set()
    text = read_text(path)
    if tomllib is None:
        return collect_quoted_dependency_specs(text)
    try:
        data = tomllib.loads(text)
    except tomllib.TOMLDecodeError:
        return collect_quoted_dependency_specs(text)

    project = _table(data.get("project"))
    specs = _string_items(project.get("dependencies"))
    for values in _table(project.get("optional-dependencies")).values():
        specs.extend(_string_items(values))
    for values in _table(data.get("dependency-groups")).values():
        specs.extend(_string_items(values))

    poetry = _table(_table(data.get("tool")).get("poetry"))
    poetry_tables = [poetry.get("dependencies"), poetry.get("dev-dependencies")]
    poetry_tables.extend(_table(group).get("dependencies") for group in _table(poetry.get("group")).values())
    for table in poetry_tables:
        # Poetry lists the interpreter constraint next to real dependencies.
        specs.extend(name for name in _table(table) if name.lower() != "python")

    return normalized_dependency_set(specs)


def collect_requirements_dependencies(repo: Path) -> set[str]:
    """Collect normalized names from ``requirements*.txt`` and ``requirements/*.txt``.

    Comments, option lines (``-r``, ``-e`` ...) and local paths are skipped.
    """
    candidates = list(repo.glob("requirements*.txt"))
    requirements_dir = repo / "requirements"
    if requirements_dir.is_dir():
        candidates.extend(requirements_dir.glob("*.txt"))

    dependencies: set[str] = set()
    for path in candidates:
        for raw_line in read_text(path).splitlines():
            line = raw_line.split("#", maxsplit=1)[0].strip()
            if not line or line.startswith(("-", ".")):
                continue
            name = normalize_dist_name(line)
            if name:
                dependencies.add(name)
    return dependencies


def collect_dependencies(repo: Path) -> set[str]:
    """Return the union of pyproject and requirements-file dependency names."""
    return collect_pyproject_dependencies(repo) | collect_requirements_dependencies(repo)


def _check(
    check_id: str,
    ok: bool,
    ok_detail: str,
    miss_detail: str,
    *,
    miss_status: Optional[str] = None,
) -> tuple[str, str, str]:
    """Build one ``(id, status, detail)`` row whose detail agrees with its status.

    ``miss_status`` overrides the default ``"missing"`` for soft checks.
    """
    if miss_status is None:
        item_status = status(ok)
    else:
        item_status = "pass" if ok else miss_status
    return check_id, item_status, ok_detail if ok else miss_detail


def audit(repo: Path, stack: str = STACK) -> dict[str, Any]:
    """Inspect ``repo`` without modifying it and return the audit payload.

    Args:
        repo: Repository root to scan.
        stack: Stack label echoed into the payload.

    Returns:
        A dict with ``repo``, ``stack``, ``score`` (number of passing checks),
        ``max_score``, ``checks`` (``id``/``status``/``detail`` rows) and
        ``evidence`` (relative paths and the detected dependency names).
    """
    files = iter_text_files(repo)
    rel_files = {rel(repo, path) for path in files}
    configs_dir = repo / "configs"
    # is_dir() rather than exists(): a plain file named "configs" must not crash iterdir().
    config_groups = sorted(path.name for path in configs_dir.iterdir() if path.is_dir()) if configs_dir.is_dir() else []
    yaml_files = [path for path in files if path.suffix in {".yaml", ".yml"}]
    py_files = [path for path in files if path.suffix == ".py"]

    texts: dict[Path, str] = {}  # shared so every file is read once across all patterns

    dependencies = collect_dependencies(repo)
    hydra_entrypoints = grep(py_files, r"@hydra\.main|hydra\.main\(", texts)
    lightning_modules = grep(py_files, r"LightningModule|lightning\.LightningModule|L\.LightningModule", texts)
    lightning_datamodules = grep(py_files, r"LightningDataModule|lightning\.LightningDataModule", texts)
    trainer_usage = grep(py_files, r"\bTrainer\b|trainer\.fit|trainer\.validate|trainer\.test", texts)
    aim_logger_configs = grep(yaml_files, r"aim\.pytorch_lightning\.AimLogger", texts)
    direct_aim_tracks = grep(py_files, r"experiment\.track|aim_run\.track|from aim import", texts)
    scalar_logs = grep(py_files, r"\bself\.log\(", texts)
    hparam_logs = grep(py_files, r"log_hyperparams|log_hyperparameters", texts)

    required_groups = {"datamodule", "model", "plmodule", "trainer", "logger", "paths"}
    missing_groups = sorted(required_groups.difference(config_groups))
    groups_detail = f"Found config groups: {', '.join(config_groups) or '-'}"

    has_pyproject = (repo / "pyproject.toml").exists()
    has_lightning = bool({"lightning", "pytorch-lightning"}.intersection(dependencies))
    has_tests = (repo / "tests").exists()

    checks = [
        _check(
            "uv_or_pyproject",
            has_pyproject,
            "Python project metadata exists.",
            "No pyproject.toml detected.",
            miss_status="partial",
        ),
        _check(
            "hydra_dependency",
            "hydra-core" in dependencies,
            "Hydra dependency detected.",
            "No exact hydra-core dependency detected.",
        ),
        _check(
            "lightning_dependency",
            has_lightning,
            "Lightning dependency detected.",
            "No exact lightning dependency detected.",
        ),
        _check(
            "aim_dependency",
            "aim" in dependencies,
            "Aim dependency detected.",
            "No exact aim dependency detected.",
        ),
        _check(
            "hydra_entrypoint",
            bool(hydra_entrypoints),
            "Hydra main entrypoint detected.",
            "No Hydra main entrypoint detected.",
        ),
        _check(
            "config_groups",
            not missing_groups,
            groups_detail,
            f"{groups_detail}; missing required: {', '.join(missing_groups)}",
            miss_status="partial",
        ),
        _check(
            "lightning_module",
            bool(lightning_modules),
            "LightningModule implementation detected.",
            "No LightningModule implementation detected.",
        ),
        _check(
            "lightning_datamodule",
            bool(lightning_datamodules),
            "LightningDataModule implementation detected.",
            "No LightningDataModule implementation detected.",
        ),
        _check(
            "trainer_orchestration",
            bool(trainer_usage),
            "Trainer orchestration detected.",
            "No Trainer orchestration detected.",
        ),
        _check(
            "aim_logger",
            bool(aim_logger_configs),
            "AimLogger config detected.",
            "No AimLogger config detected.",
        ),
        _check(
            "scalar_metrics",
            bool(scalar_logs),
            "Lightning self.log metrics detected.",
            "No Lightning self.log metrics detected.",
        ),
        _check(
            "explicit_artifacts",
            bool(direct_aim_tracks),
            "Explicit Aim artifact/distribution traces detected.",
            "No explicit Aim artifact/distribution traces detected.",
            miss_status="partial",
        ),
        _check(
            "hyperparameters",
            bool(hparam_logs),
            "Hyperparameter logging detected.",
            "No explicit hyperparameter logging helper detected.",
            miss_status="partial",
        ),
        _check(
            "tests",
            has_tests,
            "Tests directory detected.",
            "No top-level tests directory detected.",
            miss_status="partial",
        ),
    ]

    return {
        "repo": str(repo),
        "stack": stack,
        "score": sum(1 for _, item_status, _ in checks if item_status == "pass"),
        "max_score": len(checks),
        "checks": [{"id": item_id, "status": item_status, "detail": detail} for item_id, item_status, detail in checks],
        "evidence": {
            "entrypoints": sorted(path for path in rel_files if path in {"src/train.py", "train.py", "main.py"}),
            "hydra_entrypoints": [rel(repo, path) for path in hydra_entrypoints[:20]],
            "config_groups": config_groups,
            "lightning_modules": [rel(repo, path) for path in lightning_modules[:30]],
            "lightning_datamodules": [rel(repo, path) for path in lightning_datamodules[:30]],
            "aim_logger_configs": [rel(repo, path) for path in aim_logger_configs[:20]],
            "direct_aim_tracks": [rel(repo, path) for path in direct_aim_tracks[:30]],
            "dependencies": sorted(dependencies),
        },
    }


def to_markdown(payload: dict[str, Any]) -> str:
    """Render an audit payload as a Markdown report (score, check table, evidence)."""
    lines = [
        f"# Hydra Lightning Audit: {Path(payload['repo']).name}",
        "",
        f"- Score: {payload['score']}/{payload['max_score']}",
        "",
        "| Check | Status | Detail |",
        "| --- | --- | --- |",
    ]
    for check in payload["checks"]:
        # A literal pipe inside a detail would otherwise split the table cell.
        detail = str(check["detail"]).replace("|", "\\|")
        lines.append(f"| `{check['id']}` | {check['status']} | {detail} |")
    lines.extend(["", "## Evidence", ""])
    for key, value in payload["evidence"].items():
        if isinstance(value, list):
            shown = ", ".join(value[:8]) or "-"
            lines.append(f"- `{key}`: {shown}")
    return "\n".join(lines) + "\n"


def main() -> int:
    """Parse CLI arguments, run the audit and print it as JSON or Markdown."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--stack", default=STACK, choices=[STACK])
    parser.add_argument("--repo", required=True, type=Path)
    parser.add_argument("--format", choices=["json", "markdown"], default="markdown")
    args = parser.parse_args()

    repo = args.repo.resolve()
    if not repo.is_dir():
        parser.error(f"--repo must be an existing directory: {repo}")

    payload = audit(repo, args.stack)
    if args.format == "json":
        print(json.dumps(payload, indent=2, ensure_ascii=False))
    else:
        print(to_markdown(payload), end="")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
#!/usr/bin/env python3
"""Generate a Hydra Lightning migration plan from audit JSON."""

import argparse
import json
from pathlib import Path
from typing import Any


STACK = "hydra-lightning"
# Audit check id -> migration stage emitted when that check did not pass.
STAGES = {
    "uv_or_pyproject": "Normalize project metadata and dependency workflow around `pyproject.toml` and `uv`.",
    "hydra_dependency": "Add Hydra and OmegaConf dependencies, then introduce a `configs/` tree.",
    "lightning_dependency": "Add Lightning and move training lifecycle into Lightning modules and Trainer.",
    "aim_dependency": "Add Aim as the experiment evidence backend.",
    "hydra_entrypoint": "Create `src/train.py` with `@hydra.main` as the canonical entrypoint.",
    "config_groups": "Split config into datamodule/model/plmodule/trainer/logger/paths groups.",
    "lightning_module": "Wrap task logic in a LightningModule with stable train/val/test/predict hooks.",
    "lightning_datamodule": "Move dataset split and DataLoader policy into LightningDataModule classes.",
    "trainer_orchestration": "Instantiate Trainer, callbacks, loggers, datamodule, and plmodule from Hydra config.",
    "aim_logger": "Add `configs/logger/aim.yaml` with `aim.pytorch_lightning.AimLogger`.",
    "scalar_metrics": "Log objective and diagnostic metrics with `self.log` using stable names.",
    "explicit_artifacts": "Add optional `experiment.track(...)` hooks for images or distributions.",
    "hyperparameters": "Log model/data/trainer/optimizer/task config to all configured loggers.",
    "tests": "Add a dummy-data fast-dev-run test that does not require private datasets.",
}


def load_audit(path: Path) -> dict[str, Any]:
    """Load and shape-check the audit JSON at ``path``.

    Raises:
        OSError: If the file cannot be read.
        ValueError: If the content is not valid JSON (``json.JSONDecodeError``
            is a ``ValueError``) or is not an object with a ``checks`` list.
    """
    payload = json.loads(path.read_text(encoding="utf-8"))
    if not isinstance(payload, dict) or not isinstance(payload.get("checks"), list):
        raise ValueError("audit JSON must be an object with a 'checks' list")
    return payload


def _stage_for(check: dict[str, Any]) -> str:
    """Return the migration stage text for one non-passing audit check."""
    check_id = check.get("id")
    stage = STAGES.get(check_id) if isinstance(check_id, str) else None
    if stage is not None:
        return stage
    # Unknown ids are surfaced instead of dropped so a failing check never disappears.
    detail = str(check.get("detail") or "no detail provided").strip()
    return f"Resolve audit check `{check_id}`: {detail}"


def build_plan(audit: dict[str, Any], target_stack: str) -> str:
    """Render the ordered migration plan for an audit payload as Markdown.

    Every check whose status is not ``"pass"`` becomes one numbered stage, in
    audit order. Missing ``repo``/``score`` keys degrade to placeholders
    instead of raising.
    """
    repo_name = Path(str(audit.get("repo") or "")).name or "unknown"
    lines = [
        f"# Hydra Lightning Migration Plan: {repo_name}",
        "",
        f"- Target stack: `{target_stack}`",
        f"- Audit score: {audit.get('score', '?')}/{audit.get('max_score', '?')}",
        "",
        "## Ordered Stages",
    ]

    stages = [
        _stage_for(check)
        for check in audit.get("checks") or []
        if isinstance(check, dict) and check.get("status") != "pass"
    ]
    if stages:
        lines.extend(f"{number}. {stage}" for number, stage in enumerate(stages, start=1))
    else:
        lines.append(
            "1. No missing core stages from the audit. Review naming and evidence quality before changing code."
        )

    lines.extend(
        [
            "",
            "## Acceptance Criteria",
            "- `uv run python src/train.py trainer.fast_dev_run=true trainer.logger=false` succeeds on dummy or tiny data.",
            "- AimLogger can be enabled by config without changing code.",
            "- `aimx query params` and `aimx query metrics` can inspect the generated Aim repo.",
            "- Existing model/data behavior is preserved or covered by focused migration tests.",
            "",
            "## Safety",
            "- This output is a plan only.",
            "- Do not edit the audited repository until the user approves execution.",
        ]
    )
    return "\n".join(lines) + "\n"


def main() -> int:
    """Parse CLI arguments, load the audit file and print the migration plan."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--audit", required=True, type=Path)
    parser.add_argument("--target-stack", default=STACK, choices=[STACK])
    args = parser.parse_args()

    try:
        audit = load_audit(args.audit)
    except (OSError, ValueError) as exc:
        # e.g. an empty file left behind by a failed `audit_repo.py ... > audit.json`
        parser.error(f"cannot load audit JSON from {args.audit}: {exc}")
    audit_stack = audit.get("stack")
    if audit_stack and audit_stack != args.target_stack:
        parser.error(f"audit stack {audit_stack!r} does not match target stack {args.target_stack!r}")
    print(build_plan(audit, args.target_stack), end="")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
"""Scaffold a minimal Hydra + Lightning + Aim repository."""

import argparse
import keyword
import shutil
import sys
from pathlib import Path


# Module-level context the functions below rely on; values match the script header.
STACK = "hydra-lightning"
TEXT_EXTENSIONS = {".md", ".py", ".toml", ".txt", ".yaml", ".yml"}


def validate_package(name: str) -> str:
    """Return ``name`` as an importable package name (``-`` becomes ``_``).

    Raises:
        argparse.ArgumentTypeError: If the result is not a valid, non-keyword
            Python identifier.
    """
    normalized = name.replace("-", "_")
    if not normalized.isidentifier() or keyword.iskeyword(normalized):
        raise argparse.ArgumentTypeError(f"Package must be a valid Python identifier: {name}")
    return normalized


def copy_template(template: Path, output: Path, replacements: dict[str, str]) -> None:
    """Copy ``template`` into ``output``, substituting placeholders.

    A path component equal to a key of ``replacements`` is renamed to its
    value, and ``{{ key }}`` tokens inside files with a ``TEXT_EXTENSIONS``
    suffix are replaced. Everything else is copied verbatim, including
    text-suffixed files that are not valid UTF-8. ``__pycache__`` directories
    in the template are skipped.

    Raises:
        NotADirectoryError: If ``output`` exists and is not a directory.
        FileExistsError: If ``output`` is a non-empty directory.
    """
    if output.exists():
        if not output.is_dir():
            raise NotADirectoryError(f"Output path exists and is not a directory: {output}")
        if any(output.iterdir()):
            raise FileExistsError(f"Output directory exists and is not empty: {output}")

    # Snapshot the template before writing anything so an output directory
    # nested inside the template is never walked while it is being filled.
    sources = sorted(template.rglob("*"))
    output.mkdir(parents=True, exist_ok=True)
    tokens = {f"{{{{ {key} }}}}": value for key, value in replacements.items()}

    for src in sources:
        if src == output or output in src.parents:
            continue
        relative = src.relative_to(template)
        if "__pycache__" in relative.parts:
            continue
        dst = output.joinpath(*(replacements.get(part, part) for part in relative.parts))
        if src.is_dir():
            dst.mkdir(parents=True, exist_ok=True)
            continue
        dst.parent.mkdir(parents=True, exist_ok=True)

        text = None
        if src.suffix in TEXT_EXTENSIONS:
            try:
                text = src.read_text(encoding="utf-8")
            except UnicodeDecodeError:
                text = None  # not UTF-8 after all: fall through to a byte-for-byte copy
        if text is None:
            shutil.copy2(src, dst)
            continue
        for token, value in tokens.items():
            text = text.replace(token, value)
        dst.write_text(text, encoding="utf-8")


def main() -> int:
    """Parse CLI arguments and scaffold the repository.

    Returns ``0`` on success, ``2`` when the bundled template is missing and
    ``1`` when the output location cannot be used.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--stack", default=STACK, choices=[STACK])
    parser.add_argument("--name", required=True, help="Project distribution name.")
    parser.add_argument("--package", required=True, type=validate_package, help="Python import package name.")
    parser.add_argument("--preset", default="classification", choices=["classification", "forecast"])
    parser.add_argument("--output", required=True, type=Path)
    args = parser.parse_args()

    skill_dir = Path(__file__).resolve().parents[1]
    template = skill_dir / "assets" / "template-repo"
    if not template.exists():
        print(f"Template not found: {template}", file=sys.stderr)
        return 2

    output = args.output.resolve()
    try:
        copy_template(
            template,
            output,
            {
                "__package__": args.package,
                "project_name": args.name,
                "package_name": args.package,
                "preset": args.preset,
                "stack": args.stack,
            },
        )
    except (FileExistsError, NotADirectoryError) as exc:
        # Report an unusable output location as a message, not a traceback.
        print(f"error: {exc}", file=sys.stderr)
        return 1
    print(f"Created {args.stack} repository at {output}")
    print("Next: cd into the repo, run `uv sync`, then `uv run pytest`.")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())