diff --git a/tests/test_cell_annotation_plugin.py b/tests/test_cell_annotation_plugin.py
index 63c37fa..1a82199 100644
--- a/tests/test_cell_annotation_plugin.py
+++ b/tests/test_cell_annotation_plugin.py
@@ -144,12 +144,45 @@ def test_manifest_load_and_save_round_trip(self):
             manifest.save_atomic()
             self.assertEqual(Manifest(root).load(), {"checkpoints": []})
 
-    def test_manifest_rebuild_stub_resets_to_empty_dict(self):
+    def test_manifest_rebuild_discovers_checkpoint_artifacts_and_persists_manifest(self):
         with tempfile.TemporaryDirectory() as root:
             manifest = Manifest(root)
-            manifest.data["checkpoints"] = ["stale"]
-            self.assertEqual(manifest.rebuild_from_disk(), {})
-            self.assertEqual(manifest.data, {})
+            checkpoints_dir = Path(root) / "checkpoints"
+            thumbnails_dir = Path(root) / "thumbnails"
+            selections_dir = Path(root) / "selections"
+            checkpoints_dir.mkdir()
+            thumbnails_dir.mkdir()
+            selections_dir.mkdir()
+
+            (checkpoints_dir / "abc123.json").write_text(
+                json.dumps({"id": "abc123", "parents": ["root"], "op": "save"}),
+                encoding="utf-8",
+            )
+            (checkpoints_dir / "ignored.partial.json").write_text("{}", encoding="utf-8")
+            (checkpoints_dir / "corrupt.json").write_text("{not json", encoding="utf-8")
+            (thumbnails_dir / "abc123.png").write_text("png", encoding="utf-8")
+            (selections_dir / "abc123.parquet").write_text("parquet", encoding="utf-8")
+
+            rebuilt = manifest.rebuild_from_disk()
+
+            self.assertEqual(
+                rebuilt,
+                {
+                    "checkpoints": [
+                        {
+                            "id": "abc123",
+                            "parents": ["root"],
+                            "op": "save",
+                            "artifacts": {
+                                "checkpoint": "checkpoints/abc123.json",
+                                "thumbnail": "thumbnails/abc123.png",
+                                "selection": "selections/abc123.parquet",
+                            },
+                        }
+                    ]
+                },
+            )
+            self.assertEqual(json.loads(manifest.path.read_text(encoding="utf-8")), rebuilt)
 
 
 class TestSelectionSpec(unittest.TestCase):
@@ -203,6 +236,33 @@ def test_plugin_lifecycle_initializes_store_manifest_and_providers(self):
             self.assertIsNone(plugin.store)
             self.assertIsNone(plugin.manifest)
 
+    def test_plugin_rebuilds_manifest_when_missing(self):
+        plugin = CellAnnotationPlugin(MagicMock())
+
+        with tempfile.TemporaryDirectory() as dataset_root:
+            store = DatasetStore(dataset_root)
+            store.ensure_dirs()
+            (store.subdir("checkpoints") / "checkpoint-a.json").write_text(
+                json.dumps({"op": "save"}),
+                encoding="utf-8",
+            )
+
+            plugin.on_dataset_opened(dataset_root)
+
+            self.assertEqual(
+                plugin.manifest.data,
+                {
+                    "checkpoints": [
+                        {
+                            "id": "checkpoint-a",
+                            "op": "save",
+                            "artifacts": {"checkpoint": "checkpoints/checkpoint-a.json"},
+                        }
+                    ]
+                },
+            )
+            self.assertTrue(plugin.manifest.path.exists())
+
 
 class TestProviderStubMethods(unittest.TestCase):
     def test_heatmap_import_stub_records_last_path(self):
diff --git a/ueler/viewer/plugin/cell_annotation/manifest.py b/ueler/viewer/plugin/cell_annotation/manifest.py
index a873aad..e93c9a9 100644
--- a/ueler/viewer/plugin/cell_annotation/manifest.py
+++ b/ueler/viewer/plugin/cell_annotation/manifest.py
@@ -36,12 +36,74 @@ def save_atomic(self) -> None:
         atomic_write_json(self._path, self._data)
 
     def rebuild_from_disk(self) -> dict[str, Any]:
-        """Stub manifest rebuild used until checkpoint scanning lands.
+        """Rebuild ``manifest.json`` from checkpoint-sidecar artifacts on disk.
 
-        TODO: replace this with a directory walk that scans checkpoint, thumbnail,
-        and selection artifacts, ignores ``*.partial`` files, and rebuilds the
-        persisted DAG metadata in ``manifest.json``.
+        Every readable ``checkpoints/*.json`` file holding a JSON object becomes
+        one entry, linked to the thumbnail/selection files whose stem matches its
+        id. ``*.partial`` files and unreadable or corrupt sidecars are skipped so
+        one bad file cannot abort the rebuild. The result is written to disk via
+        ``save_atomic`` and returned.
         """
 
-        self._data = {}
+        checkpoints_dir = self._store_path / "checkpoints"
+        thumbnails = self._artifact_map(self._store_path / "thumbnails")
+        selections = self._artifact_map(self._store_path / "selections")
+        checkpoints: list[dict[str, Any]] = []
+
+        if checkpoints_dir.exists():
+            for metadata_path in sorted(checkpoints_dir.glob("*.json")):
+                if self._is_partial(metadata_path):
+                    continue
+                try:
+                    with open(metadata_path, "r", encoding="utf-8") as handle:
+                        payload = json.load(handle)
+                except (OSError, ValueError):
+                    # Unreadable or corrupt sidecar (JSONDecodeError and
+                    # UnicodeDecodeError are both ValueErrors): skip it so
+                    # one bad file cannot abort the whole rebuild.
+                    continue
+                if not isinstance(payload, dict):
+                    continue
+
+                checkpoint = dict(payload)
+                checkpoint_id = str(checkpoint.get("id") or metadata_path.stem)
+                artifacts = checkpoint.setdefault("artifacts", {})
+                if not isinstance(artifacts, dict):
+                    artifacts = {}
+                    checkpoint["artifacts"] = artifacts
+                checkpoint["id"] = checkpoint_id
+                artifacts.setdefault("checkpoint", self._relative_path(metadata_path))
+
+                thumbnail_path = thumbnails.get(checkpoint_id)
+                if thumbnail_path is not None:
+                    artifacts.setdefault("thumbnail", self._relative_path(thumbnail_path))
+
+                selection_path = selections.get(checkpoint_id)
+                if selection_path is not None:
+                    artifacts.setdefault("selection", self._relative_path(selection_path))
+
+                checkpoints.append(checkpoint)
+
+        self._data = {"checkpoints": checkpoints}
+        self.save_atomic()
         return self._data
+
+    def _artifact_map(self, directory: Path) -> dict[str, Path]:
+        """Map file stem to path for each non-partial file directly in *directory*."""
+        artifacts: dict[str, Path] = {}
+        if not directory.exists():
+            return artifacts
+        for path in sorted(directory.iterdir()):
+            if not path.is_file() or self._is_partial(path):
+                continue
+            artifacts.setdefault(path.stem, path)
+        return artifacts
+
+    def _relative_path(self, path: Path) -> str:
+        """Return *path* relative to ``self._store_path`` as a POSIX-style string."""
+        return path.relative_to(self._store_path).as_posix()
+
+    @staticmethod
+    def _is_partial(path: Path) -> bool:
+        """Return True when *path* carries a ``.partial`` suffix (final or inner)."""
+        return path.name.endswith(".partial") or ".partial" in path.suffixes
diff --git a/ueler/viewer/plugin/cell_annotation/plugin.py b/ueler/viewer/plugin/cell_annotation/plugin.py
index 983b90f..1b7f85a 100644
--- a/ueler/viewer/plugin/cell_annotation/plugin.py
+++ b/ueler/viewer/plugin/cell_annotation/plugin.py
@@ -50,7 +50,8 @@ def on_dataset_opened(self, base_folder: str | Path) -> None:
         self._store = DatasetStore(base_folder)
         self._store.ensure_dirs()
         self._manifest = Manifest(self._store.store_path)
-        self._manifest.load()
+        if self._manifest.load() is None:
+            self._manifest.rebuild_from_disk()
 
     def on_dataset_closed(self) -> None:
         self._store = None