diff --git a/src/basic_memory/config.py b/src/basic_memory/config.py index 92ff504a..3da68d18 100644 --- a/src/basic_memory/config.py +++ b/src/basic_memory/config.py @@ -658,6 +658,26 @@ def get_project_mode(self, project_name: str) -> ProjectMode: entry = self.projects.get(project_name) return entry.mode if entry else ProjectMode.CLOUD + def is_locally_syncable(self, project_name: str, project_path: str) -> bool: + """Whether a project should be synced/watched on the local filesystem. + + Both conditions are required (issue #949): + + * The project is present in config. Config is the source of truth, so a + stale database row that was removed from config — but whose deletion + has not yet been reconciled, or whose reconciliation failed — must + not be synced even though it still has a real directory on disk. + * Its path is absolute. An empty or relative path resolves against the + process cwd, so syncing it would adopt whatever directory the server + was launched from as the project root and mutate unrelated files. + + Cloud-only projects (empty/slug path) and cloud projects with a real + local bisync copy (absolute path) are handled correctly by these two + conditions, so no separate mode check is needed. + """ + entry = self.projects.get(project_name) + return entry is not None and Path(project_path).is_absolute() + def set_project_mode(self, project_name: str, mode: ProjectMode) -> None: """Set the routing mode for a project. diff --git a/src/basic_memory/services/initialization.py b/src/basic_memory/services/initialization.py index c4b3f6dc..784b54de 100644 --- a/src/basic_memory/services/initialization.py +++ b/src/basic_memory/services/initialization.py @@ -13,7 +13,7 @@ from loguru import logger from basic_memory import db -from basic_memory.config import BasicMemoryConfig, DatabaseBackend, ProjectMode +from basic_memory.config import BasicMemoryConfig, DatabaseBackend from basic_memory.models import Project from basic_memory.repository import ( ProjectRepository, @@ -120,18 +120,14 @@ async def initialize_file_sync( active_projects = [p for p in active_projects if p.name == constrained_project] logger.info(f"Background sync constrained to project: {constrained_project}") - # Skip cloud-mode projects that have no local directory. - # Cloud projects with a local bisync copy (absolute path) are kept for local sync. - cloud_skip = [] - for p in active_projects: - if app_config.get_project_mode(p.name) == ProjectMode.CLOUD: - entry = app_config.projects.get(p.name) - if entry and Path(entry.path).is_absolute(): - continue # Cloud project with local bisync copy — keep for local sync - cloud_skip.append(p.name) - if cloud_skip: - active_projects = [p for p in active_projects if p.name not in cloud_skip] - logger.info(f"Skipping cloud-mode projects for local sync: {cloud_skip}") + # Only sync projects that are in config (source of truth) and have an + # absolute local path; see BasicMemoryConfig.is_locally_syncable. This keeps + # background sync from adopting the process cwd as a project root and + # mutating unrelated files (issue #949). + skip = [p.name for p in active_projects if not app_config.is_locally_syncable(p.name, p.path)] + if skip: + active_projects = [p for p in active_projects if p.name not in skip] + logger.info(f"Skipping projects that are not locally syncable for sync: {skip}") # Start sync for all projects as background tasks (non-blocking) async def sync_project_background(project: Project): diff --git a/src/basic_memory/sync/watch_service.py b/src/basic_memory/sync/watch_service.py index 599e042a..ccf37d91 100644 --- a/src/basic_memory/sync/watch_service.py +++ b/src/basic_memory/sync/watch_service.py @@ -10,7 +10,7 @@ if TYPE_CHECKING: from basic_memory.sync.sync_service import SyncService -from basic_memory.config import BasicMemoryConfig, ProjectMode, WATCH_STATUS_JSON +from basic_memory.config import BasicMemoryConfig, WATCH_STATUS_JSON from basic_memory.ignore_utils import load_gitignore_patterns, should_ignore_path from basic_memory.models import Project from basic_memory.repository import ProjectRepository @@ -184,24 +184,24 @@ async def _select_projects_to_watch(self) -> list[Project]: ``--project``, only that project is watched. This keeps concurrent MCP processes from producing duplicate watchers that race on the same files. - 2. Cloud-only projects without a local bisync copy are skipped so we - don't watch a path that does not exist on disk. + 2. Projects that are not locally syncable are skipped — those missing + from config (config is the source of truth, so stale DB rows must + not be watched) or with a non-absolute path (which would resolve + against the process cwd and make the watcher observe and mutate the + directory the server was launched from). See + ``BasicMemoryConfig.is_locally_syncable`` (issue #949). Cloud + projects with a local bisync copy keep their absolute path and are + still watched. """ projects = await self.project_repository.get_active_projects() if self.constrained_project: projects = [p for p in projects if p.name == self.constrained_project] - cloud_skip: list[str] = [] - for p in projects: - if self.app_config.get_project_mode(p.name) == ProjectMode.CLOUD: - entry = self.app_config.projects.get(p.name) - if entry and Path(entry.path).is_absolute(): - continue # Cloud project with local bisync copy — keep watching - cloud_skip.append(p.name) - if cloud_skip: - projects = [p for p in projects if p.name not in cloud_skip] - logger.debug(f"Skipping cloud-mode projects in watch cycle: {cloud_skip}") + skip = [p.name for p in projects if not self.app_config.is_locally_syncable(p.name, p.path)] + if skip: + projects = [p for p in projects if p.name not in skip] + logger.debug(f"Skipping projects that are not locally syncable in watch cycle: {skip}") return list(projects) diff --git a/tests/services/test_initialization.py b/tests/services/test_initialization.py index 79343129..8723cba6 100644 --- a/tests/services/test_initialization.py +++ b/tests/services/test_initialization.py @@ -222,6 +222,57 @@ async def test_initialize_file_sync_no_constraint_when_env_unset( assert _FakeWatchService.last_kwargs.get("constrained_project") is None +@pytest.mark.asyncio +async def test_initialize_file_sync_skips_project_with_non_absolute_path( + app_config: BasicMemoryConfig, config_manager, config_home, monkeypatch +): + """Projects without an absolute local path are excluded from background sync (issue #949). + + A config entry of ``{"path": ""}`` defaults to LOCAL mode and is not + recognized as cloud, yet Path("") resolves to the process cwd. Syncing it + would inject frontmatter into unrelated files, so it must be skipped. + """ + await db.shutdown_db() + try: + from basic_memory.config import ProjectEntry + + good = config_home / "good" + good.mkdir(parents=True, exist_ok=True) + + updated = app_config.model_copy( + update={ + "projects": { + "good": ProjectEntry(path=str(good)), + # No mode -> defaults to LOCAL, empty (cwd-relative) path. + "empty-path": ProjectEntry(path=""), + }, + "default_project": "good", + } + ) + config_manager.save_config(updated) + + await initialize_database(updated) + await reconcile_projects_with_config(updated) + + _disable_test_env_short_circuit(monkeypatch) + monkeypatch.setattr("basic_memory.sync.WatchService", _FakeWatchService) + + infos: list[str] = [] + monkeypatch.setattr( + "basic_memory.services.initialization.logger.info", + lambda message, *args, **kwargs: infos.append(message), + ) + + await initialize_file_sync(updated, quiet=True) + + skip_logs = [m for m in infos if "not locally syncable" in m] + assert skip_logs, "expected a skip log for the empty-path project" + assert "empty-path" in skip_logs[0] + assert "good" not in skip_logs[0] + finally: + await db.shutdown_db() + + @pytest.mark.asyncio async def test_initialize_app_no_precedence_warning_when_not_conflicting( app_config: BasicMemoryConfig, monkeypatch diff --git a/tests/sync/test_watch_service.py b/tests/sync/test_watch_service.py index 2a8ac200..e5c4d211 100644 --- a/tests/sync/test_watch_service.py +++ b/tests/sync/test_watch_service.py @@ -71,15 +71,18 @@ async def _register_local_projects( @pytest.mark.asyncio async def test_select_projects_to_watch_returns_all_when_unconstrained( - app_config: BasicMemoryConfig, project_repository + app_config: BasicMemoryConfig, project_repository, tmp_path ): """Without a --project constraint, every active project is watched.""" + # Use tmp_path so the project paths are OS-absolute on Windows too — a + # POSIX-style "/tmp/alpha" is not absolute on Windows (no drive letter), + # and _select_projects_to_watch now skips non-absolute paths (issue #949). await _register_local_projects( app_config, project_repository, [ - {"name": "project-alpha", "path": "/tmp/alpha"}, - {"name": "project-beta", "path": "/tmp/beta"}, + {"name": "project-alpha", "path": str(tmp_path / "alpha")}, + {"name": "project-beta", "path": str(tmp_path / "beta")}, ], ) @@ -94,7 +97,7 @@ async def test_select_projects_to_watch_returns_all_when_unconstrained( @pytest.mark.asyncio async def test_select_projects_to_watch_filters_to_constrained_project( - app_config: BasicMemoryConfig, project_repository + app_config: BasicMemoryConfig, project_repository, tmp_path ): """With ``constrained_project`` set, only that project is returned. @@ -106,8 +109,8 @@ async def test_select_projects_to_watch_filters_to_constrained_project( app_config, project_repository, [ - {"name": "project-alpha", "path": "/tmp/alpha"}, - {"name": "project-beta", "path": "/tmp/beta"}, + {"name": "project-alpha", "path": str(tmp_path / "alpha")}, + {"name": "project-beta", "path": str(tmp_path / "beta")}, ], ) @@ -124,13 +127,13 @@ async def test_select_projects_to_watch_filters_to_constrained_project( @pytest.mark.asyncio async def test_select_projects_to_watch_empty_when_constrained_project_missing( - app_config: BasicMemoryConfig, project_repository + app_config: BasicMemoryConfig, project_repository, tmp_path ): """An unknown constraint yields an empty watch set rather than watching everything.""" await _register_local_projects( app_config, project_repository, - [{"name": "project-alpha", "path": "/tmp/alpha"}], + [{"name": "project-alpha", "path": str(tmp_path / "alpha")}], ) service = WatchService( diff --git a/tests/sync/test_watch_service_reload.py b/tests/sync/test_watch_service_reload.py index 071465de..7b13ac44 100644 --- a/tests/sync/test_watch_service_reload.py +++ b/tests/sync/test_watch_service_reload.py @@ -236,6 +236,90 @@ async def fake_write_status(): assert seen_project_names == [["local-project", "cloud-bisync"]] +@pytest.mark.asyncio +async def test_run_filters_empty_path_local_mode_project(monkeypatch, tmp_path): + """A project with an empty path is skipped even when mode is LOCAL (issue #949). + + ProjectEntry.mode defaults to LOCAL, so a hand-edited config entry of + ``{"path": ""}`` is not recognized as cloud. The watch cycle must still skip + it: Path("") resolves to the process cwd, and watching that would mutate + whatever directory the server was launched from. + """ + config = BasicMemoryConfig( + watch_project_reload_interval=1, + projects={ + "local-project": {"path": str(tmp_path / "local"), "mode": "local"}, + # No explicit mode -> defaults to LOCAL, with an empty (cwd-relative) path. + "empty-path": {"path": ""}, + }, + ) + repo = _Repo( + projects_return=[ + Project(id=1, name="local-project", path=str(tmp_path / "local"), permalink="local"), + Project(id=2, name="empty-path", path="", permalink="empty-path"), + ] + ) + watch_service = _watch_service(config, repo) + + seen_project_names: list[list[str]] = [] + + async def watch_cycle_stub(projects, stop_event): + seen_project_names.append([p.name for p in projects]) + watch_service.state.running = False + stop_event.set() + + async def fake_write_status(): + return None + + monkeypatch.setattr(watch_service, "_watch_projects_cycle", watch_cycle_stub) + monkeypatch.setattr(watch_service, "write_status", fake_write_status) + + await watch_service.run() + + assert seen_project_names == [["local-project"]] + + +@pytest.mark.asyncio +async def test_run_filters_orphan_db_project_absent_from_config(monkeypatch, tmp_path): + """A DB row not present in config is skipped even with an absolute path. + + Config is the source of truth. Reconciliation normally deletes orphan rows, + but if it is skipped or fails a stale row could remain; watching it would + mutate a directory the user already removed from config. + """ + config = BasicMemoryConfig( + watch_project_reload_interval=1, + projects={ + "local-project": {"path": str(tmp_path / "local"), "mode": "local"}, + }, + ) + repo = _Repo( + projects_return=[ + Project(id=1, name="local-project", path=str(tmp_path / "local"), permalink="local"), + # Absolute path, but no matching entry in config -> stale/orphan row. + Project(id=2, name="orphan", path=str(tmp_path / "orphan"), permalink="orphan"), + ] + ) + watch_service = _watch_service(config, repo) + + seen_project_names: list[list[str]] = [] + + async def watch_cycle_stub(projects, stop_event): + seen_project_names.append([p.name for p in projects]) + watch_service.state.running = False + stop_event.set() + + async def fake_write_status(): + return None + + monkeypatch.setattr(watch_service, "_watch_projects_cycle", watch_cycle_stub) + monkeypatch.setattr(watch_service, "write_status", fake_write_status) + + await watch_service.run() + + assert seen_project_names == [["local-project"]] + + @pytest.mark.asyncio async def test_run_continues_after_cycle_error(monkeypatch, tmp_path): config = BasicMemoryConfig( diff --git a/tests/test_config.py b/tests/test_config.py index f88ad839..14516c78 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1245,6 +1245,30 @@ def test_set_project_mode_local_resets_to_default(self): assert config.projects["research"].mode == ProjectMode.LOCAL assert config.get_project_mode("research") == ProjectMode.LOCAL + def test_is_locally_syncable_true_for_config_project_with_absolute_path(self, tmp_path): + """A project in config with an absolute path is locally syncable.""" + abs_path = str(tmp_path / "research") + config = BasicMemoryConfig(projects={"research": ProjectEntry(path=abs_path)}) + assert config.is_locally_syncable("research", abs_path) is True + + def test_is_locally_syncable_false_for_empty_path(self): + """An empty path resolves to cwd, so it is never locally syncable (#949).""" + config = BasicMemoryConfig(projects={"empty": ProjectEntry(path="")}) + assert config.is_locally_syncable("empty", "") is False + + def test_is_locally_syncable_false_for_relative_path(self): + """A relative (slug) path, as used by cloud-only projects, is not syncable.""" + config = BasicMemoryConfig(projects={"cloud": ProjectEntry(path="cloud-slug")}) + assert config.is_locally_syncable("cloud", "cloud-slug") is False + + def test_is_locally_syncable_false_for_orphan_not_in_config(self, tmp_path): + """A DB row absent from config is not syncable even with an absolute path. + + Config is the source of truth; stale rows must not be synced (#949). + """ + config = BasicMemoryConfig(projects={}) + assert config.is_locally_syncable("orphan", str(tmp_path / "orphan")) is False + def test_cloud_api_key_defaults_to_none(self): """Test that cloud_api_key defaults to None.""" config = BasicMemoryConfig()