From 587cc80eaa3fd00786f41a79433107b8cf2677d1 Mon Sep 17 00:00:00 2001 From: Drew Cain Date: Fri, 12 Jun 2026 16:31:38 -0500 Subject: [PATCH 1/3] fix(sync): skip projects without an absolute local path (#949) A project entry in config.json with an empty path (e.g. `{"path": ""}`) caused background sync and the watch service to adopt the process cwd as the project root, injecting Basic Memory frontmatter into unrelated markdown files. The existing guards only skipped a project when get_project_mode() returned CLOUD. But ProjectEntry.mode defaults to LOCAL, so an empty- or relative-path entry without an explicit mode slipped through, and Path("") resolves against the current working directory. Gate local sync and watching on the path itself: any project whose path is not absolute is excluded, regardless of mode. Legitimate local projects are always resolved to absolute paths at creation, and cloud projects with a real local bisync copy keep their absolute path and are still synced/watched. Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Drew Cain --- src/basic_memory/services/initialization.py | 28 +++++------ src/basic_memory/sync/watch_service.py | 24 +++++----- tests/services/test_initialization.py | 51 +++++++++++++++++++++ tests/sync/test_watch_service_reload.py | 43 +++++++++++++++++ 4 files changed, 120 insertions(+), 26 deletions(-) diff --git a/src/basic_memory/services/initialization.py b/src/basic_memory/services/initialization.py index c4b3f6dc0..bab149520 100644 --- a/src/basic_memory/services/initialization.py +++ b/src/basic_memory/services/initialization.py @@ -13,7 +13,7 @@ from loguru import logger from basic_memory import db -from basic_memory.config import BasicMemoryConfig, DatabaseBackend, ProjectMode +from basic_memory.config import BasicMemoryConfig, DatabaseBackend from basic_memory.models import Project from basic_memory.repository import ( ProjectRepository, @@ -120,18 +120,20 @@ async def initialize_file_sync( active_projects = [p for p in active_projects if p.name == constrained_project] logger.info(f"Background sync constrained to project: {constrained_project}") - # Skip cloud-mode projects that have no local directory. - # Cloud projects with a local bisync copy (absolute path) are kept for local sync. - cloud_skip = [] - for p in active_projects: - if app_config.get_project_mode(p.name) == ProjectMode.CLOUD: - entry = app_config.projects.get(p.name) - if entry and Path(entry.path).is_absolute(): - continue # Cloud project with local bisync copy — keep for local sync - cloud_skip.append(p.name) - if cloud_skip: - active_projects = [p for p in active_projects if p.name not in cloud_skip] - logger.info(f"Skipping cloud-mode projects for local sync: {cloud_skip}") + # Only projects with an absolute local path are safe to sync. + # Trigger: a project whose path is empty or relative. + # Why: Path("") and other relative paths resolve against the process cwd, so + # syncing such a project would adopt whatever directory the server was + # launched from as the project root and inject frontmatter into unrelated + # markdown files there (issue #949). Empty paths come from cloud-only + # projects, but also from any hand-edited config that left mode at the + # LOCAL default, so the gate is on the path itself, not the project mode. + # Outcome: such projects are excluded from local sync. Cloud projects that + # have a real local bisync copy keep their absolute path and still sync. + skip = [p.name for p in active_projects if not Path(p.path).is_absolute()] + if skip: + active_projects = [p for p in active_projects if p.name not in skip] + logger.info(f"Skipping projects without an absolute local path for sync: {skip}") # Start sync for all projects as background tasks (non-blocking) async def sync_project_background(project: Project): diff --git a/src/basic_memory/sync/watch_service.py b/src/basic_memory/sync/watch_service.py index 599e042a6..faab7c1ea 100644 --- a/src/basic_memory/sync/watch_service.py +++ b/src/basic_memory/sync/watch_service.py @@ -10,7 +10,7 @@ if TYPE_CHECKING: from basic_memory.sync.sync_service import SyncService -from basic_memory.config import BasicMemoryConfig, ProjectMode, WATCH_STATUS_JSON +from basic_memory.config import BasicMemoryConfig, WATCH_STATUS_JSON from basic_memory.ignore_utils import load_gitignore_patterns, should_ignore_path from basic_memory.models import Project from basic_memory.repository import ProjectRepository @@ -184,24 +184,22 @@ async def _select_projects_to_watch(self) -> list[Project]: ``--project``, only that project is watched. This keeps concurrent MCP processes from producing duplicate watchers that race on the same files. - 2. Cloud-only projects without a local bisync copy are skipped so we - don't watch a path that does not exist on disk. + 2. Projects without an absolute local path are skipped. A non-absolute + path (empty string for cloud-only projects, or any relative value) + resolves against the process cwd, so watching it would observe and + mutate whatever directory the server was launched from (issue #949). + Cloud projects with a local bisync copy keep their absolute path and + are still watched. """ projects = await self.project_repository.get_active_projects() if self.constrained_project: projects = [p for p in projects if p.name == self.constrained_project] - cloud_skip: list[str] = [] - for p in projects: - if self.app_config.get_project_mode(p.name) == ProjectMode.CLOUD: - entry = self.app_config.projects.get(p.name) - if entry and Path(entry.path).is_absolute(): - continue # Cloud project with local bisync copy — keep watching - cloud_skip.append(p.name) - if cloud_skip: - projects = [p for p in projects if p.name not in cloud_skip] - logger.debug(f"Skipping cloud-mode projects in watch cycle: {cloud_skip}") + skip = [p.name for p in projects if not Path(p.path).is_absolute()] + if skip: + projects = [p for p in projects if p.name not in skip] + logger.debug(f"Skipping projects without an absolute local path in watch cycle: {skip}") return list(projects) diff --git a/tests/services/test_initialization.py b/tests/services/test_initialization.py index 793431294..0a7d646c1 100644 --- a/tests/services/test_initialization.py +++ b/tests/services/test_initialization.py @@ -222,6 +222,57 @@ async def test_initialize_file_sync_no_constraint_when_env_unset( assert _FakeWatchService.last_kwargs.get("constrained_project") is None +@pytest.mark.asyncio +async def test_initialize_file_sync_skips_project_with_non_absolute_path( + app_config: BasicMemoryConfig, config_manager, config_home, monkeypatch +): + """Projects without an absolute local path are excluded from background sync (issue #949). + + A config entry of ``{"path": ""}`` defaults to LOCAL mode and is not + recognized as cloud, yet Path("") resolves to the process cwd. Syncing it + would inject frontmatter into unrelated files, so it must be skipped. + """ + await db.shutdown_db() + try: + from basic_memory.config import ProjectEntry + + good = config_home / "good" + good.mkdir(parents=True, exist_ok=True) + + updated = app_config.model_copy( + update={ + "projects": { + "good": ProjectEntry(path=str(good)), + # No mode -> defaults to LOCAL, empty (cwd-relative) path. + "empty-path": ProjectEntry(path=""), + }, + "default_project": "good", + } + ) + config_manager.save_config(updated) + + await initialize_database(updated) + await reconcile_projects_with_config(updated) + + _disable_test_env_short_circuit(monkeypatch) + monkeypatch.setattr("basic_memory.sync.WatchService", _FakeWatchService) + + infos: list[str] = [] + monkeypatch.setattr( + "basic_memory.services.initialization.logger.info", + lambda message, *args, **kwargs: infos.append(message), + ) + + await initialize_file_sync(updated, quiet=True) + + skip_logs = [m for m in infos if "without an absolute local path" in m] + assert skip_logs, "expected a skip log for the empty-path project" + assert "empty-path" in skip_logs[0] + assert "good" not in skip_logs[0] + finally: + await db.shutdown_db() + + @pytest.mark.asyncio async def test_initialize_app_no_precedence_warning_when_not_conflicting( app_config: BasicMemoryConfig, monkeypatch diff --git a/tests/sync/test_watch_service_reload.py b/tests/sync/test_watch_service_reload.py index 071465de3..c0b0377be 100644 --- a/tests/sync/test_watch_service_reload.py +++ b/tests/sync/test_watch_service_reload.py @@ -236,6 +236,49 @@ async def fake_write_status(): assert seen_project_names == [["local-project", "cloud-bisync"]] +@pytest.mark.asyncio +async def test_run_filters_empty_path_local_mode_project(monkeypatch, tmp_path): + """A project with an empty path is skipped even when mode is LOCAL (issue #949). + + ProjectEntry.mode defaults to LOCAL, so a hand-edited config entry of + ``{"path": ""}`` is not recognized as cloud. The watch cycle must still skip + it: Path("") resolves to the process cwd, and watching that would mutate + whatever directory the server was launched from. + """ + config = BasicMemoryConfig( + watch_project_reload_interval=1, + projects={ + "local-project": {"path": str(tmp_path / "local"), "mode": "local"}, + # No explicit mode -> defaults to LOCAL, with an empty (cwd-relative) path. + "empty-path": {"path": ""}, + }, + ) + repo = _Repo( + projects_return=[ + Project(id=1, name="local-project", path=str(tmp_path / "local"), permalink="local"), + Project(id=2, name="empty-path", path="", permalink="empty-path"), + ] + ) + watch_service = _watch_service(config, repo) + + seen_project_names: list[list[str]] = [] + + async def watch_cycle_stub(projects, stop_event): + seen_project_names.append([p.name for p in projects]) + watch_service.state.running = False + stop_event.set() + + async def fake_write_status(): + return None + + monkeypatch.setattr(watch_service, "_watch_projects_cycle", watch_cycle_stub) + monkeypatch.setattr(watch_service, "write_status", fake_write_status) + + await watch_service.run() + + assert seen_project_names == [["local-project"]] + + @pytest.mark.asyncio async def test_run_continues_after_cycle_error(monkeypatch, tmp_path): config = BasicMemoryConfig( From 0414ce9ff054b1b7a9782f30d3bcc27b53cf6634 Mon Sep 17 00:00:00 2001 From: Drew Cain Date: Fri, 12 Jun 2026 17:04:36 -0500 Subject: [PATCH 2/3] test(sync): use OS-absolute paths in watch selection tests The absolute-path sync guard from the previous commit now checks every project's path, not just cloud-mode ones. Three existing watch-selection tests hardcoded POSIX paths like "/tmp/alpha", which are absolute on Linux/macOS but not on Windows (no drive letter), so the guard filtered them out and the tests failed on windows-latest. Build the project paths from the tmp_path fixture so they are absolute on every platform. Production paths are unaffected: real local projects are always resolved to OS-absolute paths at creation. Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Drew Cain --- tests/sync/test_watch_service.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/sync/test_watch_service.py b/tests/sync/test_watch_service.py index 2a8ac2002..e5c4d2117 100644 --- a/tests/sync/test_watch_service.py +++ b/tests/sync/test_watch_service.py @@ -71,15 +71,18 @@ async def _register_local_projects( @pytest.mark.asyncio async def test_select_projects_to_watch_returns_all_when_unconstrained( - app_config: BasicMemoryConfig, project_repository + app_config: BasicMemoryConfig, project_repository, tmp_path ): """Without a --project constraint, every active project is watched.""" + # Use tmp_path so the project paths are OS-absolute on Windows too — a + # POSIX-style "/tmp/alpha" is not absolute on Windows (no drive letter), + # and _select_projects_to_watch now skips non-absolute paths (issue #949). await _register_local_projects( app_config, project_repository, [ - {"name": "project-alpha", "path": "/tmp/alpha"}, - {"name": "project-beta", "path": "/tmp/beta"}, + {"name": "project-alpha", "path": str(tmp_path / "alpha")}, + {"name": "project-beta", "path": str(tmp_path / "beta")}, ], ) @@ -94,7 +97,7 @@ async def test_select_projects_to_watch_returns_all_when_unconstrained( @pytest.mark.asyncio async def test_select_projects_to_watch_filters_to_constrained_project( - app_config: BasicMemoryConfig, project_repository + app_config: BasicMemoryConfig, project_repository, tmp_path ): """With ``constrained_project`` set, only that project is returned. @@ -106,8 +109,8 @@ async def test_select_projects_to_watch_filters_to_constrained_project( app_config, project_repository, [ - {"name": "project-alpha", "path": "/tmp/alpha"}, - {"name": "project-beta", "path": "/tmp/beta"}, + {"name": "project-alpha", "path": str(tmp_path / "alpha")}, + {"name": "project-beta", "path": str(tmp_path / "beta")}, ], ) @@ -124,13 +127,13 @@ async def test_select_projects_to_watch_filters_to_constrained_project( @pytest.mark.asyncio async def test_select_projects_to_watch_empty_when_constrained_project_missing( - app_config: BasicMemoryConfig, project_repository + app_config: BasicMemoryConfig, project_repository, tmp_path ): """An unknown constraint yields an empty watch set rather than watching everything.""" await _register_local_projects( app_config, project_repository, - [{"name": "project-alpha", "path": "/tmp/alpha"}], + [{"name": "project-alpha", "path": str(tmp_path / "alpha")}], ) service = WatchService( From 51a529becf4b257f195e6f9f76c3ff7204962a64 Mon Sep 17 00:00:00 2001 From: Drew Cain Date: Fri, 12 Jun 2026 17:41:12 -0500 Subject: [PATCH 3/3] fix(sync): also exclude orphan DB projects absent from config (#949) Address Codex review feedback. The previous path-only filter dropped an implicit protection: get_project_mode() defaults projects missing from config to CLOUD, so the old mode-based guard skipped stale DB rows that had been removed from config. With a path-only check, an orphan row with an absolute path would pass and background sync/watch could still mutate a directory the user already removed from config (config is the source of truth) if reconciliation was skipped or failed. Introduce BasicMemoryConfig.is_locally_syncable(name, path), which requires both config membership and an absolute path, and use it from both the background sync selection and the watch cycle so the two paths cannot diverge. Add direct unit tests for the helper plus an orphan-row regression test for the watch selection. Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Drew Cain --- src/basic_memory/config.py | 20 ++++++++++ src/basic_memory/services/initialization.py | 18 +++------ src/basic_memory/sync/watch_service.py | 18 +++++---- tests/services/test_initialization.py | 2 +- tests/sync/test_watch_service_reload.py | 41 +++++++++++++++++++++ tests/test_config.py | 24 ++++++++++++ 6 files changed, 102 insertions(+), 21 deletions(-) diff --git a/src/basic_memory/config.py b/src/basic_memory/config.py index 92ff504ab..3da68d183 100644 --- a/src/basic_memory/config.py +++ b/src/basic_memory/config.py @@ -658,6 +658,26 @@ def get_project_mode(self, project_name: str) -> ProjectMode: entry = self.projects.get(project_name) return entry.mode if entry else ProjectMode.CLOUD + def is_locally_syncable(self, project_name: str, project_path: str) -> bool: + """Whether a project should be synced/watched on the local filesystem. + + Both conditions are required (issue #949): + + * The project is present in config. Config is the source of truth, so a + stale database row that was removed from config — but whose deletion + has not yet been reconciled, or whose reconciliation failed — must + not be synced even though it still has a real directory on disk. + * Its path is absolute. An empty or relative path resolves against the + process cwd, so syncing it would adopt whatever directory the server + was launched from as the project root and mutate unrelated files. + + Cloud-only projects (empty/slug path) and cloud projects with a real + local bisync copy (absolute path) are handled correctly by these two + conditions, so no separate mode check is needed. + """ + entry = self.projects.get(project_name) + return entry is not None and Path(project_path).is_absolute() + def set_project_mode(self, project_name: str, mode: ProjectMode) -> None: """Set the routing mode for a project. diff --git a/src/basic_memory/services/initialization.py b/src/basic_memory/services/initialization.py index bab149520..784b54deb 100644 --- a/src/basic_memory/services/initialization.py +++ b/src/basic_memory/services/initialization.py @@ -120,20 +120,14 @@ async def initialize_file_sync( active_projects = [p for p in active_projects if p.name == constrained_project] logger.info(f"Background sync constrained to project: {constrained_project}") - # Only projects with an absolute local path are safe to sync. - # Trigger: a project whose path is empty or relative. - # Why: Path("") and other relative paths resolve against the process cwd, so - # syncing such a project would adopt whatever directory the server was - # launched from as the project root and inject frontmatter into unrelated - # markdown files there (issue #949). Empty paths come from cloud-only - # projects, but also from any hand-edited config that left mode at the - # LOCAL default, so the gate is on the path itself, not the project mode. - # Outcome: such projects are excluded from local sync. Cloud projects that - # have a real local bisync copy keep their absolute path and still sync. - skip = [p.name for p in active_projects if not Path(p.path).is_absolute()] + # Only sync projects that are in config (source of truth) and have an + # absolute local path; see BasicMemoryConfig.is_locally_syncable. This keeps + # background sync from adopting the process cwd as a project root and + # mutating unrelated files (issue #949). + skip = [p.name for p in active_projects if not app_config.is_locally_syncable(p.name, p.path)] if skip: active_projects = [p for p in active_projects if p.name not in skip] - logger.info(f"Skipping projects without an absolute local path for sync: {skip}") + logger.info(f"Skipping projects that are not locally syncable for sync: {skip}") # Start sync for all projects as background tasks (non-blocking) async def sync_project_background(project: Project): diff --git a/src/basic_memory/sync/watch_service.py b/src/basic_memory/sync/watch_service.py index faab7c1ea..ccf37d913 100644 --- a/src/basic_memory/sync/watch_service.py +++ b/src/basic_memory/sync/watch_service.py @@ -184,22 +184,24 @@ async def _select_projects_to_watch(self) -> list[Project]: ``--project``, only that project is watched. This keeps concurrent MCP processes from producing duplicate watchers that race on the same files. - 2. Projects without an absolute local path are skipped. A non-absolute - path (empty string for cloud-only projects, or any relative value) - resolves against the process cwd, so watching it would observe and - mutate whatever directory the server was launched from (issue #949). - Cloud projects with a local bisync copy keep their absolute path and - are still watched. + 2. Projects that are not locally syncable are skipped — those missing + from config (config is the source of truth, so stale DB rows must + not be watched) or with a non-absolute path (which would resolve + against the process cwd and make the watcher observe and mutate the + directory the server was launched from). See + ``BasicMemoryConfig.is_locally_syncable`` (issue #949). Cloud + projects with a local bisync copy keep their absolute path and are + still watched. """ projects = await self.project_repository.get_active_projects() if self.constrained_project: projects = [p for p in projects if p.name == self.constrained_project] - skip = [p.name for p in projects if not Path(p.path).is_absolute()] + skip = [p.name for p in projects if not self.app_config.is_locally_syncable(p.name, p.path)] if skip: projects = [p for p in projects if p.name not in skip] - logger.debug(f"Skipping projects without an absolute local path in watch cycle: {skip}") + logger.debug(f"Skipping projects that are not locally syncable in watch cycle: {skip}") return list(projects) diff --git a/tests/services/test_initialization.py b/tests/services/test_initialization.py index 0a7d646c1..8723cba6d 100644 --- a/tests/services/test_initialization.py +++ b/tests/services/test_initialization.py @@ -265,7 +265,7 @@ async def test_initialize_file_sync_skips_project_with_non_absolute_path( await initialize_file_sync(updated, quiet=True) - skip_logs = [m for m in infos if "without an absolute local path" in m] + skip_logs = [m for m in infos if "not locally syncable" in m] assert skip_logs, "expected a skip log for the empty-path project" assert "empty-path" in skip_logs[0] assert "good" not in skip_logs[0] diff --git a/tests/sync/test_watch_service_reload.py b/tests/sync/test_watch_service_reload.py index c0b0377be..7b13ac44f 100644 --- a/tests/sync/test_watch_service_reload.py +++ b/tests/sync/test_watch_service_reload.py @@ -279,6 +279,47 @@ async def fake_write_status(): assert seen_project_names == [["local-project"]] +@pytest.mark.asyncio +async def test_run_filters_orphan_db_project_absent_from_config(monkeypatch, tmp_path): + """A DB row not present in config is skipped even with an absolute path. + + Config is the source of truth. Reconciliation normally deletes orphan rows, + but if it is skipped or fails a stale row could remain; watching it would + mutate a directory the user already removed from config. + """ + config = BasicMemoryConfig( + watch_project_reload_interval=1, + projects={ + "local-project": {"path": str(tmp_path / "local"), "mode": "local"}, + }, + ) + repo = _Repo( + projects_return=[ + Project(id=1, name="local-project", path=str(tmp_path / "local"), permalink="local"), + # Absolute path, but no matching entry in config -> stale/orphan row. + Project(id=2, name="orphan", path=str(tmp_path / "orphan"), permalink="orphan"), + ] + ) + watch_service = _watch_service(config, repo) + + seen_project_names: list[list[str]] = [] + + async def watch_cycle_stub(projects, stop_event): + seen_project_names.append([p.name for p in projects]) + watch_service.state.running = False + stop_event.set() + + async def fake_write_status(): + return None + + monkeypatch.setattr(watch_service, "_watch_projects_cycle", watch_cycle_stub) + monkeypatch.setattr(watch_service, "write_status", fake_write_status) + + await watch_service.run() + + assert seen_project_names == [["local-project"]] + + @pytest.mark.asyncio async def test_run_continues_after_cycle_error(monkeypatch, tmp_path): config = BasicMemoryConfig( diff --git a/tests/test_config.py b/tests/test_config.py index f88ad8399..14516c781 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1245,6 +1245,30 @@ def test_set_project_mode_local_resets_to_default(self): assert config.projects["research"].mode == ProjectMode.LOCAL assert config.get_project_mode("research") == ProjectMode.LOCAL + def test_is_locally_syncable_true_for_config_project_with_absolute_path(self, tmp_path): + """A project in config with an absolute path is locally syncable.""" + abs_path = str(tmp_path / "research") + config = BasicMemoryConfig(projects={"research": ProjectEntry(path=abs_path)}) + assert config.is_locally_syncable("research", abs_path) is True + + def test_is_locally_syncable_false_for_empty_path(self): + """An empty path resolves to cwd, so it is never locally syncable (#949).""" + config = BasicMemoryConfig(projects={"empty": ProjectEntry(path="")}) + assert config.is_locally_syncable("empty", "") is False + + def test_is_locally_syncable_false_for_relative_path(self): + """A relative (slug) path, as used by cloud-only projects, is not syncable.""" + config = BasicMemoryConfig(projects={"cloud": ProjectEntry(path="cloud-slug")}) + assert config.is_locally_syncable("cloud", "cloud-slug") is False + + def test_is_locally_syncable_false_for_orphan_not_in_config(self, tmp_path): + """A DB row absent from config is not syncable even with an absolute path. + + Config is the source of truth; stale rows must not be synced (#949). + """ + config = BasicMemoryConfig(projects={}) + assert config.is_locally_syncable("orphan", str(tmp_path / "orphan")) is False + def test_cloud_api_key_defaults_to_none(self): """Test that cloud_api_key defaults to None.""" config = BasicMemoryConfig()