diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b9e96e..75dac03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to Notebook LM will be documented in this file. +## [0.3.0.0] - 2026-04-15 + +### Added +- One-click Zotero library import. Hit Cmd+K, type "Zotero", select which collections to import, and your entire paper library becomes searchable notebooks. Each Zotero collection maps to one notebook. +- Auto-detects Zotero data directory on macOS, Windows, and Linux. Opens the database read-only so your Zotero library is never modified. +- Resolves PDF attachment paths from Zotero's storage directory and imports them through the existing ingestion pipeline (chunking, embedding, summarization). + ## [0.2.0.0] - 2026-04-15 ### Added diff --git a/VERSION b/VERSION index e396b40..1da00ae 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.2.0.0 +0.3.0.0 diff --git a/apps/desktop/src/components/layout/AppShell.tsx b/apps/desktop/src/components/layout/AppShell.tsx index e885c2f..50c0315 100644 --- a/apps/desktop/src/components/layout/AppShell.tsx +++ b/apps/desktop/src/components/layout/AppShell.tsx @@ -13,6 +13,7 @@ import { ConnectionBanner } from '../ui/ConnectionBanner'; import { SetupWizard } from '../ui/SetupWizard'; import { CommandPalette } from '../ui/CommandPalette'; import { KeyboardShortcutsOverlay } from '../ui/KeyboardShortcuts'; +import { ZoteroImportDialog } from '../ui/ZoteroImport'; import './layout.css'; function isWizardComplete(): boolean { @@ -32,6 +33,7 @@ export function AppShell() { const [pendingSuggest, setPendingSuggest] = useState(null); const [paletteOpen, setPaletteOpen] = useState(false); const [shortcutsOpen, setShortcutsOpen] = useState(false); + const [zoteroOpen, setZoteroOpen] = useState(false); // Global keyboard shortcuts useEffect(() => { @@ -230,8 +232,9 @@ export function AppShell() { /> - setPaletteOpen(false)} /> + setPaletteOpen(false)} onZoteroImport={() => setZoteroOpen(true)} /> 
setShortcutsOpen(false)} />
+      <ZoteroImportDialog open={zoteroOpen} onClose={() => setZoteroOpen(false)} />
   );
 }
diff --git a/apps/desktop/src/components/ui/CommandPalette.tsx b/apps/desktop/src/components/ui/CommandPalette.tsx
index 825963c..aee1912 100644
--- a/apps/desktop/src/components/ui/CommandPalette.tsx
+++ b/apps/desktop/src/components/ui/CommandPalette.tsx
@@ -18,7 +18,7 @@ function matchScore(query: string, label: string): number {
   return 0;
 }
 
-export function CommandPalette({ open, onClose }: { open: boolean; onClose: () => void }) {
+export function CommandPalette({ open, onClose, onZoteroImport }: { open: boolean; onClose: () => void; onZoteroImport?: () => void }) {
   const [query, setQuery] = useState('');
   const inputRef = useRef(null);
   const notebooks = useAppStore((s) => s.notebooks);
@@ -87,6 +87,12 @@ export function CommandPalette({ open, onClose }: { open: boolean; onClose: () =
         onClose();
       },
     },
+    ...(onZoteroImport ? [{
+      id: 'action-zotero',
+      label: 'Import from Zotero',
+      section: 'Actions' as const,
+      onSelect: () => { onZoteroImport(); onClose(); },
+    }] : []),
   ];
 
   const filtered = query
diff --git a/apps/desktop/src/components/ui/ZoteroImport.tsx b/apps/desktop/src/components/ui/ZoteroImport.tsx
new file mode 100644
index 0000000..211de6d
--- /dev/null
+++ b/apps/desktop/src/components/ui/ZoteroImport.tsx
@@ -0,0 +1,245 @@
+import { useEffect, useState } from 'react';
+import { useAppStore } from '../../store/app-store';
+import { useNotebooks } from '../../hooks/useNotebooks';
+import { showToast } from './Toast';
+import './zotero-import.css';
+
+/** One Zotero collection as returned by `GET /zotero/detect`. */
+interface ZoteroCollection {
+  id: number;
+  name: string;
+  parent_id: number | null;
+  paper_count: number;
+}
+
+/** Payload of `GET /zotero/detect`; when `detected` is false, `error` says why. */
+interface ZoteroLibrary {
+  detected: boolean;
+  data_dir: string | null;
+  total_items: number;
+  total_pdfs: number;
+  collections: ZoteroCollection[];
+  error: string | null;
+}
+
+/** The fields of the `POST /zotero/import` response that the dialog reads. */
+interface ZoteroImportResult {
+  collections_imported: number;
+  total_pdfs: number;
+  total_chunks: number;
+  errors?: string[];
+}
+
+/** Placeholder library used when the backend cannot be queried. */
+function undetectedLibrary(error: string): ZoteroLibrary {
+  return { detected: false, data_dir: null, total_items: 0, total_pdfs: 0, collections: [], error };
+}
+
+/**
+ * Resolves the API root: the URL reported by `window.notebookBridge` when
+ * available, otherwise `VITE_API_BASE_URL`, otherwise the local default.
+ */
+async function getApiBase(): Promise<string> {
+  if (window.notebookBridge?.backendUrl) {
+    try {
+      const url = await window.notebookBridge.backendUrl();
+      if (url) return `${url}/api`;
+    } catch {
+      // Best-effort lookup: fall through to the configured default.
+    }
+  }
+  return (import.meta.env.VITE_API_BASE_URL as string | undefined) ?? 'http://127.0.0.1:8000/api';
+}
+
+/** Asks the backend to detect the Zotero library; resolves even on failure. */
+async function fetchZoteroLibrary(): Promise<ZoteroLibrary> {
+  try {
+    const base = await getApiBase();
+    const res = await fetch(`${base}/zotero/detect`);
+    // A non-2xx body is not a ZoteroLibrary; do not render it as one.
+    if (!res.ok) return undetectedLibrary(`Backend returned HTTP ${res.status}`);
+    const data: ZoteroLibrary = await res.json();
+    return data;
+  } catch {
+    return undetectedLibrary('Failed to connect to backend');
+  }
+}
+
+/** Extracts a human-readable `detail` from an error body, if it has one. */
+function errorDetail(body: unknown): string | null {
+  if (typeof body !== 'object' || body === null || !('detail' in body)) return null;
+  const detail = (body as { detail: unknown }).detail;
+  // Validation errors carry a list here; only a plain string is displayable.
+  return typeof detail === 'string' && detail !== '' ? detail : null;
+}
+
+export function ZoteroImportDialog({ open, onClose }: { open: boolean; onClose: () => void }) {
+  const [library, setLibrary] = useState<ZoteroLibrary | null>(null);
+  const [loading, setLoading] = useState(false);
+  const [importing, setImporting] = useState(false);
+  const [selected, setSelected] = useState<Set<number>>(new Set());
+  const [importResult, setImportResult] = useState<string | null>(null);
+  const { refresh: refreshNotebooks } = useNotebooks();
+
+  // Re-detect the library each time the dialog opens. The cleanup flag drops
+  // a response that arrives after the dialog was closed or reopened, so a
+  // stale result can never overwrite a newer one.
+  useEffect(() => {
+    if (!open) return;
+    let cancelled = false;
+    setLibrary(null);
+    setSelected(new Set());
+    setImportResult(null);
+    setLoading(true);
+    void fetchZoteroLibrary().then((data) => {
+      if (cancelled) return;
+      setLibrary(data);
+      // Preselect every collection so the default action imports everything.
+      if (data.detected) setSelected(new Set(data.collections.map((c) => c.id)));
+      setLoading(false);
+    });
+    return () => {
+      cancelled = true;
+      setLoading(false);
+    };
+  }, [open]);
+
+  async function handleImport() {
+    if (importing || selected.size === 0) return;
+    setImporting(true);
+    try {
+      const base = await getApiBase();
+      const res = await fetch(`${base}/zotero/import`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({
+          collection_ids: Array.from(selected),
+          data_dir: library?.data_dir,
+        }),
+      });
+      if (!res.ok) {
+        // Error bodies are not guaranteed to be JSON.
+        const errorBody: unknown = await res.json().catch(() => null);
+        showToast(errorDetail(errorBody) ?? 'Import failed', 'error');
+        return;
+      }
+      const data: ZoteroImportResult = await res.json();
+      const failed = data.errors?.length ?? 0;
+      setImportResult(
+        `Imported ${data.total_pdfs} PDFs into ${data.collections_imported} notebooks (${data.total_chunks} chunks indexed)` +
+          (failed > 0 ? `; ${failed} files could not be imported` : '')
+      );
+      showToast(`Zotero import complete: ${data.total_pdfs} PDFs`, 'success');
+      await refreshNotebooks();
+    } catch {
+      showToast('Import failed', 'error');
+    } finally {
+      setImporting(false);
+    }
+  }
+
+  function toggleCollection(id: number) {
+    setSelected((prev) => {
+      const next = new Set(prev);
+      if (next.has(id)) next.delete(id);
+      else next.add(id);
+      return next;
+    });
+  }
+
+  function toggleAll() {
+    if (!library) return;
+    if (selected.size === library.collections.length) {
+      setSelected(new Set());
+    } else {
+      setSelected(new Set(library.collections.map((c) => c.id)));
+    }
+  }
+
+  if (!open) return null;
+
+  return (
+
+
e.stopPropagation()}> +

Import from Zotero

+ + {loading && ( +
+ + Detecting Zotero library... +
+ )} + + {library && !library.detected && ( +
+

Zotero library not found.

+

{library.error || 'Make sure Zotero is installed and has been opened at least once.'}

+
+ )} + + {library && library.detected && !importResult && ( + <> +

+ {library.total_items} items, {library.total_pdfs} PDFs in {library.collections.length} collections +

+ +
+ + {library.collections.map((c) => ( + + ))} + {library.collections.length === 0 && ( +

No collections found. Create collections in Zotero first.

+ )} +
+ +
+ + +
+ + )} + + {importResult && ( +
+

{importResult}

+ +
+ )} +
+
+  );
+}
diff --git a/apps/desktop/src/components/ui/zotero-import.css b/apps/desktop/src/components/ui/zotero-import.css
new file mode 100644
index 0000000..b47eece
--- /dev/null
+++ b/apps/desktop/src/components/ui/zotero-import.css
@@ -0,0 +1,180 @@
+/* Zotero import dialog: backdrop, dialog shell, collection list, action buttons. */
+
+.zotero-backdrop {
+  position: fixed;
+  inset: 0;
+  background: rgba(0, 0, 0, 0.5);
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  z-index: 900;
+}
+
+.zotero-dialog {
+  width: 500px;
+  max-width: 90vw;
+  max-height: 70vh;
+  padding: var(--space-6);
+  background: var(--color-bg-secondary);
+  border: 1px solid var(--color-border);
+  border-radius: var(--radius-lg);
+  box-shadow: var(--shadow-lg);
+  display: flex;
+  flex-direction: column;
+  gap: var(--space-4);
+}
+
+.zotero-title {
+  font-size: var(--text-lg);
+  font-weight: 600;
+  color: var(--color-text-primary);
+  margin: 0;
+}
+
+.zotero-summary {
+  font-size: var(--text-sm);
+  color: var(--color-text-secondary);
+  margin: 0;
+}
+
+.zotero-status {
+  display: flex;
+  align-items: center;
+  gap: var(--space-2);
+  padding: var(--space-4);
+  font-size: var(--text-sm);
+  color: var(--color-text-secondary);
+}
+
+.zotero-status-error {
+  flex-direction: column;
+  align-items: flex-start;
+  color: var(--color-text-muted);
+}
+
+.zotero-hint {
+  font-size: var(--text-xs);
+  color: var(--color-text-muted);
+  margin: 0;
+}
+
+.zotero-spinner {
+  width: 16px;
+  height: 16px;
+  border: 2px solid var(--color-border);
+  border-top-color: var(--color-accent);
+  border-radius: 50%;
+  animation: zoteroSpin 600ms linear infinite;
+  flex-shrink: 0;
+}
+
+@keyframes zoteroSpin {
+  to { transform: rotate(360deg); }
+}
+
+.zotero-collections {
+  overflow-y: auto;
+  max-height: 300px;
+  display: flex;
+  flex-direction: column;
+  gap: var(--space-1);
+}
+
+.zotero-select-all {
+  display: flex;
+  align-items: center;
+  gap: var(--space-2);
+  padding: var(--space-2) var(--space-1);
+  font-size: var(--text-sm);
+  font-weight: 500;
+  color: var(--color-text-secondary);
+  cursor: pointer;
+  border-bottom: 1px solid var(--color-border);
+  margin-bottom: var(--space-1);
+}
+
+.zotero-collection-row {
+  display: flex;
+  align-items: center;
+  gap: var(--space-2);
+  padding: var(--space-2) var(--space-1);
+  font-size: var(--text-sm);
+  color: var(--color-text-secondary);
+  cursor: pointer;
+  border-radius: var(--radius-sm);
+  transition: background 80ms ease;
+}
+
+.zotero-collection-row:hover {
+  background: var(--color-bg-hover);
+}
+
+.zotero-collection-name {
+  flex: 1;
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+
+.zotero-collection-count {
+  font-size: var(--text-xs);
+  color: var(--color-text-muted);
+  flex-shrink: 0;
+}
+
+.zotero-actions {
+  display: flex;
+  gap: var(--space-2);
+  justify-content: flex-end;
+}
+
+.zotero-btn {
+  font-family: var(--font-sans);
+  font-size: var(--text-sm);
+  font-weight: 500;
+  border: none;
+  border-radius: var(--radius-pill);
+  cursor: pointer;
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  gap: var(--space-2);
+  transition: opacity 120ms ease;
+}
+
+.zotero-btn:disabled {
+  opacity: 0.5;
+  cursor: not-allowed;
+}
+
+.zotero-btn-primary {
+  padding: 10px 24px;
+  background: var(--color-accent);
+  color: #fff;
+}
+
+.zotero-btn-primary:hover:not(:disabled) {
+  opacity: 0.9;
+}
+
+.zotero-btn-text {
+  padding: 8px 16px;
+  background: transparent;
+  color: var(--color-text-muted);
+}
+
+/* A disabled button must not react to hover, same as .zotero-btn-primary. */
+.zotero-btn-text:hover:not(:disabled) {
+  color: var(--color-text-secondary);
+}
+
+.zotero-result {
+  display: flex;
+  flex-direction: column;
+  align-items: center;
+  gap: var(--space-4);
+  padding: var(--space-4);
+  text-align: center;
+  font-size: var(--text-sm);
+  color: var(--color-text-secondary);
+}
diff --git a/backend/notebooklm_backend/app.py b/backend/notebooklm_backend/app.py
index cbe4614..8fff1b8 100644
--- a/backend/notebooklm_backend/app.py
+++ b/backend/notebooklm_backend/app.py
@@ -4,7 +4,7 @@ from
starlette.middleware.cors import CORSMiddleware
 
 from .config import AppConfig, get_settings
-from .routes import health, chat, documents, rag, notebooks, metrics, speech, export, agent, conversations
+from .routes import health, chat, documents, rag, notebooks, metrics, speech, export, agent, conversations, zotero
 from .services.chat import ChatService
 from .services.embeddings import create_embedding_backend
 from .services.ingestion import IngestionService
@@ -99,6 +99,7 @@ def create_app() -> FastAPI:
     app.include_router(export.router, prefix="/api")
     app.include_router(agent.router, prefix="/api")
     app.include_router(conversations.router, prefix="/api")
+    app.include_router(zotero.router, prefix="/api")
 
     @app.get("/api/config", tags=["config"])
     async def read_config() -> dict[str, object]:
diff --git a/backend/notebooklm_backend/routes/zotero.py b/backend/notebooklm_backend/routes/zotero.py
new file mode 100644
index 0000000..b166500
--- /dev/null
+++ b/backend/notebooklm_backend/routes/zotero.py
@@ -0,0 +1,219 @@
+from __future__ import annotations
+
+import logging
+import sqlite3
+import uuid
+from datetime import datetime, timezone
+from pathlib import Path
+
+from fastapi import APIRouter, HTTPException, Request
+from pydantic import BaseModel
+
+from ..services.zotero_import import ZoteroScanner
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/zotero", tags=["zotero"])
+
+
+class ZoteroCollectionResponse(BaseModel):
+    """One Zotero collection as exposed to the desktop client."""
+
+    id: int
+    name: str
+    parent_id: int | None
+    paper_count: int
+
+
+class ZoteroLibraryResponse(BaseModel):
+    """Result of library detection; when ``detected`` is false, ``error`` says why."""
+
+    detected: bool
+    data_dir: str | None = None
+    total_items: int = 0
+    total_pdfs: int = 0
+    collections: list[ZoteroCollectionResponse] = []
+    error: str | None = None
+
+
+class ZoteroImportRequest(BaseModel):
+    """Body of the import request."""
+
+    collection_ids: list[int] | None = None  # None = import all
+    data_dir: str | None = None  # Override auto-detection
+
+
+class ZoteroImportProgress(BaseModel):
+    """Per-collection outcome; ``notebook_id`` is empty when nothing was importable."""
+
+    collection_name: str
+    notebook_id: str
+    pdfs_found: int
+    pdfs_imported: int
+    chunks_indexed: int
+
+
+class ZoteroImportResponse(BaseModel):
+    """Aggregate outcome of one import run."""
+
+    collections_imported: int
+    total_pdfs: int
+    total_chunks: int
+    notebooks_created: list[str]
+    progress: list[ZoteroImportProgress]
+    errors: list[str]
+
+
+@router.get("/detect", response_model=ZoteroLibraryResponse)
+async def detect_zotero(data_dir: str | None = None) -> ZoteroLibraryResponse:
+    """Detect Zotero library and list collections.
+
+    Never raises: any failure is reported as ``detected=False`` with ``error`` set.
+    """
+    try:
+        scanner = ZoteroScanner(data_dir=Path(data_dir) if data_dir else None)
+        info = scanner.get_library_info()
+        return ZoteroLibraryResponse(
+            detected=True,
+            data_dir=str(info.data_dir),
+            total_items=info.total_items,
+            total_pdfs=info.total_pdfs,
+            collections=[
+                ZoteroCollectionResponse(
+                    id=c.id,
+                    name=c.name,
+                    parent_id=c.parent_id,
+                    paper_count=c.paper_count,
+                )
+                for c in info.collections
+            ],
+        )
+    except FileNotFoundError as e:
+        # Raised by ZoteroScanner when no library is found; no stack trace needed.
+        return ZoteroLibraryResponse(detected=False, error=str(e))
+    except Exception as e:
+        logger.exception("Zotero detection failed")
+        return ZoteroLibraryResponse(detected=False, error=str(e))
+
+
+@router.post("/import", response_model=ZoteroImportResponse)
+async def import_from_zotero(
+    request: Request, body: ZoteroImportRequest
+) -> ZoteroImportResponse:
+    """Import PDFs from Zotero collections into Notebook LM notebooks.
+
+    One notebook is created per collection that has at least one PDF on disk.
+    A PDF that fails to ingest is recorded in ``errors`` and does not abort the run.
+
+    Raises:
+        HTTPException: 404 if no Zotero library is found, 400 if the selection
+            matches no collection, 500 if the Zotero database cannot be read.
+    """
+    from ..models.notebook import NotebookMetadata
+    from ..services.ingestion import IngestionService
+    from ..services.notebook_store import NotebookStore
+
+    ingestion: IngestionService = request.app.state.ingestion_service
+    notebook_store: NotebookStore = request.app.state.notebook_store
+
+    try:
+        scanner = ZoteroScanner(data_dir=Path(body.data_dir) if body.data_dir else None)
+        info = scanner.get_library_info()
+    except FileNotFoundError as e:
+        raise HTTPException(status_code=404, detail=str(e)) from e
+    except sqlite3.Error as e:
+        logger.exception("Could not read Zotero database")
+        raise HTTPException(status_code=500, detail=f"Could not read Zotero database: {e}") from e
+
+    progress: list[ZoteroImportProgress] = []
+    errors: list[str] = []
+    notebooks_created: list[str] = []
+    total_pdfs = 0
+    total_chunks = 0
+
+    # Determine which collections to import. Only an omitted list means "all";
+    # an explicit empty list selects nothing and is rejected below.
+    if body.collection_ids is not None:
+        wanted = set(body.collection_ids)
+        collections = [c for c in info.collections if c.id in wanted]
+    else:
+        collections = info.collections
+
+    if not collections:
+        raise HTTPException(status_code=400, detail="No collections found to import")
+
+    for collection in collections:
+        try:
+            pdfs = scanner.get_pdfs_for_collection(collection.id)
+        except sqlite3.Error as e:
+            # One unreadable collection must not discard the ones already imported.
+            logger.warning("Could not list PDFs for %r", collection.name, exc_info=True)
+            errors.append(f"{collection.name}: {str(e)[:100]}")
+            continue
+        valid_pdfs = [p for p in pdfs if p.path and p.path.exists()]
+
+        if not valid_pdfs:
+            progress.append(ZoteroImportProgress(
+                collection_name=collection.name,
+                notebook_id="",
+                pdfs_found=len(pdfs),
+                pdfs_imported=0,
+                chunks_indexed=0,
+            ))
+            continue
+
+        # Create a notebook for this collection
+        notebook_id = uuid.uuid4().hex
+        now = datetime.now(timezone.utc)
+        notebook = NotebookMetadata(
+            notebook_id=notebook_id,
+            title=f"Zotero: {collection.name}",
+            description=f"Imported from Zotero collection '{collection.name}'",
+            source_count=0,
+            chunk_count=0,
+            created_at=now,
+            updated_at=now,
+        )
+        notebook_store.upsert_notebook(notebook)
+        notebooks_created.append(notebook_id)
+
+        # Import each PDF
+        imported = 0
+        collection_chunks = 0
+        for pdf in valid_pdfs:
+            try:
+                result = await ingestion.ingest_path(
+                    notebook_id=notebook_id,
+                    path=pdf.path,
+                    recursive=False,
+                )
+                imported += 1
+                collection_chunks += result.chunks_indexed
+            except Exception as e:
+                logger.warning("Failed to ingest %s", pdf.path, exc_info=True)
+                errors.append(f"{collection.name}/{pdf.title}: {str(e)[:100]}")
+
+        # Update notebook counts
+        notebook.source_count = imported
+        notebook.chunk_count = collection_chunks
+        notebook_store.upsert_notebook(notebook)
+
+        total_pdfs += imported
+        total_chunks += collection_chunks
+
+        progress.append(ZoteroImportProgress(
+            collection_name=collection.name,
+            notebook_id=notebook_id,
+            pdfs_found=len(pdfs),
+            pdfs_imported=imported,
+            chunks_indexed=collection_chunks,
+        ))
+
+    return ZoteroImportResponse(
+        collections_imported=len([p for p in progress if p.pdfs_imported > 0]),
+        total_pdfs=total_pdfs,
+
total_chunks=total_chunks,
+        notebooks_created=notebooks_created,
+        progress=progress,
+        errors=errors,
+    )
diff --git a/backend/notebooklm_backend/services/zotero_import.py b/backend/notebooklm_backend/services/zotero_import.py
new file mode 100644
index 0000000..e827e6e
--- /dev/null
+++ b/backend/notebooklm_backend/services/zotero_import.py
@@ -0,0 +1,257 @@
+"""Read-only scanner for Zotero SQLite database.
+
+Detects the Zotero data directory, reads collections and their PDF
+attachments. Never modifies the Zotero database.
+"""
+from __future__ import annotations
+
+import platform
+import sqlite3
+from contextlib import closing
+from dataclasses import dataclass, field
+from pathlib import Path
+
+
+# Default Zotero data directory paths by platform
+def _default_zotero_dir() -> Path | None:
+    """Return the first known directory that contains ``zotero.sqlite``, or None.
+
+    Every candidate is first checked directly. If none matches, each one is
+    treated as a profile root and searched for ``<profile>/zotero``.
+    """
+    system = platform.system()
+    home = Path.home()
+    if system == "Darwin":
+        candidates = [
+            home / "Zotero",
+            home / "Library" / "Application Support" / "Zotero" / "Profiles",
+        ]
+    elif system == "Windows":
+        candidates = [
+            home / "Zotero",
+            Path("C:/Users") / home.name / "Zotero",
+        ]
+    else:  # Linux
+        candidates = [
+            home / "Zotero",
+            home / ".zotero" / "zotero",
+        ]
+    for base in candidates:
+        if (base / "zotero.sqlite").exists():
+            return base
+    # NOTE(review): a "Profiles" root presumably holds the database one level
+    # down, in <profile>/zotero, so the direct check above cannot match it.
+    for base in candidates:
+        for nested in sorted(base.glob("*/zotero")):
+            if (nested / "zotero.sqlite").exists():
+                return nested
+    return None
+
+
+@dataclass
+class ZoteroCollection:
+    """A Zotero collection and the number of items filed in it."""
+
+    id: int
+    name: str
+    parent_id: int | None = None
+    paper_count: int = 0
+
+
+@dataclass
+class ZoteroAttachment:
+    """A PDF attachment; ``path`` is None when the file could not be located."""
+
+    item_id: int
+    title: str
+    path: Path | None = None
+    content_type: str = ""
+
+
+@dataclass
+class ZoteroLibraryInfo:
+    """Locations, collections and item counts of one Zotero library."""
+
+    data_dir: Path
+    db_path: Path
+    storage_dir: Path
+    collections: list[ZoteroCollection] = field(default_factory=list)
+    total_items: int = 0
+    total_pdfs: int = 0
+
+
+class ZoteroScanner:
+    """Read-only scanner for a Zotero library."""
+
+    def __init__(self, data_dir: Path | None = None) -> None:
+        """Bind the scanner to ``data_dir``, auto-detecting it when omitted.
+
+        Raises:
+            FileNotFoundError: no data directory was found, or it contains no
+                ``zotero.sqlite``.
+        """
+        if data_dir is None:
+            data_dir = _default_zotero_dir()
+        if data_dir is None:
+            raise FileNotFoundError(
+                "Could not find Zotero data directory. "
+                "Set the path explicitly or ensure Zotero is installed."
+            )
+        # Expand "~" so a user-typed override such as "~/Zotero" resolves.
+        data_dir = Path(data_dir).expanduser()
+        self.data_dir = data_dir
+        self.db_path = data_dir / "zotero.sqlite"
+        self.storage_dir = data_dir / "storage"
+        if not self.db_path.exists():
+            raise FileNotFoundError(f"Zotero database not found at {self.db_path}")
+
+    def _connect(self) -> sqlite3.Connection:
+        """Open a new read-only connection; the caller is responsible for closing it."""
+        # Open read-only to never modify the Zotero database. as_uri()
+        # percent-encodes the path, so characters such as "?", "#" or "%" in
+        # it cannot be misread as URI syntax.
+        # NOTE(review): Zotero may keep the file locked while it is running,
+        # which would surface as sqlite3.OperationalError on the first query.
+        conn = sqlite3.connect(f"{self.db_path.resolve().as_uri()}?mode=ro", uri=True)
+        conn.row_factory = sqlite3.Row
+        return conn
+
+    def get_library_info(self) -> ZoteroLibraryInfo:
+        """Return the library's locations, collections and item/PDF totals."""
+        with closing(self._connect()) as conn:
+            # Resolve the attachment type by name, the same way the title field
+            # is resolved below, instead of hard-coding a numeric itemTypeID.
+            total_items = conn.execute(
+                "SELECT COUNT(*) FROM items WHERE itemTypeID NOT IN "
+                "(SELECT itemTypeID FROM itemTypes WHERE typeName = 'attachment')"
+            ).fetchone()[0]
+
+            total_pdfs = conn.execute(
+                "SELECT COUNT(*) FROM itemAttachments "
+                "WHERE contentType = 'application/pdf'"
+            ).fetchone()[0]
+
+            collections = self._list_collections(conn)
+
+            return ZoteroLibraryInfo(
+                data_dir=self.data_dir,
+                db_path=self.db_path,
+                storage_dir=self.storage_dir,
+                collections=collections,
+                total_items=total_items,
+                total_pdfs=total_pdfs,
+            )
+
+    def _list_collections(self, conn: sqlite3.Connection) -> list[ZoteroCollection]:
+        """List every collection with its item count, ordered by name."""
+        # One grouped query replaces a COUNT round-trip per collection.
+        # COUNT(ci.itemID) ignores the NULL row the LEFT JOIN produces for an
+        # empty collection, so such a collection reports 0.
+        rows = conn.execute(
+            "SELECT c.collectionID AS collectionID, "
+            "c.collectionName AS collectionName, "
+            "c.parentCollectionID AS parentCollectionID, "
+            "COUNT(ci.itemID) AS itemCount "
+            "FROM collections c "
+            "LEFT JOIN collectionItems ci ON ci.collectionID = c.collectionID "
+            "GROUP BY c.collectionID "
+            "ORDER BY c.collectionName"
+        ).fetchall()
+
+        return [
+            ZoteroCollection(
+                id=row["collectionID"],
+                name=row["collectionName"],
+                parent_id=row["parentCollectionID"],
+                paper_count=row["itemCount"],
+            )
+            for row in rows
+        ]
+
+    def get_pdfs_for_collection(self, collection_id: int) -> list[ZoteroAttachment]:
+        """Get all PDF attachments for items in a collection.
+
+        Matches PDFs attached to an item of the collection as well as PDFs
+        that are themselves filed in the collection without a parent item.
+        """
+        with closing(self._connect()) as conn:
+            rows = conn.execute(
+                """
+                SELECT ia.itemID, ia.path, ia.contentType,
+                       COALESCE(idv.value, 'Untitled') as title
+                FROM collectionItems ci
+                JOIN itemAttachments ia
+                    ON ia.parentItemID = ci.itemID OR ia.itemID = ci.itemID
+                LEFT JOIN itemData id ON id.itemID = ci.itemID
+                    AND id.fieldID = (SELECT fieldID FROM fields WHERE fieldName = 'title')
+                LEFT JOIN itemDataValues idv ON idv.valueID = id.valueID
+                WHERE ci.collectionID = ?
+                    AND ia.contentType = 'application/pdf'
+                """,
+                (collection_id,),
+            ).fetchall()
+            return self._to_attachments(rows, conn)
+
+    def get_all_pdfs(self) -> list[ZoteroAttachment]:
+        """Get all PDF attachments in the entire library."""
+        with closing(self._connect()) as conn:
+            rows = conn.execute(
+                """
+                SELECT ia.itemID, ia.path, ia.contentType,
+                       COALESCE(idv.value, 'Untitled') as title
+                FROM itemAttachments ia
+                LEFT JOIN itemData id ON id.itemID = ia.parentItemID
+                    AND id.fieldID = (SELECT fieldID FROM fields WHERE fieldName = 'title')
+                LEFT JOIN itemDataValues idv ON idv.valueID = id.valueID
+                WHERE ia.contentType = 'application/pdf'
+                """,
+            ).fetchall()
+            return self._to_attachments(rows, conn)
+
+    def _to_attachments(
+        self, rows: list[sqlite3.Row], conn: sqlite3.Connection
+    ) -> list[ZoteroAttachment]:
+        """Convert attachment rows into ZoteroAttachment objects with resolved paths."""
+        return [
+            ZoteroAttachment(
+                item_id=row["itemID"],
+                title=row["title"],
+                path=self._resolve_attachment_path(row["path"], row["itemID"], conn),
+                content_type=row["contentType"] or "",
+            )
+            for row in rows
+        ]
+
+    def _resolve_attachment_path(
+        self, stored_path: str | None, item_id: int, conn: sqlite3.Connection
+    ) -> Path | None:
+        """Resolve a Zotero attachment path to an actual file path.
+
+        Returns None when the path is empty, cannot be resolved, or does not
+        point at an existing file.
+        """
+        if not stored_path:
+            return None
+
+        # Zotero stores paths as "storage:filename.pdf"
+        if stored_path.startswith("storage:"):
+            filename = stored_path[len("storage:"):]
+            # Get the item key to find the storage subdirectory
+            row = conn.execute(
+                "SELECT key FROM items WHERE itemID = ?", (item_id,)
+            ).fetchone()
+            if row is None:
+                return None
+            pdf_path = self.storage_dir / row["key"] / filename
+            # The filename comes from the database: refuse one that climbs out
+            # of the storage directory (e.g. "storage:../../other.pdf").
+            if not pdf_path.resolve().is_relative_to(self.storage_dir.resolve()):
+                return None
+            return pdf_path if pdf_path.is_file() else None
+
+        # Linked file: absolute path
+        path = Path(stored_path)
+        if path.is_absolute() and path.is_file():
+            return path
+
+        return None