diff --git a/.env.example b/.env.example
index f7b6860..5e15a8c 100644
--- a/.env.example
+++ b/.env.example
@@ -25,6 +25,10 @@ SLACK_BOT_TOKEN=xoxb-xxxxxxxxxxxx-xxxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxx
SLACK_APP_TOKEN=xapp-x-xxxxxxxxxxxx-xxxxxxxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
SLACK_WORKSPACE=your_workspace
+# Todoist API token (for tasks and overdue items in briefings)
+# Create token at: https://app.todoist.com/app/settings/integrations/developer
+TODOIST_API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+
# Authorized Slack User IDs (comma-separated)
# Find your user ID: Click profile -> More -> Copy member ID
SLACK_AUTHORIZED_USERS=U01234567,U89012345
@@ -41,6 +45,8 @@ BOT_MODE=agent
# Enable streaming responses (agent and multi_agent modes)
ENABLE_STREAMING=true
+# Slack message edits redraw the full message; 1.0-1.5s usually feels smoother than token-level updates.
+STREAMING_UPDATE_INTERVAL=1.25
# Direct email send behavior
# false (default): tool can only create drafts
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..b003744
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,28 @@
+# Agent Instructions
+
+This repo is a personal/local assistant system. Treat local credentials, indexed data, and logs as sensitive. Do not expose the bot publicly or broaden access controls without an explicit security review.
+
+## Orientation
+
+- Main user-facing docs are in `README.md`.
+- Claude-specific project notes are in `CLAUDE.md`; keep this file aligned with those notes when changing agent behavior.
+- Runtime configuration lives in `src/config.py` and `.env.example`.
+- The Slack bot supports `intent`, `agent`, and `multi_agent` modes. `multi_agent` routes through specialist agents in `src/bot/agents/`.
+- Calendar "next/upcoming" behavior is current-time-aware through `USER_TIMEZONE`; preserve that invariant when changing calendar tools or prompts.
+
+## Development
+
+- Use `rg` for repo search.
+- Prefer narrow, behavior-focused changes over broad refactors.
+- Confirmable write actions live under `src/bot/actions/` and should be routed through the pending-action confirmation flow.
+- Do not bypass Slack confirmation for writes such as email drafts/sends, calendar events, GitHub issues, Todoist changes, Notion writes, or Zotero additions.
+- Run focused tests for touched areas, and run full `pytest` when changing shared bot, agent, tool, or integration code.
+
+## Common Commands
+
+```bash
+pytest
+pytest tests/test_executor.py
+ruff check src tests
+python scripts/run_bot.py
+```
diff --git a/CLAUDE.md b/CLAUDE.md
index e0087a5..faa9d3e 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,6 +1,6 @@
# Engram - Claude Code Instructions
-Personal knowledge graph system aggregating data from 6 Google accounts, GitHub, and Slack with semantic search and Slack bot interface.
+Personal knowledge graph system aggregating data from Google accounts, GitHub, Slack, Notion, Todoist, and Zotero with semantic search and a Slack bot interface.
## Project Structure
@@ -14,7 +14,10 @@ engram/
│ │ ├── google_multi.py # Multi-account manager with tiered search
│ │ ├── gmail.py, gdrive.py, gcalendar.py
│ │ ├── github_client.py
-│ │ └── slack.py
+│ │ ├── slack.py
+│ │ ├── notion_client.py
+│ │ ├── todoist_client.py
+│ │ └── zotero_client.py
│ ├── indexers/ # Content indexers
│ │ ├── gmail_indexer.py, gdrive_indexer.py, gcal_indexer.py
│ │ ├── github_indexer.py, slack_indexer.py
@@ -27,7 +30,8 @@ engram/
│ │ ├── app.py # Main bot (Socket Mode)
│ │ ├── intent_router.py # LLM intent classification
│ │ ├── handlers/ # Intent handlers
-│ │ └── actions/ # Confirmable actions
+│ │ ├── agents/ # Multi-agent specialists
+│ │ └── actions/ # Confirmable write actions
│ └── query/ # Query engine
├── scripts/ # Orchestration scripts
├── tests/ # Pytest tests
@@ -51,6 +55,9 @@ All secrets in `.env`:
- `OPENAI_API_KEY` - For embeddings
- `ANTHROPIC_API_KEY` - For intent classification
- `SLACK_AUTHORIZED_USERS` - Comma-separated user IDs
+- `USER_TIMEZONE` - IANA timezone used for calendar and relative date handling
+- `BOT_MODE` - `intent`, `agent`, or `multi_agent`
+- `ENABLE_STREAMING` and `STREAMING_UPDATE_INTERVAL` - Slack streaming behavior
## Common Commands
@@ -97,7 +104,9 @@ SQLite database with tables:
- Socket Mode (no public URL needed)
- Claude Haiku for intent classification
- Multi-turn conversations with 30-min TTL
-- Actions that modify data require confirmation
+- Agent and multi-agent modes use Claude tool calling
+- Calendar "next/upcoming" answers use current local time and exclude already-ended events
+- Calendar create/update/cancel, Google Doc comments/replies/resolution, Todoist creates/updates/comments/completions/reopens, notification-setting changes, and other writes require Slack button confirmation
### Security
- OAuth tokens stored locally in `credentials/`
diff --git a/README.md b/README.md
index 660f64c..7d16720 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ This project is designed first for personal/local use. Do not expose it publicly
### Core Capabilities
- **Multi-Account Google Integration**: Sync Gmail, Google Drive, and Google Calendar from up to 6 accounts with tiered search (primary accounts searched first)
-- **Google Write Capabilities**: Create email drafts, create/modify calendar events, and comment on Google Docs
+- **Google Write Capabilities**: Create email drafts, create/modify/cancel calendar events, and comment/reply/resolve comments on Google Docs, with confirmation before writes
- **Zotero Integration**: Search papers, add references by DOI/URL with automatic metadata extraction (CrossRef + page scraping)
- **Notion & Todoist**: Search pages, manage tasks, create content
- **Knowledge Graph**: SQLite-based storage of entities (people, repos, files) and content with relationship tracking
@@ -23,11 +23,12 @@ This project is designed first for personal/local use. Do not expose it publicly
### Advanced Agent Features
- **Natural Conversation**: Chat naturally without triggering tool searches - greetings, questions about the bot, and general conversation are handled intelligently
- **Multi-Agent Architecture**: Orchestrator routes tasks to specialist agents (Calendar, Email, GitHub, Research) for domain expertise
-- **Streaming Responses**: Real-time token-by-token response streaming for better UX
+- **Streaming Responses**: Slack-friendly streaming with readable partial updates instead of choppy token-level edits
- **Tool Calling**: LLM-driven tool selection with multi-step execution capabilities
- **Persistent Memory**: Conversation history and user preferences survive restarts
- **Proactive Alerts**: Calendar reminders, important email notifications, and daily briefings
-- **Confirmation-Gated Actions**: Sensitive actions use explicit Slack confirmation buttons with action-ID validation
+- **Slack-Configurable Notifications**: Inspect and update proactive reminder/briefing/quiet-hours settings from Slack
+- **Confirmation-Gated Actions**: Write actions use explicit Slack confirmation buttons with action-ID validation
### Security
- **Prompt Injection Protection**: Pattern-based detection and sanitization of malicious inputs
@@ -162,6 +163,12 @@ SLACK_BOT_TOKEN=xoxb-xxxxx
SLACK_APP_TOKEN=xapp-xxxxx
SLACK_WORKSPACE=your_workspace
+# Notion API
+# Create integration at: https://www.notion.so/my-integrations
+# Share each target page/database with the integration
+NOTION_API_KEY=secret_xxxxx
+NOTION_WORKSPACE=default
+
# Authorized Slack User IDs (comma-separated)
# Find your ID: Click profile → More → Copy member ID
SLACK_AUTHORIZED_USERS=U12345678
@@ -197,6 +204,10 @@ AGENT_MODEL=claude-sonnet-4-20250514
# Enable streaming responses (applies to agent and multi_agent modes)
ENABLE_STREAMING=true
+# Slack message edit interval for streaming responses.
+# Slack redraws edited messages; 1.0-1.5s usually feels smoother than token-level updates.
+STREAMING_UPDATE_INTERVAL=1.25
+
# Direct email send behavior
# false (default): draft-only (recommended)
# true: SendEmailTool enabled, but still requires explicit Slack confirmation button
@@ -215,6 +226,17 @@ AUDIT_RETENTION_DAYS=90
AUDIT_LOG_MESSAGES=false # Store raw message text in audit logs
```
+### Notion Setup
+
+1. Go to [Notion integrations](https://www.notion.so/my-integrations) and create an **Internal Integration**.
+2. Copy the **Internal Integration Token** and set `NOTION_API_KEY` in `.env`.
+3. In Notion, open each page/database you want indexed, click **Share**, and invite your integration.
+4. Verify access:
+
+```bash
+python -c "from src.integrations.notion_client import NotionClient; print(NotionClient().test_connection())"
+```
+
### Google Account Authentication
Run the OAuth setup for each Google account:
@@ -298,12 +320,13 @@ Legacy mode using intent classification with hardcoded handlers.
Single agent with LLM-driven tool calling.
- Dynamic tool selection
- Multi-step execution
-- Streaming responses
+- Slack-friendly streaming responses
- Natural conversation support
### Multi-Agent Mode (`multi_agent`)
Orchestrator routes to specialist agents.
-- **Calendar Agent**: View events, check availability, create events with attendee invites
+- **Calendar Agent**: View events, answer next/upcoming questions using current local time, check availability, create/update/cancel events with attendee notifications
- **Email Agent**: Search, drafts, and optional send (feature-flagged)
- **GitHub Agent**: PRs, issues, repository activity
- **Research Agent**: Semantic search, briefings
@@ -319,12 +342,16 @@ Talk to the bot via DM or @mention in channels:
| `Hi` / `Hello` | Natural greeting - no tool search triggered |
| `What can you do?` | Help and capabilities overview |
| `What's on my calendar today?` | Show today's events from all accounts |
+| `What's my next event?` | Show the next event using your configured local timezone |
| `What's my schedule for tomorrow?` | Show tomorrow's calendar |
| `When am I free this week?` | Find available time slots |
| `Search for emails about [topic]` | Semantic search across emails |
| `Send an email to [person] about [topic]` | Create draft by default, or send via explicit confirmation if enabled |
| `Create a meeting with [person] tomorrow at 2pm` | Create calendar events |
+| `Move event [id] to tomorrow at 3pm` | Update calendar events after confirmation |
+| `Cancel event [id]` | Cancel calendar events after confirmation |
| `Find documents about [topic]` | Search Google Drive files |
+| `Comment on Google Doc [id]: [comment]` | Add Google Doc comments after confirmation |
| `Show my open PRs` | List your GitHub pull requests |
| `What issues are assigned to me?` | List assigned GitHub issues |
| `Search my papers for [topic]` | Search Zotero library |
@@ -333,6 +360,7 @@ Talk to the bot via DM or @mention in channels:
| `Find papers by [author]` | Search papers by author |
| `What did I miss yesterday?` | Daily briefing for a specific date |
| `Help` | Show available commands |
+| `Set my briefing to 8am weekdays` | Update proactive notification settings |
### Example Interactions
@@ -370,9 +398,10 @@ Bot: 🟢 Available slots tomorrow:
• 4:00 PM - 6:00 PM
You: Create a meeting with alice@company.com tomorrow at 2pm for 30 minutes
-Bot: Created event "Meeting" on 2024-02-04 at 2:00 PM.
- Calendar invite sent to alice@company.com.
- https://calendar.google.com/event?eid=xxx
+Bot: Please confirm creating this calendar event:
+ Event: Meeting
+ When: 2024-02-04 2:00 PM (30 min)
+ Attendees: alice@company.com
```
## Project Structure
@@ -417,6 +446,7 @@ engram/
│ │ ├── event_handlers.py # Message handlers with security
│ │ ├── intent_router.py # LLM intent classification
│ │ ├── conversation.py # Conversation state + persistence
+│ │ ├── datetime_utils.py # Shared date/time parsing helpers
│ │ ├── formatters.py # Slack Block Kit formatting
│ │ ├── tools.py # Tool definitions for LLM
│ │ ├── executor.py # Agent executor with streaming
@@ -424,6 +454,7 @@ engram/
│ │ ├── heartbeat.py # Proactive notifications
│ │ ├── security.py # Input sanitization + rate limiting
│ │ ├── audit.py # Comprehensive audit logging
+│ │ ├── actions/ # Confirmable write actions
│ │ ├── handlers/ # Intent-specific handlers
│ │ │ ├── calendar.py
│ │ │ ├── email.py
@@ -462,7 +493,7 @@ engram/
│ └── audit.db # Security audit log
├── logs/ # Log files (gitignored)
├── credentials/ # OAuth tokens (gitignored)
-└── tests/ # Test suite (360 tests)
+└── tests/ # Test suite (361 passing, 6 skipped)
```
## Automation (macOS)
@@ -488,9 +519,9 @@ launchctl load ~/Library/LaunchAgents/com.engram.bot.plist
- **Data**: All indexed data stays local in `data/` (gitignored)
- **Bot Access**: Only Slack users listed in `SLACK_AUTHORIZED_USERS` can interact with the bot
- **Email Sending**: Draft-only by default. Set `ENABLE_DIRECT_EMAIL_SEND=true` to enable send, which still requires explicit confirmation.
-- **Calendar Events**: The bot can create/modify events but confirms before making changes
-- **Doc Comments**: The bot can add comments to Google Docs you have access to
-- **GitHub Actions**: Issue creation requires explicit confirmation
+- **Calendar Events**: The bot answers "next/upcoming" queries using `USER_TIMEZONE` and excludes events that already ended today. Creating, updating, and cancelling events require confirmation.
+- **Doc Comments**: The bot can add, reply to, and resolve comments on Google Docs you have access to
+- **Other Write Actions**: Email drafts, GitHub issues, Todoist task changes, Notion writes, and Zotero additions require explicit confirmation
- **Action Integrity**: Confirmation clicks are validated by action ID and thread-aware context lookup to prevent stale/mismatched execution
- **Confirmation Timeout**: Pending confirmations expire after 5 minutes and must be re-requested
diff --git a/pyproject.toml b/pyproject.toml
index 733bb6e..0843ae1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,7 +33,10 @@ dependencies = [
"anthropic>=0.18.0",
"chromadb>=0.4.0",
"mem0ai>=0.1.0",
+ "numpy>=1.24.0",
+ "notion-client>=2.0.0",
"httpx>=0.25.0",
+ "requests>=2.31.0",
"tenacity>=8.2.0",
"tqdm>=4.66.0",
"python-dateutil>=2.8.0",
diff --git a/requirements.txt b/requirements.txt
index b6d130f..10468d2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -21,12 +21,17 @@ openai>=1.0.0
anthropic>=0.18.0
chromadb>=0.4.0
mem0ai>=0.1.0
+numpy>=1.24.0
+
+# Notion
+notion-client>=2.0.0
# Database
# SQLite is built-in
# Utilities
httpx>=0.25.0
+requests>=2.31.0
tenacity>=8.2.0
tqdm>=4.66.0
python-dateutil>=2.8.0
diff --git a/scripts/ideaspark_cron.sh b/scripts/ideaspark_cron.sh
new file mode 100755
index 0000000..d0b8e64
--- /dev/null
+++ b/scripts/ideaspark_cron.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# IdeaSpark cron wrapper: sources .env, picks a Python interpreter, and runs
+# scripts/ideaspark_daily.py in the requested mode. Everything the run prints
+# is appended to logs/ideaspark_cron.log.
+#
+# Modes (first argument, defaults to "generate"):
+#   generate   runs ideaspark_daily.py --generate
+#   feedback   runs ideaspark_daily.py --feedback
+#   full       runs ideaspark_daily.py with no flags
+#
+# Usage (crontab -e); backslash-escape any spaces in the install path:
+#   0 5 * * * /path/to/engram/scripts/ideaspark_cron.sh generate
+#   0 22 * * * /path/to/engram/scripts/ideaspark_cron.sh feedback
+#
+# Or use launchd (recommended on macOS) — see scripts/launchd/ directory.
+
+set -euo pipefail
+
+# Repository root is the parent of the directory holding this script.
+ENGRAM_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+mkdir -p "${ENGRAM_DIR}/logs"
+LOGFILE="${ENGRAM_DIR}/logs/ideaspark_cron.log"
+
+# Source .env with auto-export on so the API keys reach the Python process.
+if [ -f "${ENGRAM_DIR}/.env" ]; then
+    set -a
+    source "${ENGRAM_DIR}/.env"
+    set +a
+fi
+
+# Prefer a conda-style interpreter under $HOME (miniforge3 first, since it
+# has the project dependencies), then fall back to python3 on PATH.
+PYTHON=""
+for conda_root in miniforge3 miniconda3 anaconda3; do
+    if [ -f "${HOME}/${conda_root}/bin/python" ]; then
+        PYTHON="${HOME}/${conda_root}/bin/python"
+        break
+    fi
+done
+if [ -z "$PYTHON" ]; then
+    if command -v python3 &>/dev/null; then
+        PYTHON=python3
+    else
+        echo "$(date): ERROR — python3 not found" >> "$LOGFILE"
+        exit 1
+    fi
+fi
+
+MODE="${1:-generate}"
+
+# Brace group (not a subshell): the exit below terminates the whole script.
+{
+    echo "===== $(date) — ideaspark ${MODE} ====="
+    cd "${ENGRAM_DIR}"
+
+    case "$MODE" in
+        generate|feedback)
+            # The mode name doubles as the script's command-line flag.
+            "$PYTHON" scripts/ideaspark_daily.py "--${MODE}" 2>&1
+            ;;
+        full)
+            "$PYTHON" scripts/ideaspark_daily.py 2>&1
+            ;;
+        *)
+            echo "Unknown mode: ${MODE}. Use generate, feedback, or full."
+            exit 1
+            ;;
+    esac
+
+    echo "===== done ====="
+    echo ""
+} >> "$LOGFILE" 2>&1
diff --git a/scripts/ideaspark_daily.py b/scripts/ideaspark_daily.py
new file mode 100644
index 0000000..45fcc10
--- /dev/null
+++ b/scripts/ideaspark_daily.py
@@ -0,0 +1,276 @@
+#!/usr/bin/env python3
+"""IdeaSpark daily runner — generates and posts one research idea to Slack.
+
+Required Slack Bot scopes (add in https://api.slack.com/apps):
+ - reactions:read (for feedback collection via emoji reactions)
+ - Already granted by engram: chat:write, im:write, im:read, im:history, users:read
+"""
+
+import argparse
+import json
+import logging
+import sys
+from datetime import datetime
+from pathlib import Path
+
+# Add project root to path
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from src.config import (
+ SLACK_AUTHORIZED_USERS,
+ get_user_timezone,
+ PROJECT_ROOT,
+)
+from src.ideaspark.agent import IdeaSparkAgent
+from src.ideaspark.memory import IdeaMemory
+from src.ideaspark.todoist_archive import archive_idea
+
+logger = logging.getLogger(__name__)
+
+
+
+def post_to_slack(brief: str, user_id: str | None = None) -> str | None:
+    """Send an idea brief to a Slack DM.
+
+    When ``user_id`` is falsy, the first entry of ``SLACK_AUTHORIZED_USERS``
+    is used as the recipient.
+
+    Returns:
+        The ``ts`` value of the posted message (kept for reaction tracking),
+        or ``None`` when no recipient is configured or any step raises.
+    """
+    try:
+        from src.integrations.slack import SlackClient
+        client = SlackClient()
+
+        recipient = user_id
+        if not recipient:
+            if not SLACK_AUTHORIZED_USERS:
+                logger.error("No authorized Slack users configured")
+                return None
+            recipient = SLACK_AUTHORIZED_USERS[0]
+
+        # Resolve the DM channel for the recipient, then post the brief into it.
+        dm = client._client.conversations_open(users=[recipient])
+        posted = client._client.chat_postMessage(
+            channel=dm["channel"]["id"],
+            text=brief,
+            mrkdwn=True,
+        )
+
+        message_ts = posted.get("ts")
+        logger.info(f"Posted IdeaSpark to Slack (ts={message_ts})")
+        return message_ts
+
+    except Exception as exc:
+        # Best-effort: a Slack failure is logged and reported as None.
+        logger.error(f"Error posting to Slack: {exc}")
+        return None
+
+
+def collect_reactions(channel_id: str, message_ts: str) -> str | None:
+    """Look up emoji reactions on a single Slack message.
+
+    Returns ``"fire"``, ``"thinking"`` or ``"thumbsdown"`` for the first
+    recognised reaction in the order the response lists them, or ``None``
+    when no recognised reaction is present or the lookup raises.
+    """
+    # Slack emoji name -> feedback label used by the rest of the pipeline.
+    labels = {
+        "fire": "fire",
+        "thinking_face": "thinking",
+        "-1": "thumbsdown",
+        "thumbsdown": "thumbsdown",
+    }
+    try:
+        from src.integrations.slack import SlackClient
+        api = SlackClient()._client
+        payload = api.reactions_get(channel=channel_id, timestamp=message_ts)
+
+        for reaction in payload.get("message", {}).get("reactions", []):
+            label = labels.get(reaction.get("name", ""))
+            if label is not None:
+                return label
+
+        return None
+    except Exception as exc:
+        logger.warning(f"Error collecting reactions: {exc}")
+        return None
+
+
+def run_daily():
+    """Generate one idea, post it to Slack, and record where it was posted."""
+    started = datetime.now(get_user_timezone())
+    logger.info(f"IdeaSpark daily run: {started.strftime('%Y-%m-%d %H:%M')}")
+
+    result = IdeaSparkAgent().generate_idea()
+    if result is None:
+        logger.info("No idea generated today (below quality threshold or error)")
+        return
+
+    ts = post_to_slack(result["brief"])
+    if ts:
+        # Attach the Slack timestamp to the matching log entry so reactions
+        # on that message can be looked up later.
+        memory = IdeaMemory()
+        logged = next(
+            (item for item in memory.idea_log if item["id"] == result["idea_number"]),
+            None,
+        )
+        if logged is not None:
+            logged["slack_ts"] = ts
+            memory.save()
+
+    # Summary line is logged whether or not the Slack post succeeded.
+    logger.info(
+        f"IdeaSpark #{result['idea_number']}: {result['title']} "
+        f"[N:{result['scores']['novelty']} F:{result['scores']['feasibility']} "
+        f"I:{result['scores']['impact']}]"
+    )
+
+
+def _archive_entry(entry: dict):
+    """Send one idea-log entry to Todoist via ``archive_idea``.
+
+    Missing fields fall back to the same defaults at every call site. The
+    result of ``archive_idea`` is returned unchanged; callers treat a truthy
+    value as a successful archive.
+    """
+    idea_id = entry["id"]
+    return archive_idea(
+        idea_number=idea_id,
+        title=entry.get("title", f"Idea #{idea_id}"),
+        # ``or ""`` also covers an explicit None date, which cannot be sliced.
+        date_str=(entry.get("date") or "")[:10],
+        theme=entry.get("theme", ""),
+        strategy=entry.get("strategy", ""),
+        scores=entry.get("scores", {}),
+        brief=entry.get("brief", ""),
+        is_stretch=entry.get("is_stretch", False),
+    )
+
+
+def collect_feedback():
+    """Scan past ideas for new reactions and update preferences.
+
+    For every logged idea that has a Slack timestamp but no recorded
+    reaction, the message's reactions are fetched and handed to
+    ``IdeaMemory.record_feedback``. Ideas that received 🔥 are archived to
+    Todoist, and 🔥 ideas left unarchived by an earlier run are retried.
+    A meta-summary is logged at the end when one is available.
+    """
+    from src.integrations.slack import SlackClient
+    memory = IdeaMemory()
+    slack = SlackClient()
+    user_id = SLACK_AUTHORIZED_USERS[0] if SLACK_AUTHORIZED_USERS else None
+
+    if not user_id:
+        logger.error("No authorized user for feedback collection")
+        return
+
+    # Open DM channel
+    response = slack._client.conversations_open(users=[user_id])
+    channel_id = response["channel"]["id"]
+
+    updated = 0
+    # Ids whose archive call already ran during this invocation. The retry
+    # pass skips them so a failing archive is not hit twice back-to-back.
+    attempted: set = set()
+
+    # Snapshot: iterate over a copy since thumbsdown deletes from the list
+    pending = [
+        e for e in memory.idea_log
+        if e.get("reaction") is None and e.get("slack_ts")
+    ]
+
+    for entry in pending:
+        reaction = collect_reactions(channel_id, entry["slack_ts"])
+        if not reaction:
+            continue
+
+        idea_id = entry["id"]
+        # Copy fields before record_feedback (thumbsdown deletes the entry)
+        snapshot = dict(entry)
+
+        memory.record_feedback(idea_id, reaction)
+        logger.info(f"Idea #{idea_id}: reaction={reaction}")
+        updated += 1
+
+        # Only 🔥 ideas are archived to Todoist
+        if reaction != "fire":
+            continue
+
+        attempted.add(idea_id)
+        if _archive_entry(snapshot):
+            # Mark archived so the retry pass doesn't duplicate it
+            for e in memory.idea_log:
+                if e["id"] == idea_id:
+                    e["todoist_archived"] = True
+                    memory.save()
+                    break
+
+    # Retry archiving for 🔥 ideas whose Todoist archive failed in an earlier run
+    for entry in memory.idea_log:
+        if entry.get("reaction") != "fire" or entry.get("todoist_archived"):
+            continue
+        if entry["id"] in attempted:
+            # Already tried moments ago; leave it for the next run.
+            continue
+
+        page_id = _archive_entry(entry)
+        if page_id:
+            entry["todoist_archived"] = True
+            memory.save()
+            logger.info(f"Retried archiving idea #{entry['id']} → Todoist {page_id}")
+            updated += 1
+
+    if updated:
+        logger.info(f"Collected/archived {updated} updates")
+
+    # Generate meta-summary if threshold reached
+    summary = memory.generate_meta_summary()
+    if summary:
+        logger.info(f"Meta-summary available:\n{summary}")
+
+
+def _record_slack_ts(idea_number, ts: str) -> None:
+    """Store the Slack message timestamp on the matching idea-log entry.
+
+    ``collect_feedback`` only inspects entries that have ``slack_ts`` set, so
+    an idea posted without this step never has its reactions collected.
+    """
+    memory = IdeaMemory()
+    for entry in memory.idea_log:
+        if entry["id"] == idea_number:
+            entry["slack_ts"] = ts
+            memory.save()
+            break
+
+
+def main():
+    """Command-line entry point.
+
+    Flags are checked in this order, and the first match wins:
+
+    * ``--status``: print idea-log statistics and exit.
+    * ``--feedback``: collect reactions for past ideas and exit.
+    * ``--generate`` / ``--dry-run``: generate an idea and print it;
+      ``--dry-run`` skips the Slack post.
+    * no flag: run the daily pipeline, then collect feedback.
+    """
+    parser = argparse.ArgumentParser(description="IdeaSpark daily research idea generator")
+    parser.add_argument("--generate", action="store_true", help="Generate and post today's idea")
+    parser.add_argument("--feedback", action="store_true", help="Collect reactions from past ideas")
+    parser.add_argument("--dry-run", action="store_true", help="Generate but don't post to Slack")
+    parser.add_argument("--status", action="store_true", help="Show idea log stats")
+
+    args = parser.parse_args()
+
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s %(name)s %(levelname)s %(message)s",
+    )
+
+    if args.status:
+        memory = IdeaMemory()
+        print(f"Total ideas: {memory.get_idea_count()}")
+        print(f"🔥: {memory.preferences.get('total_fire', 0)}")
+        print(f"👎: {memory.preferences.get('total_thumbsdown', 0)}")
+        if memory.get_idea_count() > 0:
+            hit_rate = memory.preferences.get("total_fire", 0) / memory.get_idea_count() * 100
+            print(f"Hit rate: {hit_rate:.0f}%")
+        print(f"\nPreferred themes: {memory.get_preferred_themes()}")
+        print(f"Preferred strategy: {memory.get_preferred_strategy()}")
+        summary = memory.generate_meta_summary()
+        if summary:
+            print(f"\n{summary}")
+        return
+
+    if args.feedback:
+        collect_feedback()
+        return
+
+    if args.generate or args.dry_run:
+        agent = IdeaSparkAgent()
+        result = agent.generate_idea()
+
+        if result is None:
+            print("No idea generated (below quality threshold)")
+            return
+
+        print(result["brief"])
+        print("\n--- Metadata ---")
+        print(f"Title: {result['title']}")
+        print(f"Scores: {result['scores']}")
+        print(f"Theme: {result['theme']}")
+        print(f"Strategy: {result['strategy']}")
+        print(f"Stretch: {result['is_stretch']}")
+
+        if not args.dry_run:
+            ts = post_to_slack(result["brief"])
+            if ts:
+                # Without the stored timestamp, a later --feedback run cannot
+                # find this message; run_daily records it the same way.
+                _record_slack_ts(result["idea_number"], ts)
+                print(f"\nPosted to Slack (ts={ts})")
+            else:
+                print("\nFailed to post to Slack")
+        return
+
+    # Default: run daily pipeline (generate + post + collect feedback)
+    run_daily()
+    collect_feedback()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/ideaspark_init.py b/scripts/ideaspark_init.py
new file mode 100644
index 0000000..d87d940
--- /dev/null
+++ b/scripts/ideaspark_init.py
@@ -0,0 +1,553 @@
+#!/usr/bin/env python3
+"""IdeaSpark initialization — build paper corpus from Google Scholar or manual input."""
+
+import argparse
+import json
+import logging
+import sys
+import time
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from src.config import PROJECT_ROOT
+from src.ideaspark.corpus import PaperCorpus, METHOD_TAGS, BIOLOGY_TAGS
+
+logger = logging.getLogger(__name__)
+
+DATA_DIR = PROJECT_ROOT / "data" / "ideaspark"
+
+# ── Hani Goodarzi's publication corpus (extracted from CV) ────────────
+# This is the seed corpus. Can be augmented by Scholar scraping.
+
+SEED_PAPERS = [
+ {
+ "title": "Systematic discovery of cap-independent translation sequences in human and viral genomes",
+ "authors": "Weingarten-Gabbay S, Elber S, ..., Goodarzi H, Segal E",
+ "year": 2016,
+ "journal": "Science",
+ "abstract": "Systematic identification of IRES elements using massively parallel reporter assays.",
+ "method_tags": ["deep learning", "sequence model"],
+ "biology_tags": ["RNA regulation", "translation regulation"],
+ },
+ {
+ "title": "Metastasis-suppressor transcript destabilization through TARBP2 binding of mRNA hairpins",
+ "authors": "Goodarzi H, et al.",
+ "year": 2014,
+ "journal": "Nature",
+ "abstract": "TARBP2 binds structured RNA elements in metastasis-suppressor transcripts to promote their degradation.",
+ "method_tags": ["RNA structure"],
+ "biology_tags": ["RNA regulation", "metastasis", "RNA structure switches"],
+ },
+ {
+ "title": "Endogenous tRNA-Derived Fragments Suppress Breast Cancer Progression via YBX1 Displacement",
+ "authors": "Goodarzi H, et al.",
+ "year": 2015,
+ "journal": "Cell",
+ "abstract": "tRNA-derived fragments suppress breast cancer metastasis by displacing YBX1 from oncogenic transcripts.",
+ "method_tags": ["CRISPR screen"],
+ "biology_tags": ["tRNA biology", "breast cancer", "metastasis", "RNA regulation"],
+ },
+ {
+ "title": "Modulated Expression of Specific tRNAs Drives Gene Expression and Cancer Progression",
+ "authors": "Goodarzi H, et al.",
+ "year": 2016,
+ "journal": "Cell",
+ "abstract": "Cancer cells modulate specific tRNA levels to drive codon-biased translation of pro-metastatic transcripts.",
+ "method_tags": ["codon optimization"],
+ "biology_tags": ["tRNA biology", "codon usage", "breast cancer", "metastasis", "translation regulation"],
+ },
+ {
+ "title": "HNRNPA2B1 Is a Mediator of m6A-Dependent Nuclear RNA Processing Events",
+ "authors": "Alarcón CR, Goodarzi H, et al.",
+ "year": 2015,
+ "journal": "Cell",
+ "abstract": "HNRNPA2B1 reads m6A marks to mediate alternative splicing and microRNA processing.",
+ "method_tags": ["RNA structure"],
+ "biology_tags": ["RNA regulation", "splicing dysregulation", "RBP networks"],
+ },
+ {
+ "title": "Revealing post-transcriptional regulatory elements through network-level conservation",
+ "authors": "Goodarzi H, Najafabadi HS, Oikonomou P, et al.",
+ "year": 2012,
+ "journal": "PLOS Computational Biology",
+ "abstract": "Computational framework for discovering post-transcriptional regulatory elements via network conservation.",
+ "method_tags": ["graph neural network", "sequence model"],
+ "biology_tags": ["RNA regulation", "RBP networks"],
+ },
+ {
+ "title": "Orphan non-coding RNA GIANA promotes breast cancer metastasis through a novel RNA structural interaction",
+ "authors": "Fish L, ..., Goodarzi H",
+ "year": 2018,
+ "journal": "Nature Medicine",
+ "abstract": "Discovery of a novel orphan ncRNA that drives metastasis through RNA structural interactions.",
+ "method_tags": ["RNA structure"],
+ "biology_tags": ["RNA regulation", "metastasis", "breast cancer", "RNA structure switches", "oncRNA"],
+ },
+ {
+ "title": "A quantitative proteomics tool to identify DNA-protein interactions in primary cells or blood",
+ "authors": "Goodarzi H, et al.",
+ "year": 2016,
+ "journal": "Journal of Proteome Research",
+ "abstract": "Quantitative proteomics method for identifying DNA-protein interactions from primary cells.",
+ "method_tags": ["mass spec"],
+ "biology_tags": ["RNA regulation"],
+ },
+ {
+ "title": "TARBP2 as an RNA-binding protein mediating post-transcriptional gene regulation in cancer",
+ "authors": "Fish L, ..., Goodarzi H",
+ "year": 2019,
+ "journal": "Molecular Cell",
+ "abstract": "TARBP2 regulates cancer-relevant transcripts via structured RNA elements in 3'UTRs.",
+ "method_tags": ["RNA structure", "CRISPR screen"],
+ "biology_tags": ["RNA regulation", "breast cancer", "RBP networks", "RNA structure switches"],
+ },
+ {
+ "title": "Compressed sensing of the human genome for RNA-based cancer detection",
+ "authors": "Fish L, ..., Goodarzi H",
+ "year": 2021,
+ "journal": "Science (submitted/published)",
+ "abstract": "Using RNA structural switches as compressed sensors of the cancer genome for liquid biopsy.",
+ "method_tags": ["foundation model", "deep learning", "liquid biopsy"],
+ "biology_tags": ["cancer detection", "RNA structure switches", "cell-free RNA", "oncRNA"],
+ },
+ {
+ "title": "RBMS1 suppresses colon cancer metastasis through targeted stabilization of its mRNA regulon",
+ "authors": "Zhang B, ..., Goodarzi H",
+ "year": 2020,
+ "journal": "Cancer Discovery",
+ "abstract": "RBMS1 stabilizes a metastasis-suppressive mRNA regulon in colon cancer.",
+ "method_tags": ["CRISPR screen"],
+ "biology_tags": ["RNA regulation", "metastasis", "RBP networks"],
+ },
+ {
+ "title": "Sense-antisense lncRNA pair encoded by human cancer genome",
+ "authors": "Nojima T, ..., Goodarzi H",
+ "year": 2021,
+ "journal": "Nature Cancer",
+ "abstract": "Characterization of sense-antisense lncRNA pairs in human cancers.",
+ "method_tags": ["RNA structure"],
+ "biology_tags": ["RNA regulation", "oncRNA"],
+ },
+ {
+ "title": "Codon-dependent translational rewiring in cancer",
+ "authors": "Lorent J, ..., Goodarzi H",
+ "year": 2022,
+ "journal": "Nature Cancer",
+ "abstract": "Cancer cells exploit codon-dependent translation to drive malignant phenotypes.",
+ "method_tags": ["codon optimization", "deep learning"],
+ "biology_tags": ["codon usage", "translation regulation"],
+ },
+ {
+ "title": "Codon usage and mRNA stability in cancer",
+ "authors": "Wu Q, ..., Goodarzi H",
+ "year": 2022,
+ "journal": "Nature Cell Biology",
+ "abstract": "Codon optimality controls mRNA stability in a cancer-specific manner.",
+ "method_tags": ["codon optimization", "sequence model"],
+ "biology_tags": ["codon usage", "RNA regulation", "translation regulation"],
+ },
+ {
+ "title": "Evo: DNA foundation model spanning all domains of life",
+ "authors": "Nguyen E, ..., Goodarzi H, et al.",
+ "year": 2024,
+ "journal": "Science",
+ "abstract": "Evo is a 7B-parameter DNA foundation model trained on 300B tokens spanning all domains of life, enabling prediction and generation at molecular to genome scale.",
+ "method_tags": ["foundation model", "deep learning", "generative model", "sequence model"],
+ "biology_tags": ["RNA regulation", "codon usage"],
+ },
+ {
+ "title": "Evo 2: genome modeling at 131k context",
+ "authors": "ArcInstitute team, Goodarzi H, et al.",
+ "year": 2025,
+ "journal": "Preprint/Science",
+ "abstract": "Evo 2 is a 40B-parameter DNA foundation model with 131k context, enabling long-range genomic understanding.",
+ "method_tags": ["foundation model", "deep learning", "generative model", "sequence model"],
+ "biology_tags": ["RNA regulation", "codon usage"],
+ },
+ {
+ "title": "Exai-1: multimodal cell-free RNA foundation model for liquid biopsy",
+ "authors": "Goodarzi H, et al.",
+ "year": 2024,
+ "journal": "Nature Medicine",
+ "abstract": "Multimodal cfRNA foundation model for cancer detection from liquid biopsy.",
+ "method_tags": ["foundation model", "deep learning", "liquid biopsy"],
+ "biology_tags": ["cancer detection", "cell-free RNA", "oncRNA"],
+ },
+ {
+ "title": "GENEVA: scalable molecular phenotyping of tumor models",
+ "authors": "Goodarzi H, et al.",
+ "year": 2024,
+ "journal": "Published",
+ "abstract": "Scalable molecular phenotyping platform for characterizing tumor model fidelity.",
+ "method_tags": ["flow cytometry", "drug screening"],
+ "biology_tags": ["drug response", "perturbation biology"],
+ },
+ {
+ "title": "SwitchSeeker: RNA structural switch discovery",
+ "authors": "Goodarzi H, et al.",
+ "year": 2024,
+ "journal": "Published",
+ "abstract": "Computational framework for discovering RNA structural switches genome-wide.",
+ "method_tags": ["RNA structure", "deep learning", "sequence model"],
+ "biology_tags": ["RNA regulation", "RNA structure switches"],
+ },
+ {
+ "title": "Artificial intelligence in drug discovery and development",
+ "authors": "Goodarzi H, et al.",
+ "year": 2020,
+ "journal": "Frontiers in Artificial Intelligence",
+ "abstract": "Review of AI methods for drug discovery and development.",
+ "method_tags": ["deep learning", "NLP/LLM"],
+ "biology_tags": ["drug response"],
+ },
+ {
+ "title": "Androgen signaling regulates SARS-CoV-2 entry in human airway cells",
+ "authors": "..., Goodarzi H",
+ "year": 2020,
+ "journal": "Cell Stem Cell",
+ "abstract": "Androgen receptor signaling modulates ACE2 and TMPRSS2 expression affecting COVID-19 susceptibility.",
+ "method_tags": ["single-cell"],
+ "biology_tags": ["RNA regulation"],
+ },
+ # ── Additional publications from CV ──────────────────────────────────
+ {
+ "title": "Systematic discovery of structural elements governing stability of mammalian messenger RNAs",
+ "authors": "Goodarzi H, Najafabadi HS, Oikonomou P, et al.",
+ "year": 2012,
+ "journal": "Nature",
+ "abstract": "Systematic discovery of cis-regulatory structural elements in 3'UTRs that control mRNA stability in mammalian cells.",
+ "method_tags": ["sequence model", "RNA structure"],
+ "biology_tags": ["RNA regulation", "RNA structure switches"],
+ },
+ {
+ "title": "Asparagine bioavailability governs metastasis in a model of breast cancer",
+ "authors": "Knott SRV, ..., Goodarzi H, Poulogiannis G, Hannon GJ",
+ "year": 2018,
+ "journal": "Nature",
+ "abstract": "Asparagine availability promotes breast cancer metastasis; dietary restriction or asparaginase reduce metastatic potential.",
+ "method_tags": ["CRISPR screen"],
+ "biology_tags": ["metastasis", "breast cancer", "drug response"],
+ },
+ {
+ "title": "Tumoural activation of TLR3-SLIT2 axis in endothelium drives metastasis",
+ "authors": "Tavora B, ..., Goodarzi H, Tavazoie SF",
+ "year": 2020,
+ "journal": "Nature",
+ "abstract": "Tumor-derived signals activate endothelial TLR3-SLIT2 axis to promote metastatic dissemination.",
+ "method_tags": [],
+ "biology_tags": ["metastasis", "tumor microenvironment"],
+ },
+ {
+ "title": "N6-methyladenosine marks primary miRNAs for processing",
+ "authors": "Alarcon C, Lee H, Goodarzi H, Tavazoie SF",
+ "year": 2015,
+ "journal": "Nature",
+ "abstract": "m6A modification of primary miRNAs facilitates their recognition and processing by DGCR8.",
+ "method_tags": ["RNA structure"],
+ "biology_tags": ["RNA regulation", "RBP networks"],
+ },
+ {
+ "title": "A pro-metastatic splicing program regulated by SNRPA1 interactions with structured RNA elements",
+ "authors": "Fish L, Khoroshkin M, Navickas A, ..., Goodarzi H",
+ "year": 2021,
+ "journal": "Science",
+ "abstract": "SNRPA1 drives a pro-metastatic alternative splicing program through recognition of RNA structural elements.",
+ "method_tags": ["RNA structure", "CRISPR screen"],
+ "biology_tags": ["splicing dysregulation", "metastasis", "RNA structure switches", "RBP networks"],
+ },
+ {
+ "title": "ERα is an RNA-binding protein sustaining tumor cell survival and drug resistance",
+ "authors": "Xu Y, ..., Goodarzi H, Ruggero D",
+ "year": 2021,
+ "journal": "Cell",
+ "abstract": "ERα functions as an RNA-binding protein to stabilize transcripts that sustain tumor cell survival and drug resistance.",
+ "method_tags": [],
+ "biology_tags": ["RNA regulation", "drug response", "RBP networks", "breast cancer"],
+ },
+ {
+ "title": "Genomic Hallmarks and Structural Variation in Metastatic Prostate Cancer",
+ "authors": "Quigley DA, ..., Goodarzi H, Gilbert LA, ..., Feng FY",
+ "year": 2018,
+ "journal": "Cell",
+ "abstract": "Comprehensive genomic characterization of metastatic castration-resistant prostate cancer, revealing structural variants and non-coding alterations.",
+ "method_tags": ["deep learning"],
+ "biology_tags": ["prostate cancer", "tumor evolution"],
+ },
+ {
+ "title": "Functional Genomics In Vivo Reveal Metabolic Dependencies of Pancreatic Cancer Cells",
+ "authors": "Zhu XG, ..., Goodarzi H, Birsoy K",
+ "year": 2020,
+ "journal": "Cell Metabolism",
+ "abstract": "In vivo functional genomics screen reveals metabolic dependencies specific to pancreatic cancer.",
+ "method_tags": ["CRISPR screen"],
+ "biology_tags": ["drug response", "perturbation biology"],
+ },
+ {
+ "title": "An mRNA processing pathway suppresses metastasis by governing translational control from the nucleus",
+ "authors": "Navickas A, Asgharian H, ..., Goodarzi H",
+ "year": 2021,
+ "journal": "Nature Cell Biology",
+ "abstract": "Nuclear mRNA processing pathway controls translation of metastasis-related transcripts.",
+ "method_tags": ["CRISPR screen"],
+ "biology_tags": ["RNA regulation", "metastasis", "translation regulation", "splicing dysregulation"],
+ },
+ {
+ "title": "The LC3-conjugation machinery specifies the loading of RNA-binding proteins into extracellular vesicles",
+ "authors": "Leidal AM, ..., Goodarzi H, ..., Debnath J",
+ "year": 2020,
+ "journal": "Nature Cell Biology",
+ "abstract": "LC3-conjugation machinery directs specific RNA-binding proteins and their RNA targets into extracellular vesicles.",
+ "method_tags": [],
+ "biology_tags": ["RNA regulation", "RBP networks", "cell-free RNA"],
+ },
+ {
+ "title": "Mechanosensitive pannexin-1 channels mediate microvascular metastatic cell survival",
+ "authors": "Furlow PW, ..., Goodarzi H, ..., Tavazoie SF",
+ "year": 2015,
+ "journal": "Nature Cell Biology",
+ "abstract": "Pannexin-1 channels enable metastatic cells to survive in the vasculature through mechanosensitive signaling.",
+ "method_tags": [],
+ "biology_tags": ["metastasis"],
+ },
+ {
+ "title": "A stress-induced tyrosine-tRNA depletion response mediates codon-based translational repression and growth suppression",
+ "authors": "Huh D, ..., Goodarzi H, Tavazoie SF",
+ "year": 2020,
+ "journal": "EMBO Journal",
+ "abstract": "Stress-induced depletion of tyrosine-tRNA drives codon-dependent translational repression of growth-promoting genes.",
+ "method_tags": ["codon optimization"],
+ "biology_tags": ["tRNA biology", "codon usage", "translation regulation"],
+ },
+ {
+ "title": "FTO controls reversible m6Am RNA methylation during snRNA biogenesis",
+ "authors": "Mauer J, ..., Goodarzi H, Jaffrey S",
+ "year": 2019,
+ "journal": "Nature Chemical Biology",
+ "abstract": "FTO demethylates m6Am on snRNAs, revealing a reversible epitranscriptomic modification.",
+ "method_tags": [],
+ "biology_tags": ["RNA regulation"],
+ },
+ {
+ "title": "Inference of RNA decay rate from transcriptional profiling highlights the regulatory programs of Alzheimer's disease",
+ "authors": "Alkallas R, Fish L, Goodarzi H, Najafabadi HS",
+ "year": 2017,
+ "journal": "Nature Communications",
+ "abstract": "Computational inference of RNA decay rates from transcriptional profiling reveals disease-associated regulatory programs.",
+ "method_tags": ["sequence model"],
+ "biology_tags": ["RNA regulation"],
+ },
+ {
+ "title": "Highly variable cancer subpopulations that exhibit enhanced transcriptome variability and metastatic fitness",
+ "authors": "Nguyen A, Yoshida M, Goodarzi H, Tavazoie SF",
+ "year": 2016,
+ "journal": "Nature Communications",
+ "abstract": "Cancer subpopulations with high transcriptome variability exhibit enhanced metastatic fitness.",
+ "method_tags": ["single-cell"],
+ "biology_tags": ["tumor evolution", "metastasis"],
+ },
+ {
+ "title": "Muscleblind-like 1 suppresses breast cancer metastatic colonization and stabilizes metastasis suppressor transcripts",
+ "authors": "Fish L, Pencheva N, Goodarzi H, et al.",
+ "year": 2016,
+ "journal": "Genes & Development",
+ "abstract": "MBNL1 suppresses metastatic colonization by stabilizing transcripts of metastasis suppressor genes.",
+ "method_tags": [],
+ "biology_tags": ["RNA regulation", "metastasis", "breast cancer", "RBP networks"],
+ },
+ {
+ "title": "TMEM2 Is a SOX4-Regulated Gene That Mediates Metastatic Migration and Invasion in Breast Cancer",
+ "authors": "Lee H, Goodarzi H, Tavazoie SF, Alarcon CR",
+ "year": 2016,
+ "journal": "Cancer Research",
+ "abstract": "TMEM2 promotes breast cancer metastasis downstream of SOX4 transcriptional regulation.",
+ "method_tags": [],
+ "biology_tags": ["metastasis", "breast cancer"],
+ },
+ {
+ "title": "PAPERCLIP Identifies MicroRNA Targets and a Role of CstF64/64tau in Promoting Non-canonical poly(A) Site Usage",
+ "authors": "Hwang HW, ..., Goodarzi H, ..., Darnell RB",
+ "year": 2016,
+ "journal": "Cell Reports",
+ "abstract": "PAPERCLIP method for mapping protein-RNA interactions reveals miRNA target sites and polyadenylation regulation.",
+ "method_tags": [],
+ "biology_tags": ["RNA regulation", "RBP networks"],
+ },
+ {
+ "title": "Systematic Identification of Regulatory Elements in Conserved 3'UTRs of Human Transcripts",
+ "authors": "Oikonomou P, Goodarzi H, Tavazoie S",
+ "year": 2014,
+ "journal": "Cell Reports",
+ "abstract": "Systematic computational and experimental identification of conserved regulatory elements in human 3'UTRs.",
+ "method_tags": ["sequence model"],
+ "biology_tags": ["RNA regulation"],
+ },
+ {
+ "title": "A massively parallel 3'UTR reporter assay reveals relationships between nucleotide content, sequence conservation, and mRNA destabilization",
+ "authors": "Litterman J, ..., Goodarzi H, Erle DJ, Ansel KM",
+ "year": 2019,
+ "journal": "Genome Research",
+ "abstract": "Massively parallel reporter assay systematically maps 3'UTR elements that control mRNA stability.",
+ "method_tags": ["deep learning"],
+ "biology_tags": ["RNA regulation"],
+ },
+ {
+ "title": "A global cancer data integrator reveals principles of synthetic lethality, sex disparity and immunotherapy",
+ "authors": "Yogodzinski C, Arab A, Pritchard JR, Goodarzi H, Gilbert LA",
+ "year": 2021,
+ "journal": "Genome Medicine",
+ "abstract": "Integrated cancer data platform reveals synthetic lethal interactions, sex-based disparities, and immunotherapy response patterns.",
+ "method_tags": ["deep learning"],
+ "biology_tags": ["drug response", "perturbation biology"],
+ },
+ {
+ "title": "The molecular consequences of androgen activity in the human breast",
+ "authors": "Raths F, Karimzadeh M, ..., Goodarzi H, ..., Knott SRV",
+ "year": 2022,
+ "journal": "Cell Genomics",
+ "abstract": "Characterization of androgen receptor signaling consequences in human breast tissue.",
+ "method_tags": ["single-cell"],
+ "biology_tags": ["breast cancer", "RNA regulation"],
+ },
+ {
+ "title": "Revealing Global Regulatory Perturbations across Human Cancers",
+ "authors": "Goodarzi H, Elemento O, Tavazoie S",
+ "year": 2009,
+ "journal": "Molecular Cell",
+ "abstract": "Computational framework revealing global regulatory perturbations in post-transcriptional programs across cancer types.",
+ "method_tags": ["sequence model", "graph neural network"],
+ "biology_tags": ["RNA regulation", "RBP networks"],
+ },
+ {
+ "title": "Global discovery of adaptive mutations",
+ "authors": "Goodarzi H, Hottes AK, Tavazoie S",
+ "year": 2009,
+ "journal": "Nature Methods",
+ "abstract": "Computational method for systematically discovering adaptive mutations from evolution experiments.",
+ "method_tags": ["deep learning"],
+ "biology_tags": [],
+ },
+ {
+ "title": "MicroRNA-203 predicts human survival after resection of colorectal liver metastasis",
+ "authors": "Kingham PT, ..., Goodarzi H, Tavazoie SF",
+ "year": 2016,
+ "journal": "Oncotarget",
+ "abstract": "miR-203 expression predicts survival outcomes following colorectal cancer liver metastasis resection.",
+ "method_tags": [],
+ "biology_tags": ["metastasis", "cancer detection"],
+ },
+ {
+ "title": "Massively multiplex single-molecule oligonucleosome footprinting",
+ "authors": "Abdulhay NJ, ..., Goodarzi H, Narlikar GJ, Ramani V",
+ "year": 2020,
+ "journal": "eLife",
+ "abstract": "Single-molecule method for massively parallel chromatin accessibility profiling at nucleosome resolution.",
+ "method_tags": ["single-cell"],
+ "biology_tags": [],
+ },
+]
+
+
def build_corpus_from_seed():
    """Create the corpus from ``SEED_PAPERS``, try to embed it, then save it.

    Embedding is best-effort: if ``embed_all`` (or reading the resulting
    embedding shape) raises, a warning is printed and the corpus is still
    saved afterwards.
    """
    seed_corpus = PaperCorpus()
    seed_corpus.build_from_list(SEED_PAPERS)
    print(f"Built corpus with {seed_corpus.size} papers")

    # Embedding failures must not prevent the corpus from being saved.
    print("Generating embeddings (this may take a moment)...")
    try:
        seed_corpus.embed_all()
        print(f"Embeddings generated: shape {seed_corpus.embeddings.shape}")
    except Exception as err:
        print(f"Warning: Could not generate embeddings: {err}")
        print("Corpus saved without embeddings (semantic search will use random sampling)")

    seed_corpus.save()
    # NOTE(review): the reported path is built here, not returned by save();
    # presumably it matches where PaperCorpus.save() writes — confirm.
    reported_path = DATA_DIR / 'papers_corpus.json'
    print(f"Corpus saved to {reported_path}")
+
+
def build_from_json(path: str):
    """Build the corpus from a JSON file of paper dicts, embed it, and save it.

    Args:
        path: Path to a JSON file. Its decoded content is handed unchanged to
            ``PaperCorpus.build_from_list``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 (the standard JSON encoding) so non-ASCII
    # author names and titles load the same way regardless of the platform's
    # locale default encoding.
    with open(path, encoding="utf-8") as f:
        papers = json.load(f)
    corpus = PaperCorpus()
    corpus.build_from_list(papers)
    print(f"Loaded {corpus.size} papers from {path}")

    # Embedding is best-effort: on failure, warn and still save the corpus.
    print("Generating embeddings...")
    try:
        corpus.embed_all()
    except Exception as e:
        print(f"Warning: {e}")

    corpus.save()
    print("Done.")
+
+
def auto_tag_papers():
    """Fill in missing tags on corpus papers by keyword matching.

    A paper is examined when its ``method_tags`` or ``biology_tags`` entry is
    missing or empty. Each empty entry is set to the members of
    ``METHOD_TAGS`` / ``BIOLOGY_TAGS`` that occur, case-insensitively and as
    plain substrings, in the paper's title plus abstract. The corpus is then
    saved and the number of papers that gained at least one tag is printed.
    """
    corpus = PaperCorpus()
    if not corpus.papers:
        print("No papers in corpus")
        return

    tagged = 0
    for p in corpus.papers:
        # Fully tagged papers are left alone.
        if p.get("method_tags") and p.get("biology_tags"):
            continue

        text = f"{p.get('title', '')} {p.get('abstract', '')}".lower()

        gained_tags = False
        for field, vocabulary in (
            ("method_tags", METHOD_TAGS),
            ("biology_tags", BIOLOGY_TAGS),
        ):
            if p.get(field):
                continue
            matches = [t for t in vocabulary if t.lower() in text]
            # The field is always written (possibly as an empty list) so the
            # key exists afterwards.
            p[field] = matches
            if matches:
                gained_tags = True

        # Count only papers that actually received a tag, so the summary
        # below does not over-report when nothing matched.
        if gained_tags:
            tagged += 1

    corpus.save()
    print(f"Auto-tagged {tagged} papers")
+
+
def main():
    """Command-line entry point for building or inspecting the paper corpus.

    Flags are checked in the order ``--status``, ``--seed``, ``--json``,
    ``--auto-tag``; the first one present is handled and the function
    returns. With no flags, the corpus is built from the seed papers.
    """
    parser = argparse.ArgumentParser(description="Initialize IdeaSpark paper corpus")
    parser.add_argument("--seed", action="store_true", help="Build from hardcoded seed papers")
    parser.add_argument("--json", type=str, help="Build from a JSON file")
    parser.add_argument("--auto-tag", action="store_true", help="Auto-tag untagged papers")
    parser.add_argument("--status", action="store_true", help="Show corpus status")

    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    if args.status:
        corpus = PaperCorpus()
        print(f"Corpus: {corpus.summary()}")
        if corpus.papers:
            # Ignore papers without a year instead of defaulting them to 0,
            # which would drag the reported range down to "0–...".
            years = [p["year"] for p in corpus.papers if p.get("year")]
            if years:
                print(f"Years: {min(years)}–{max(years)}")
            # A missing or empty journal is not a distinct journal.
            journals = {p["journal"] for p in corpus.papers if p.get("journal")}
            print(f"Journals: {len(journals)}")
            with_tags = sum(1 for p in corpus.papers if p.get("method_tags"))
            print(f"Tagged: {with_tags}/{len(corpus.papers)}")
        return

    if args.seed:
        build_corpus_from_seed()
        return

    if args.json:
        build_from_json(args.json)
        return

    if args.auto_tag:
        auto_tag_papers()
        return

    # Default: build from seed
    print("No arguments provided. Building from seed corpus...")
    build_corpus_from_seed()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/scripts/launchd/com.engram.ideaspark.feedback.plist b/scripts/launchd/com.engram.ideaspark.feedback.plist
new file mode 100644
index 0000000..c7f7759
--- /dev/null
+++ b/scripts/launchd/com.engram.ideaspark.feedback.plist
@@ -0,0 +1,29 @@
+
+
+
+
+ Label
+ com.engram.ideaspark.feedback
+
+ ProgramArguments
+
+ /Users/hani/Box Sync/CLAUDE/engram/scripts/ideaspark_cron.sh
+ feedback
+
+
+
+ StartCalendarInterval
+
+ Hour
+ 22
+ Minute
+ 0
+
+
+ StandardOutPath
+ /Users/hani/Box Sync/CLAUDE/engram/logs/ideaspark_feedback.out.log
+ StandardErrorPath
+ /Users/hani/Box Sync/CLAUDE/engram/logs/ideaspark_feedback.err.log
+
+
+
diff --git a/scripts/launchd/com.engram.ideaspark.generate.plist b/scripts/launchd/com.engram.ideaspark.generate.plist
new file mode 100644
index 0000000..967a04c
--- /dev/null
+++ b/scripts/launchd/com.engram.ideaspark.generate.plist
@@ -0,0 +1,29 @@
+
+
+
+
+ Label
+ com.engram.ideaspark.generate
+
+ ProgramArguments
+
+ /Users/hani/Box Sync/CLAUDE/engram/scripts/ideaspark_cron.sh
+ generate
+
+
+
+ StartCalendarInterval
+
+ Hour
+ 5
+ Minute
+ 0
+
+
+ StandardOutPath
+ /Users/hani/Box Sync/CLAUDE/engram/logs/ideaspark_generate.out.log
+ StandardErrorPath
+ /Users/hani/Box Sync/CLAUDE/engram/logs/ideaspark_generate.err.log
+
+
+
diff --git a/src/bot/actions/calendar_actions.py b/src/bot/actions/calendar_actions.py
index 4aa07dc..9b2a013 100644
--- a/src/bot/actions/calendar_actions.py
+++ b/src/bot/actions/calendar_actions.py
@@ -2,11 +2,11 @@
import logging
from dataclasses import dataclass, field
-from datetime import datetime, timedelta, timezone
+from datetime import datetime, timedelta
from typing import Any
-from .confirmable import PendingAction
from ...config import PRIMARY_ACCOUNT
+from .confirmable import PendingAction
logger = logging.getLogger(__name__)
@@ -84,7 +84,6 @@ def get_preview(self) -> str:
def execute(self) -> dict[str, Any]:
"""Create the calendar event."""
from ...integrations.gcalendar import CalendarClient
- from ...config import get_user_timezone
try:
# Parse the date and time
@@ -124,76 +123,9 @@ def execute(self) -> dict[str, Any]:
def _parse_datetime(self) -> datetime:
"""Parse date_str and time_str into a datetime."""
- from ...config import get_user_timezone
-
- tz = get_user_timezone()
- now = datetime.now(tz)
-
- # Parse date
- date_lower = self.date_str.lower()
- if date_lower == "today":
- target_date = now.date()
- elif date_lower == "tomorrow":
- target_date = (now + timedelta(days=1)).date()
- elif date_lower == "yesterday":
- target_date = (now - timedelta(days=1)).date()
- else:
- # Try day names (next occurrence)
- day_names = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]
- if date_lower in day_names:
- target_weekday = day_names.index(date_lower)
- days_ahead = target_weekday - now.weekday()
- if days_ahead <= 0: # Target day already happened this week
- days_ahead += 7
- target_date = (now + timedelta(days=days_ahead)).date()
- else:
- # Try ISO format
- try:
- target_date = datetime.fromisoformat(self.date_str).date()
- except ValueError:
- # Fall back to today
- target_date = now.date()
-
- # Parse time
- time_lower = self.time_str.lower().strip()
- hour = 12 # Default to noon
- minute = 0
-
- if time_lower == "noon":
- hour, minute = 12, 0
- elif time_lower == "midnight":
- hour, minute = 0, 0
- elif ":" in time_lower:
- # Format like "14:00" or "2:30pm"
- time_part = time_lower.replace("am", "").replace("pm", "").strip()
- parts = time_part.split(":")
- hour = int(parts[0])
- minute = int(parts[1]) if len(parts) > 1 else 0
- if "pm" in time_lower and hour < 12:
- hour += 12
- elif "am" in time_lower and hour == 12:
- hour = 0
- else:
- # Format like "2pm" or "14"
- time_clean = time_lower.replace("am", "").replace("pm", "").strip()
- try:
- hour = int(time_clean)
- if "pm" in time_lower and hour < 12:
- hour += 12
- elif "am" in time_lower and hour == 12:
- hour = 0
- except ValueError:
- pass
-
- # Combine date and time
- return datetime(
- year=target_date.year,
- month=target_date.month,
- day=target_date.day,
- hour=hour,
- minute=minute,
- tzinfo=tz,
- )
+ from ..datetime_utils import parse_event_datetime
+
+ return parse_event_datetime(self.date_str, self.time_str)
def get_action_type(self) -> str:
return "Create Calendar Event"
diff --git a/src/bot/agents/base.py b/src/bot/agents/base.py
index 15759f0..2d95a0f 100644
--- a/src/bot/agents/base.py
+++ b/src/bot/agents/base.py
@@ -1,19 +1,19 @@
"""Base agent class for specialized domain agents."""
-import json
import logging
from abc import ABC, abstractmethod
+from collections.abc import Generator
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
-from typing import Any, Generator
+from typing import Any
from anthropic import Anthropic
+from ...config import AGENT_MODEL, ANTHROPIC_API_KEY, get_user_timezone
from ..conversation import ConversationContext
-from ..tools import ToolResult, get_tool_schemas, TOOL_NAME_MAP
+from ..tools import get_tool_schemas
from ..user_memory import UserMemory
-from ...config import ANTHROPIC_API_KEY, AGENT_MODEL
logger = logging.getLogger(__name__)
@@ -156,8 +156,10 @@ def _build_system_prompt(self, context: ConversationContext) -> str:
"""
prompt = self.system_prompt
- # Add current date
- current_date = datetime.now().strftime("%Y-%m-%d %A")
+ # Add current local date/time
+ current_date = datetime.now(get_user_timezone()).strftime(
+ "%Y-%m-%d %A %I:%M %p %Z"
+ )
prompt = prompt.replace("{current_date}", current_date)
# Inject user memory context if available
@@ -192,11 +194,12 @@ def _build_messages(
"content": msg["content"],
})
- # Add current message
- messages.append({
+ current_message = {
"role": "user",
"content": message,
- })
+ }
+ if not messages or messages[-1] != current_message:
+ messages.append(current_message)
return messages
@@ -284,8 +287,7 @@ def run(
})
if (
- tool_name == "SendEmailTool"
- and result.success
+ result.success
and isinstance(result.data, dict)
and result.data.get("requires_confirmation")
):
@@ -480,8 +482,7 @@ def run_streaming(
})
if (
- tool_name == "SendEmailTool"
- and result.success
+ result.success
and isinstance(result.data, dict)
and result.data.get("requires_confirmation")
):
diff --git a/src/bot/agents/calendar_agent.py b/src/bot/agents/calendar_agent.py
index f870d9e..ba1a01e 100644
--- a/src/bot/agents/calendar_agent.py
+++ b/src/bot/agents/calendar_agent.py
@@ -1,10 +1,9 @@
"""Calendar specialist agent."""
import logging
-from typing import Any
-from .base import BaseAgent, AgentType
from ..conversation import ConversationContext
+from .base import AgentType, BaseAgent
logger = logging.getLogger(__name__)
@@ -50,7 +49,7 @@ def system_prompt(self) -> str:
Your expertise is managing calendar events and scheduling across multiple Google accounts.
-Today's date: {current_date}
+Current local date/time: {current_date}
CAPABILITIES:
- Check calendar events for any date (today, tomorrow, specific dates)
@@ -61,10 +60,11 @@ def system_prompt(self) -> str:
GUIDELINES:
1. Always specify the date context clearly in your responses
-2. When showing events, organize by time of day
-3. For availability checks, suggest the best slots based on typical patterns
-4. Be concise but include key details (time, meeting name, location if available)
-5. Use RespondToUserTool to send your final response
+2. For "next", "upcoming", or "what's next" requests, use the tool's next_event and upcoming_events fields. Do not describe events whose end time is before current_time as upcoming.
+3. When showing full-day event listings, organize by time of day
+4. For availability checks, suggest the best slots based on typical patterns
+5. Be concise but include key details (time, meeting name, location if available)
+6. Use RespondToUserTool to send your final response
RESPONSE FORMAT:
- For event listings: Group by morning/afternoon/evening
diff --git a/src/bot/agents/orchestrator.py b/src/bot/agents/orchestrator.py
index 9dca835..73211fd 100644
--- a/src/bot/agents/orchestrator.py
+++ b/src/bot/agents/orchestrator.py
@@ -150,6 +150,10 @@ def _is_conversational(self, message: str) -> bool:
"""Check if message is purely conversational (greetings, small talk)."""
message_lower = message.lower().strip()
+ # Never treat as conversational if it contains personal data keywords
+ if self._needs_personal_data(message):
+ return False
+
# Check greetings
if message_lower in GREETINGS:
return True
diff --git a/src/bot/app.py b/src/bot/app.py
index fed071d..1891026 100644
--- a/src/bot/app.py
+++ b/src/bot/app.py
@@ -2,6 +2,8 @@
import atexit
import logging
+import os
+import time
from typing import Callable
from apscheduler.schedulers.background import BackgroundScheduler
@@ -83,12 +85,26 @@ def on_shutdown():
enable_streaming=streaming,
)
- # Add global error handler
+ # Track repeated connection failures and restart if stuck
+ _error_state = {"broken_pipe_count": 0, "last_broken_pipe": 0.0}
+
@app.error
def global_error_handler(error, body, logger):
logger.error(f"Error: {error}")
logger.error(f"Request body: {body}")
+ if isinstance(error, BrokenPipeError):
+ now = time.time()
+ # Reset counter if last error was > 60s ago (not a tight loop)
+ if now - _error_state["last_broken_pipe"] > 60:
+ _error_state["broken_pipe_count"] = 0
+ _error_state["broken_pipe_count"] += 1
+ _error_state["last_broken_pipe"] = now
+
+ if _error_state["broken_pipe_count"] >= 5:
+ logger.error("BrokenPipeError loop detected — exiting for launchd restart")
+ os._exit(1)
+
# Create Socket Mode handler
handler = SocketModeHandler(app, app_token)
@@ -169,7 +185,16 @@ def _setup_proactive_scheduler(
replace_existing=True,
)
- logger.info("Proactive scheduler configured with 4 jobs")
+ # Release stale Google API connections every 4 hours to prevent socket exhaustion
+ scheduler.add_job(
+ heartbeat.cleanup_connections,
+ IntervalTrigger(hours=4),
+ id="cleanup_connections",
+ name="Release Google API connections",
+ replace_existing=True,
+ )
+
+ logger.info("Proactive scheduler configured with 5 jobs")
return scheduler
diff --git a/src/bot/datetime_utils.py b/src/bot/datetime_utils.py
new file mode 100644
index 0000000..1bfb744
--- /dev/null
+++ b/src/bot/datetime_utils.py
@@ -0,0 +1,87 @@
+"""Date and time parsing helpers for bot actions."""
+
+from datetime import datetime, timedelta
+
+from ..config import get_user_timezone
+
+
+def parse_event_datetime(date_str: str, time_str: str) -> datetime:
+    """Parse bot event date/time strings into a timezone-aware datetime.
+
+    Args:
+        date_str: ``"today"``, ``"tomorrow"``, ``"yesterday"``, an English
+            weekday name (resolved to its next occurrence strictly after
+            today), or an ISO 8601 date/datetime string. Case and surrounding
+            whitespace are ignored.
+        time_str: Time of day in any form accepted by ``parse_event_time``.
+
+    Returns:
+        A datetime carrying the tzinfo returned by ``get_user_timezone()``.
+
+    Raises:
+        ValueError: If the date or the time cannot be parsed.
+    """
+    tz = get_user_timezone()
+    today = datetime.now(tz).date()
+
+    date_clean = date_str.strip()
+    date_lower = date_clean.lower()
+    relative_offsets = {"today": 0, "tomorrow": 1, "yesterday": -1}
+    weekday_names = (
+        "monday",
+        "tuesday",
+        "wednesday",
+        "thursday",
+        "friday",
+        "saturday",
+        "sunday",
+    )
+
+    if date_lower in relative_offsets:
+        target_date = today + timedelta(days=relative_offsets[date_lower])
+    elif date_lower in weekday_names:
+        # A bare weekday name never means today: same-day and earlier
+        # weekdays roll over to next week (offset is always 1..7).
+        days_ahead = (weekday_names.index(date_lower) - today.weekday()) % 7 or 7
+        target_date = today + timedelta(days=days_ahead)
+    else:
+        try:
+            # Parse the stripped text: fromisoformat() rejects padded input,
+            # while the keyword branches above already tolerate padding.
+            target_date = datetime.fromisoformat(date_clean).date()
+        except ValueError as exc:
+            raise ValueError(f"Could not parse event date: {date_str}") from exc
+
+    hour, minute = parse_event_time(time_str)
+
+    # NOTE(review): passing tzinfo= directly is correct for zoneinfo-style
+    # tzinfo objects; if get_user_timezone() ever returns a pytz zone this
+    # must become tz.localize(...) -- confirm against src/config.py.
+    return datetime(
+        year=target_date.year,
+        month=target_date.month,
+        day=target_date.day,
+        hour=hour,
+        minute=minute,
+        tzinfo=tz,
+    )
+
+
+def parse_event_time(time_str: str) -> tuple[int, int]:
+    """Parse a compact event time string into an ``(hour, minute)`` pair.
+
+    Accepts ``"noon"``, ``"midnight"``, a bare hour (``"14"``, ``"2pm"``) or
+    ``hour:minute`` (``"14:00"``, ``"2:30pm"``). Matching is case-insensitive
+    and ignores surrounding whitespace. Anything after a second colon is
+    ignored.
+
+    Raises:
+        ValueError: If the text is not numeric or the resulting time falls
+            outside 00:00-23:59.
+    """
+    normalized = time_str.lower().strip()
+
+    named_times = {"noon": (12, 0), "midnight": (0, 0)}
+    if normalized in named_times:
+        return named_times[normalized]
+
+    is_pm = "pm" in normalized
+    is_am = "am" in normalized
+
+    # With no colon present, split() yields a single field and the minute
+    # defaults to zero, so one code path serves both spellings.
+    fields = normalized.replace("am", "").replace("pm", "").strip().split(":")
+    try:
+        hour = int(fields[0])
+        minute = int(fields[1]) if len(fields) > 1 else 0
+    except ValueError as exc:
+        raise ValueError(f"Could not parse event time: {time_str}") from exc
+
+    # Convert 12-hour clock readings to 24-hour values.
+    if is_pm and hour < 12:
+        hour += 12
+    elif is_am and hour == 12:
+        hour = 0
+
+    if not (0 <= hour <= 23 and 0 <= minute <= 59):
+        raise ValueError(f"Event time out of range: {time_str}")
+
+    return hour, minute
diff --git a/src/bot/event_handlers.py b/src/bot/event_handlers.py
index ee8f61a..dfe9098 100644
--- a/src/bot/event_handlers.py
+++ b/src/bot/event_handlers.py
@@ -7,18 +7,24 @@
from slack_bolt import App
-from ..config import SLACK_AUTHORIZED_USERS, SLACK_ALLOW_ALL_USERS, BOT_MODE, ENABLE_STREAMING, STREAMING_UPDATE_INTERVAL
+from ..config import (
+ BOT_MODE,
+ ENABLE_STREAMING,
+ SLACK_ALLOW_ALL_USERS,
+ SLACK_AUTHORIZED_USERS,
+ STREAMING_UPDATE_INTERVAL,
+)
+from .audit import AuditEventType, get_audit_logger
from .conversation import ConversationManager
from .formatters import format_error_message, format_help_message, markdown_to_slack
-from .intent_router import IntentRouter, Intent
-from .security import SecurityGuard, SecurityLevel, ThreatType, get_security_guard
-from .audit import AuditLogger, AuditEventType, get_audit_logger
+from .intent_router import Intent, IntentRouter
+from .security import get_security_guard
if TYPE_CHECKING:
- from .user_memory import UserMemory
- from .feedback_loop import FeedbackLoop
- from .executor import AgentExecutor, StreamEvent
from .agents.orchestrator import Orchestrator
+ from .executor import AgentExecutor
+ from .feedback_loop import FeedbackLoop
+ from .user_memory import UserMemory
logger = logging.getLogger(__name__)
@@ -50,20 +56,20 @@ def register_event_handlers(
logger.info(f"Bot running in '{bot_mode}' mode (streaming: {streaming_enabled})")
# Initialize based on mode
- agent_executor: "AgentExecutor | None" = None
- orchestrator: "Orchestrator | None" = None
+ agent_executor: AgentExecutor | None = None
+ orchestrator: Orchestrator | None = None
intent_router: IntentRouter | None = None
handlers: dict | None = None
if bot_mode == "multi_agent":
# Use multi-agent architecture with orchestrator
- from .agents.orchestrator import Orchestrator
- orchestrator = Orchestrator(user_memory=user_memory)
+ from .agents.orchestrator import Orchestrator as OrchestratorClass
+ orchestrator = OrchestratorClass(user_memory=user_memory)
logger.info(f"Initialized Orchestrator with specialists: {orchestrator.get_available_specialists()}")
elif bot_mode == "agent":
# Use new agent executor with tool calling
- from .executor import AgentExecutor
- agent_executor = AgentExecutor(user_memory=user_memory)
+ from .executor import AgentExecutor as AgentExecutorClass
+ agent_executor = AgentExecutorClass(user_memory=user_memory)
logger.info("Initialized AgentExecutor for tool calling")
else:
# Use legacy intent routing (streaming not supported)
@@ -71,12 +77,12 @@ def register_event_handlers(
intent_router = IntentRouter()
# Import handlers
- from .handlers.chat import ChatHandler
- from .handlers.search import SearchHandler
+ from .handlers.briefing import BriefingHandler
from .handlers.calendar import CalendarHandler
+ from .handlers.chat import ChatHandler
from .handlers.email import EmailHandler
from .handlers.github import GitHubHandler
- from .handlers.briefing import BriefingHandler
+ from .handlers.search import SearchHandler
# Initialize handlers (lazy - they'll load resources when needed)
handlers = {
@@ -128,7 +134,6 @@ def handle_cancel(ack, body, client) -> None:
def _handle_message(event: dict, say, client, is_dm: bool) -> None:
"""Common message handling logic."""
- start_time = time.time()
user_id = event.get("user")
channel_id = event.get("channel")
thread_ts = event.get("thread_ts") or event.get("ts")
@@ -291,9 +296,6 @@ def _handle_message(event: dict, say, client, is_dm: bool) -> None:
except Exception as e:
logger.warning(f"Failed to record query pattern: {e}")
- # Calculate duration
- duration_ms = int((time.time() - start_time) * 1000)
-
# Send response
if response:
_send_response(say, response, thread_ts)
@@ -566,6 +568,8 @@ def _handle_with_agent_streaming(
accumulated_text = ""
current_status = "Thinking..."
last_update_time = time.time()
+ last_flushed_text = ""
+ done_text = None
tool_count = 0
# Process streaming events
@@ -584,11 +588,23 @@ def _handle_with_agent_streaming(
# Accumulate text chunks
accumulated_text += event.data
- # Update message at intervals to avoid rate limiting
+ # Update on readable boundaries instead of every token burst.
current_time = time.time()
- if current_time - last_update_time >= STREAMING_UPDATE_INTERVAL:
+ if _should_flush_stream_update(
+ accumulated_text,
+ last_flushed_text,
+ last_update_time,
+ current_time,
+ ):
display_text = accumulated_text if accumulated_text else current_status
- _update_message_safe(client, channel_id, message_ts, display_text)
+ _update_message_safe(
+ client,
+ channel_id,
+ message_ts,
+ display_text,
+ parse_markdown=False,
+ )
+ last_flushed_text = display_text
last_update_time = current_time
elif event.event_type == StreamEventType.TOOL_START:
@@ -596,7 +612,14 @@ def _handle_with_agent_streaming(
tool_count += 1
current_status = f"Using {event.tool_name}..."
if not accumulated_text:
- _update_message_safe(client, channel_id, message_ts, current_status)
+ _update_message_safe(
+ client,
+ channel_id,
+ message_ts,
+ current_status,
+ parse_markdown=False,
+ )
+ last_flushed_text = current_status
last_update_time = time.time()
elif event.event_type == StreamEventType.TOOL_DONE:
@@ -607,7 +630,14 @@ def _handle_with_agent_streaming(
# Status update
current_status = event.data
if not accumulated_text:
- _update_message_safe(client, channel_id, message_ts, current_status)
+ _update_message_safe(
+ client,
+ channel_id,
+ message_ts,
+ current_status,
+ parse_markdown=False,
+ )
+ last_flushed_text = current_status
last_update_time = time.time()
elif event.event_type == StreamEventType.TEXT_DONE:
@@ -621,9 +651,7 @@ def _handle_with_agent_streaming(
return {"text": error_text}, None
elif event.event_type == StreamEventType.DONE:
- # Streaming complete
- final_text = event.data or accumulated_text
- _update_message_safe(client, channel_id, message_ts, final_text)
+ done_text = event.data or accumulated_text
except Exception as e:
logger.warning(f"Error processing stream event: {e}")
@@ -651,7 +679,7 @@ def _handle_with_agent_streaming(
return {"text": final_text, "_streaming_sent": True}, None
else:
# Fallback if no result
- fallback_text = accumulated_text or "I'm not sure how to respond."
+ fallback_text = done_text or accumulated_text or "I'm not sure how to respond."
_update_message_safe(client, channel_id, message_ts, fallback_text)
return {"text": fallback_text, "_streaming_sent": True}, None
@@ -751,6 +779,7 @@ def _handle_with_multi_agent_streaming(
accumulated_text = ""
current_status = "Thinking..."
last_update_time = time.time()
+ last_flushed_text = ""
done_text = None # Track text from "done" event
# Process streaming events
@@ -768,23 +797,49 @@ def _handle_with_multi_agent_streaming(
if event.event_type == "text_delta":
accumulated_text += event.data
- # Rate-limited updates
+ # Boundary-aware updates avoid choppy Slack redraws.
current_time = time.time()
- if current_time - last_update_time >= STREAMING_UPDATE_INTERVAL:
+ if _should_flush_stream_update(
+ accumulated_text,
+ last_flushed_text,
+ last_update_time,
+ current_time,
+ ):
display_text = accumulated_text if accumulated_text else current_status
- _update_message_safe(client, channel_id, message_ts, display_text)
+ _update_message_safe(
+ client,
+ channel_id,
+ message_ts,
+ display_text,
+ parse_markdown=False,
+ )
+ last_flushed_text = display_text
last_update_time = current_time
elif event.event_type == "tool_start":
current_status = f"Using {event.tool_name}..."
if not accumulated_text:
- _update_message_safe(client, channel_id, message_ts, current_status)
+ _update_message_safe(
+ client,
+ channel_id,
+ message_ts,
+ current_status,
+ parse_markdown=False,
+ )
+ last_flushed_text = current_status
last_update_time = time.time()
elif event.event_type == "thinking":
current_status = event.data
if not accumulated_text:
- _update_message_safe(client, channel_id, message_ts, current_status)
+ _update_message_safe(
+ client,
+ channel_id,
+ message_ts,
+ current_status,
+ parse_markdown=False,
+ )
+ last_flushed_text = current_status
last_update_time = time.time()
elif event.event_type == "tool_done":
@@ -799,7 +854,6 @@ def _handle_with_multi_agent_streaming(
elif event.event_type == "done":
done_text = event.data or accumulated_text
- _update_message_safe(client, channel_id, message_ts, done_text)
except Exception as e:
logger.warning(f"Error processing stream event: {e}")
@@ -844,7 +898,38 @@ def _handle_with_multi_agent_streaming(
return {"text": error_response}, None
-def _update_message_safe(client, channel_id: str, message_ts: str, text: str) -> None:
+def _should_flush_stream_update(
+    accumulated_text: str,
+    last_flushed_text: str,
+    last_update_time: float,
+    current_time: float,
+) -> bool:
+    """Decide whether a partial Slack stream is worth repainting.
+
+    A repaint is requested when either
+
+    * at least 220 characters have arrived since the last flush, or
+    * ``STREAMING_UPDATE_INTERVAL`` seconds have elapsed *and* the text
+      currently ends on a readable boundary (blank line, sentence end
+      followed by whitespace, a colon followed by a newline, or a completed
+      ``- `` list item).
+
+    Args:
+        accumulated_text: Everything streamed so far.
+        last_flushed_text: The text most recently written to Slack.
+        last_update_time: ``time.time()`` value of the last repaint.
+        current_time: ``time.time()`` value for this decision.
+
+    Returns:
+        True if the message should be updated now.
+    """
+    if not accumulated_text or accumulated_text == last_flushed_text:
+        return False
+
+    if accumulated_text.startswith(last_flushed_text):
+        new_text = accumulated_text[len(last_flushed_text):]
+    else:
+        # The last flush was a status line (or otherwise unrelated text), so
+        # the whole accumulated text counts as new.
+        new_text = accumulated_text
+
+    # A large backlog is flushed regardless of timing or boundaries.
+    if len(new_text) >= 220:
+        return True
+
+    if current_time - last_update_time < STREAMING_UPDATE_INTERVAL:
+        return False
+
+    # Match against the unstripped text: every alternative relies on the
+    # trailing whitespace/newline that marks the boundary, so stripping it
+    # first would make this test impossible to satisfy.
+    return bool(re.search(r"(\n\n|[.!?]\s|:\n|- .+\n)$", accumulated_text))
+
+
+def _update_message_safe(
+ client,
+ channel_id: str,
+ message_ts: str,
+ text: str,
+ parse_markdown: bool = True,
+) -> None:
"""Safely update a Slack message, handling errors gracefully.
Args:
@@ -854,8 +939,8 @@ def _update_message_safe(client, channel_id: str, message_ts: str, text: str) ->
text: New message text.
"""
try:
- # Convert markdown to Slack mrkdwn format
- slack_text = markdown_to_slack(text)
+ # Partial markdown can flicker while it is incomplete; final renders are formatted.
+ slack_text = markdown_to_slack(text) if parse_markdown else text
client.chat_update(
channel=channel_id,
ts=message_ts,
diff --git a/src/bot/executor.py b/src/bot/executor.py
index 31e5f83..9a6791e 100644
--- a/src/bot/executor.py
+++ b/src/bot/executor.py
@@ -5,12 +5,12 @@
Supports both synchronous and streaming execution modes.
"""
-import json
import logging
+from collections.abc import Generator
from dataclasses import dataclass, field
-from datetime import datetime, timezone
+from datetime import datetime
from enum import Enum
-from typing import TYPE_CHECKING, Any, Generator, Iterator
+from typing import TYPE_CHECKING, Any
from anthropic import Anthropic
@@ -19,33 +19,17 @@
ENABLE_DIRECT_EMAIL_SEND,
PRIMARY_ACCOUNT,
ZOTERO_DEFAULT_COLLECTION,
+ get_user_timezone,
)
from .tools import (
- ALL_TOOLS,
TOOL_NAME_MAP,
ToolResult,
get_tool_schemas,
parse_date_reference,
- SemanticSearchTool,
- SearchEmailsTool,
- SearchDriveTool,
- GetCalendarEventsTool,
- CheckAvailabilityTool,
- CreateCalendarEventTool,
- GetUnreadCountsTool,
- CreateEmailDraftTool,
- SendEmailTool,
- GetGitHubPRsTool,
- GetGitHubIssuesTool,
- SearchGitHubCodeTool,
- CreateGitHubIssueTool,
- FindPersonTool,
- GetPersonActivityTool,
- GetDailyBriefingTool,
- RespondToUserTool,
)
if TYPE_CHECKING:
+ from .actions.confirmable import PendingAction
from .conversation import ConversationContext
from .user_memory import UserMemory
@@ -65,10 +49,13 @@
- Manage outbound email
- Check calendar events and availability
- Create calendar events and send meeting invites to attendees
+- Update or cancel calendar events when the user provides an event ID or enough context to identify one
- Search GitHub code, issues, and PRs
- Create GitHub issues
-- Get and create Todoist tasks, mark tasks complete
+- Get, create, update, comment on, reopen, and complete Todoist tasks
- Search Notion pages and databases
+- Add, reply to, and resolve Google Doc comments
+- Read and update proactive notification settings
- Get daily briefings
Guidelines:
@@ -82,7 +69,7 @@
8. {email_send_policy}
9. When the user asks about "tasks" or "to-dos", use the Todoist tools (GetTodoistTasksTool, CreateTodoistTaskTool).
-Current date: {current_date}
+Current local date/time: {current_date}
"""
@@ -135,6 +122,7 @@ def __init__(self):
self._notion_client = None
self._todoist_client = None
self._zotero_client = None
+ self._proactive_settings = None
@property
def semantic_indexer(self):
@@ -192,6 +180,14 @@ def zotero_client(self):
self._zotero_client = ZoteroClient()
return self._zotero_client
+    @property
+    def proactive_settings(self):
+        """Return the proactive settings store, creating it on first access."""
+        store = self._proactive_settings
+        if store is None:
+            # Imported lazily so the store module is only loaded when needed.
+            from .proactive_settings import ProactiveSettingsStore
+
+            store = self._proactive_settings = ProactiveSettingsStore()
+        return store
+
def execute(
self,
tool_name: str,
@@ -218,18 +214,75 @@ def execute(
if handler_name == "send_email":
return handler(arguments, context=context)
+ if handler_name in {
+ "create_calendar_event",
+ "update_calendar_event",
+ "delete_calendar_event",
+ "create_email_draft",
+ "create_github_issue",
+ "create_todoist_task",
+ "complete_todoist_task",
+ "update_todoist_task",
+ "add_todoist_comment",
+ "reopen_todoist_task",
+ "create_notion_page",
+ "add_notion_comment",
+ "add_google_doc_comment",
+ "reply_google_doc_comment",
+ "resolve_google_doc_comment",
+ "get_proactive_settings",
+ "update_proactive_settings",
+ "add_zotero_paper",
+ }:
+ return handler(arguments, context=context)
return handler(arguments)
except Exception as e:
logger.error(f"Error executing tool {tool_name}: {e}", exc_info=True)
return ToolResult(success=False, error=str(e))
+    def _queue_confirmation(
+        self,
+        action: "PendingAction",
+        context: "ConversationContext | None",
+        message: str,
+    ) -> ToolResult:
+        """Park a write action on the conversation until the user confirms it.
+
+        Args:
+            action: The pending action to store on ``context.pending_action``.
+            context: Conversation the action belongs to; required.
+            message: Short text returned alongside the confirmation prompt.
+
+        Returns:
+            A ToolResult whose data carries ``requires_confirmation``, the
+            message and the action's confirmation prompt, or a failed
+            ToolResult when no conversation context is available.
+        """
+        if context is not None:
+            context.pending_action = action
+            payload = {
+                "requires_confirmation": True,
+                "message": message,
+                "confirmation": action.get_confirmation_prompt(),
+            }
+            return ToolResult(data=payload)
+
+        # Without a context there is nowhere to hold the pending action.
+        return ToolResult(
+            success=False,
+            error="Missing conversation context for confirmation-gated action.",
+        )
+
def _execute_semantic_search(self, args: dict) -> ToolResult:
"""Execute semantic search."""
+ filters = {}
+ sources = args.get("sources")
+ if sources and len(sources) == 1:
+ filters["source"] = sources[0]
+ elif sources:
+ # Chroma metadata filters do not support the simple list shape here;
+ # filter after search when multiple sources are requested.
+ filters = None
+
results = self.semantic_indexer.search(
query=args["query"],
+ content_types=args.get("content_types"),
top_k=args.get("max_results", 10),
+ filters=filters or None,
)
+ if sources and len(sources) > 1:
+ results = [
+ r for r in results
+ if (r.get("metadata") or {}).get("source") in sources
+ ]
return ToolResult(data={
"query": args["query"],
"result_count": len(results),
@@ -264,13 +317,73 @@ def _execute_search_drive(self, args: dict) -> ToolResult:
def _execute_get_calendar_events(self, args: dict) -> ToolResult:
"""Get calendar events."""
target_date = parse_date_reference(args.get("date", "today"))
+ tz = get_user_timezone()
+ now = datetime.now(tz)
events = self.multi_google.get_all_calendars_for_date(target_date)
+ target_local_date = target_date.astimezone(tz).date()
+ today_local_date = now.date()
+ upcoming_events = (
+ self._filter_upcoming_events(events, now)
+ if target_local_date == today_local_date
+ else events
+ )
+
return ToolResult(data={
"date": target_date.strftime("%Y-%m-%d"),
+ "current_time": now.isoformat(),
+ "timezone": str(tz),
"event_count": len(events),
+ "upcoming_event_count": len(upcoming_events),
+ "next_event": upcoming_events[0] if upcoming_events else None,
"events": events,
+ "upcoming_events": upcoming_events,
})
+    @staticmethod
+    def _coerce_event_datetime(value: Any, tz) -> datetime | None:
+        """Normalize event datetime values from integrations or mocks.
+
+        Args:
+            value: A datetime, an ISO 8601 string (a trailing ``Z`` is read as
+                UTC), or anything else.
+            tz: tzinfo to express the result in.
+
+        Returns:
+            The value as a datetime in ``tz`` (naive inputs are assumed to
+            already be in ``tz``), or None when it is missing, of an
+            unsupported type, or not parseable.
+        """
+        if isinstance(value, str):
+            iso_text = value[:-1] + "+00:00" if value.endswith("Z") else value
+            try:
+                value = datetime.fromisoformat(iso_text)
+            except ValueError:
+                return None
+
+        # Covers None as well as any unsupported type.
+        if not isinstance(value, datetime):
+            return None
+
+        if value.tzinfo is None:
+            return value.replace(tzinfo=tz)
+        return value.astimezone(tz)
+
+    @classmethod
+    def _filter_upcoming_events(
+        cls,
+        events: list[dict[str, Any]],
+        now: datetime,
+    ) -> list[dict[str, Any]]:
+        """Return events that have not ended yet, sorted by start time.
+
+        An event with a usable end time is kept while that end is after
+        ``now``. Without one, it is kept when its start is at or after
+        ``now`` or cannot be determined. Events lacking a usable start sort
+        last.
+        """
+        tz = now.tzinfo
+
+        def start_of(event: dict[str, Any]) -> datetime | None:
+            return cls._coerce_event_datetime(event.get("start"), tz)
+
+        def not_over(event: dict[str, Any]) -> bool:
+            ends_at = cls._coerce_event_datetime(event.get("end"), tz)
+            if ends_at is not None:
+                return ends_at > now
+            starts_at = start_of(event)
+            return starts_at is None or starts_at >= now
+
+        pending = [event for event in events if not_over(event)]
+
+        # Sentinel that orders events without a usable start after all others.
+        far_future = datetime.max.replace(tzinfo=tz)
+        pending.sort(key=lambda event: start_of(event) or far_future)
+        return pending
+
def _execute_check_availability(self, args: dict) -> ToolResult:
"""Check availability."""
target_date = parse_date_reference(args.get("date", "today"))
@@ -289,12 +402,15 @@ def _execute_check_availability(self, args: dict) -> ToolResult:
"free_slots": free_slots,
})
- def _execute_create_calendar_event(self, args: dict) -> ToolResult:
- """Create a calendar event."""
+ def _execute_create_calendar_event(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Queue calendar event creation for confirmation."""
from datetime import timedelta
- from ..config import get_user_timezone
- tz = get_user_timezone()
+ from .actions.confirmable import ConfirmableAction
# Parse date and time
start_dt = self._parse_event_datetime(
@@ -310,106 +426,162 @@ def _execute_create_calendar_event(self, args: dict) -> ToolResult:
description = args.get("description", "")
account = args.get("account") or PRIMARY_ACCOUNT
- # Create the event
- event = self.multi_google.create_calendar_event(
- account=account,
- summary=args["title"],
- start=start_dt,
- end=end_dt,
- description=description or None,
- attendees=attendees if attendees else None,
- location=location or None,
- send_notifications=True,
+ preview = (
+ f"*Event:* {args['title']}\n"
+ f"*When:* {start_dt.strftime('%Y-%m-%d %I:%M %p')} "
+ f"({duration} min)\n"
+ f"*Account:* {account}"
)
-
- # Build response
- attendee_msg = ""
+ if location:
+ preview += f"\n*Location:* {location}"
if attendees:
- attendee_msg = f" Calendar invites sent to {len(attendees)} attendee(s)."
+ preview += f"\n*Attendees:* {', '.join(attendees)}"
+ preview += "\n_(Calendar invites will be sent.)_"
+ if description:
+ desc_preview = description[:200] + ("..." if len(description) > 200 else "")
+ preview += f"\n*Description:* {desc_preview}"
+
+ def execute_event() -> dict[str, Any]:
+ event = self.multi_google.create_calendar_event(
+ account=account,
+ summary=args["title"],
+ start=start_dt,
+ end=end_dt,
+ description=description or None,
+ attendees=attendees if attendees else None,
+ location=location or None,
+ send_notifications=True,
+ )
+ attendee_msg = ""
+ if attendees:
+ attendee_msg = f" Calendar invites sent to {len(attendees)} attendee(s)."
+ return {
+ "success": True,
+ "event_id": event.get("id"),
+ "html_link": event.get("htmlLink"),
+ "message": (
+ f"Created event '{args['title']}' on {start_dt.strftime('%Y-%m-%d')} "
+ f"at {start_dt.strftime('%I:%M %p')}.{attendee_msg}"
+ ),
+ }
- return ToolResult(data={
- "event_id": event.get("id"),
- "html_link": event.get("htmlLink"),
- "title": args["title"],
- "start": start_dt.isoformat(),
- "end": end_dt.isoformat(),
- "account": account,
- "attendees": attendees,
- "message": f"Created event '{args['title']}' on {start_dt.strftime('%Y-%m-%d')} at {start_dt.strftime('%I:%M %p')}.{attendee_msg}",
- })
+ return self._queue_confirmation(
+ ConfirmableAction("Create Calendar Event", preview, execute_event),
+ context,
+ "Please confirm creating this calendar event.",
+ )
- def _parse_event_datetime(self, date_str: str, time_str: str) -> datetime:
- """Parse date and time strings into a datetime."""
+ def _execute_update_calendar_event(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Queue calendar event update for confirmation."""
from datetime import timedelta
- from ..config import get_user_timezone
- tz = get_user_timezone()
- now = datetime.now(tz)
+ from .actions.confirmable import ConfirmableAction
- # Parse date
- date_lower = date_str.lower().strip()
- if date_lower == "today":
- target_date = now.date()
- elif date_lower == "tomorrow":
- target_date = (now + timedelta(days=1)).date()
- elif date_lower == "yesterday":
- target_date = (now - timedelta(days=1)).date()
- else:
- # Try day names (next occurrence)
- day_names = ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday"]
- if date_lower in day_names:
- target_weekday = day_names.index(date_lower)
- days_ahead = target_weekday - now.weekday()
- if days_ahead <= 0: # Target day already happened this week
- days_ahead += 7
- target_date = (now + timedelta(days=days_ahead)).date()
- else:
- # Try ISO format
- try:
- target_date = datetime.fromisoformat(date_str).date()
- except ValueError:
- target_date = now.date()
-
- # Parse time
- time_lower = time_str.lower().strip()
- hour = 12 # Default to noon
- minute = 0
-
- if time_lower == "noon":
- hour, minute = 12, 0
- elif time_lower == "midnight":
- hour, minute = 0, 0
- elif ":" in time_lower:
- # Format like "14:00" or "2:30pm"
- time_part = time_lower.replace("am", "").replace("pm", "").strip()
- parts = time_part.split(":")
- hour = int(parts[0])
- minute = int(parts[1]) if len(parts) > 1 else 0
- if "pm" in time_lower and hour < 12:
- hour += 12
- elif "am" in time_lower and hour == 12:
- hour = 0
- else:
- # Format like "2pm" or "14"
- time_clean = time_lower.replace("am", "").replace("pm", "").strip()
- try:
- hour = int(time_clean)
- if "pm" in time_lower and hour < 12:
- hour += 12
- elif "am" in time_lower and hour == 12:
- hour = 0
- except ValueError:
- pass
-
- return datetime(
- year=target_date.year,
- month=target_date.month,
- day=target_date.day,
- hour=hour,
- minute=minute,
- tzinfo=tz,
+ account = args.get("account") or PRIMARY_ACCOUNT
+ calendar_id = args.get("calendar_id", "primary")
+ updates: dict[str, Any] = {}
+ preview_lines = [
+ f"*Event ID:* {args['event_id']}",
+ f"*Account:* {account}",
+ f"*Calendar:* {calendar_id}",
+ ]
+
+ if args.get("title") is not None:
+ updates["summary"] = args["title"]
+ preview_lines.append(f"*New title:* {args['title']}")
+ if args.get("location") is not None:
+ updates["location"] = args["location"]
+ preview_lines.append(f"*New location:* {args['location'] or '(blank)'}")
+ if args.get("description") is not None:
+ updates["description"] = args["description"]
+ desc = args["description"][:200] + ("..." if len(args["description"]) > 200 else "")
+ preview_lines.append(f"*New description:* {desc or '(blank)'}")
+ if args.get("attendees") is not None:
+ updates["attendees"] = args["attendees"]
+ preview_lines.append(f"*New attendees:* {', '.join(args['attendees']) or '(none)'}")
+
+ if args.get("date") or args.get("time"):
+ start_dt = self._parse_event_datetime(
+ args.get("date") or "today",
+ args.get("time") or "12:00",
+ )
+ duration = args.get("duration_minutes") or 60
+ updates["start"] = start_dt
+ updates["end"] = start_dt + timedelta(minutes=duration)
+ preview_lines.append(
+ f"*New time:* {start_dt.strftime('%Y-%m-%d %I:%M %p')} ({duration} min)"
+ )
+
+ if not updates:
+ return ToolResult(success=False, error="No calendar event updates were provided.")
+
+ def execute_update() -> dict[str, Any]:
+ event = self.multi_google.update_calendar_event(
+ account=account,
+ event_id=args["event_id"],
+ calendar_id=calendar_id,
+ send_notifications=args.get("send_notifications", True),
+ **updates,
+ )
+ return {
+ "success": True,
+ "event_id": event.get("id", args["event_id"]),
+ "html_link": event.get("htmlLink"),
+ "message": f"Updated calendar event: {event.get('summary', args['event_id'])}",
+ }
+
+ return self._queue_confirmation(
+ ConfirmableAction("Update Calendar Event", "\n".join(preview_lines), execute_update),
+ context,
+ "Please confirm updating this calendar event.",
)
+    def _execute_delete_calendar_event(
+        self,
+        args: dict,
+        context: "ConversationContext | None" = None,
+    ) -> ToolResult:
+        """Queue calendar event deletion for confirmation.
+
+        Builds a preview of the cancellation and wraps the actual delete call
+        in a ConfirmableAction, so nothing is deleted by this method itself.
+
+        Args:
+            args: Tool arguments; ``event_id`` is required, while ``account``,
+                ``calendar_id`` and ``send_notifications`` are optional.
+            context: Conversation that will hold the pending action.
+        """
+        from .actions.confirmable import ConfirmableAction
+
+        account = args.get("account") or PRIMARY_ACCOUNT
+        calendar_id = args.get("calendar_id", "primary")
+        event_id = args["event_id"]
+        notify = args.get("send_notifications", True)
+
+        preview = "\n".join(
+            [
+                f"*Event ID:* {event_id}",
+                f"*Account:* {account}",
+                f"*Calendar:* {calendar_id}",
+                f"*Send cancellation notifications:* {notify}",
+            ]
+        )
+
+        def execute_delete() -> dict[str, Any]:
+            # Runs only after the user confirms the queued action.
+            self.multi_google.delete_calendar_event(
+                account=account,
+                event_id=event_id,
+                calendar_id=calendar_id,
+                send_notifications=notify,
+            )
+            return {
+                "success": True,
+                "event_id": event_id,
+                "message": f"Cancelled calendar event: {event_id}",
+            }
+
+        pending = ConfirmableAction("Cancel Calendar Event", preview, execute_delete)
+        return self._queue_confirmation(
+            pending,
+            context,
+            "Please confirm cancelling this calendar event.",
+        )
+
+    def _parse_event_datetime(self, date_str: str, time_str: str) -> datetime:
+        """Parse date and time strings into a datetime.
+
+        Thin wrapper that delegates to ``datetime_utils.parse_event_datetime``.
+        """
+        from . import datetime_utils
+
+        return datetime_utils.parse_event_datetime(date_str, time_str)
+
def _execute_get_unread_counts(self, args: dict) -> ToolResult:
"""Get unread email counts."""
counts = self.multi_google.get_unread_counts()
@@ -419,22 +591,26 @@ def _execute_get_unread_counts(self, args: dict) -> ToolResult:
"by_account": counts,
})
- def _execute_create_email_draft(self, args: dict) -> ToolResult:
- """Create email draft."""
+ def _execute_create_email_draft(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Queue email draft creation for confirmation."""
+ from .actions.email_actions import CreateDraftAction
+
account = args.get("account") or PRIMARY_ACCOUNT
- draft = self.multi_google.create_draft(
- account=account,
+ action = CreateDraftAction(
to=args["to"],
subject=args["subject"],
body=args["body"],
+ account=account,
+ )
+ return self._queue_confirmation(
+ action,
+ context,
+ "Please confirm creating this email draft.",
)
- return ToolResult(data={
- "draft_id": draft.get("id"),
- "account": account,
- "to": args["to"],
- "subject": args["subject"],
- "message": f"Draft created in {account} account",
- })
def _execute_send_email(
self,
@@ -520,20 +696,25 @@ def _execute_search_github_code(self, args: dict) -> ToolResult:
"results": results,
})
- def _execute_create_github_issue(self, args: dict) -> ToolResult:
- """Create GitHub issue."""
- issue = self.github_client.create_issue(
+ def _execute_create_github_issue(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Queue GitHub issue creation for confirmation."""
+ from .actions.github_actions import CreateIssueAction
+
+ action = CreateIssueAction(
repo=args["repo"],
title=args["title"],
body=args.get("body", ""),
labels=args.get("labels", []),
)
- return ToolResult(data={
- "issue_number": issue.get("number"),
- "url": issue.get("html_url"),
- "title": args["title"],
- "message": f"Issue created: {issue.get('html_url')}",
- })
+ return self._queue_confirmation(
+ action,
+ context,
+ "Please confirm creating this GitHub issue.",
+ )
def _execute_find_person(self, args: dict) -> ToolResult:
"""Find person in knowledge graph."""
@@ -644,8 +825,14 @@ def _execute_get_todoist_tasks(self, args: dict) -> ToolResult:
"tasks": formatted,
})
- def _execute_create_todoist_task(self, args: dict) -> ToolResult:
- """Create a new task in Todoist."""
+ def _execute_create_todoist_task(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Queue Todoist task creation for confirmation."""
+ from .actions.confirmable import ConfirmableAction
+
# Find project ID if project name provided
project_id = None
project_name = args.get("project")
@@ -656,24 +843,50 @@ def _execute_create_todoist_task(self, args: dict) -> ToolResult:
project_id = p["id"]
break
- task = self.todoist_client.create_task(
- content=args["content"],
- description=args.get("description"),
- project_id=project_id,
- due_string=args.get("due"),
- priority=args.get("priority", 1),
- labels=args.get("labels"),
+ preview = f"*Task:* {args['content']}"
+ if project_name:
+ preview += f"\n*Project:* {project_name}"
+ if args.get("due"):
+ preview += f"\n*Due:* {args['due']}"
+ if args.get("description"):
+ desc = args["description"][:200]
+ if len(args["description"]) > 200:
+ desc += "..."
+ preview += f"\n*Description:* {desc}"
+ if args.get("labels"):
+ preview += f"\n*Labels:* {', '.join(args['labels'])}"
+
+ def execute_task() -> dict[str, Any]:
+ task = self.todoist_client.create_task(
+ content=args["content"],
+ description=args.get("description"),
+ project_id=project_id,
+ due_string=args.get("due"),
+ priority=args.get("priority", 1),
+ labels=args.get("labels"),
+ )
+ return {
+ "success": True,
+ "task_id": task["id"],
+ "content": task["content"],
+ "url": task.get("url"),
+ "message": f"Task created: {task['content']}",
+ }
+
+ return self._queue_confirmation(
+ ConfirmableAction("Create Todoist Task", preview, execute_task),
+ context,
+ "Please confirm creating this Todoist task.",
)
- return ToolResult(data={
- "task_id": task["id"],
- "content": task["content"],
- "url": task.get("url"),
- "message": f"Task created: {task['content']}",
- })
+ def _execute_complete_todoist_task(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Queue Todoist task completion for confirmation."""
+ from .actions.confirmable import ConfirmableAction
- def _execute_complete_todoist_task(self, args: dict) -> ToolResult:
- """Mark a Todoist task as complete."""
task_id = args["task_id"]
# Get task info first for confirmation message
@@ -683,12 +896,132 @@ def _execute_complete_todoist_task(self, args: dict) -> ToolResult:
except Exception:
task_content = "Unknown task"
- self.todoist_client.complete_task(task_id)
+ def execute_complete() -> dict[str, Any]:
+ self.todoist_client.complete_task(task_id)
+ return {
+ "success": True,
+ "task_id": task_id,
+ "message": f"Completed: {task_content}",
+ }
+
+ return self._queue_confirmation(
+ ConfirmableAction(
+ "Complete Todoist Task",
+ f"*Task:* {task_content}\n*ID:* {task_id}",
+ execute_complete,
+ ),
+ context,
+ "Please confirm completing this Todoist task.",
+ )
- return ToolResult(data={
- "task_id": task_id,
- "message": f"Completed: {task_content}",
- })
+ def _execute_update_todoist_task(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Queue Todoist task update for confirmation."""
+ from .actions.confirmable import ConfirmableAction
+
+ updates = {
+ "content": args.get("content"),
+ "description": args.get("description"),
+ "due_string": args.get("due"),
+ "priority": args.get("priority"),
+ "labels": args.get("labels"),
+ }
+ updates = {k: v for k, v in updates.items() if v is not None}
+ if not updates:
+ return ToolResult(success=False, error="No Todoist task updates were provided.")
+
+ try:
+ task = self.todoist_client.get_task(args["task_id"])
+ task_content = task.get("content", "Unknown task")
+ except Exception:
+ task_content = "Unknown task"
+
+ preview_lines = [f"*Task:* {task_content}", f"*ID:* {args['task_id']}"]
+ if args.get("content") is not None:
+ preview_lines.append(f"*New content:* {args['content']}")
+ if args.get("description") is not None:
+ desc = args["description"][:200] + ("..." if len(args["description"]) > 200 else "")
+ preview_lines.append(f"*New description:* {desc or '(blank)'}")
+ if args.get("due") is not None:
+ preview_lines.append(f"*New due:* {args['due']}")
+ if args.get("priority") is not None:
+ preview_lines.append(f"*New priority:* {args['priority']}")
+ if args.get("labels") is not None:
+ preview_lines.append(f"*New labels:* {', '.join(args['labels']) or '(none)'}")
+
+ def execute_update() -> dict[str, Any]:
+ task = self.todoist_client.update_task(args["task_id"], **updates)
+ return {
+ "success": True,
+ "task_id": task["id"],
+ "content": task["content"],
+ "url": task.get("url"),
+ "message": f"Updated Todoist task: {task['content']}",
+ }
+
+ return self._queue_confirmation(
+ ConfirmableAction("Update Todoist Task", "\n".join(preview_lines), execute_update),
+ context,
+ "Please confirm updating this Todoist task.",
+ )
+
+ def _execute_add_todoist_comment(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Queue Todoist task comment for confirmation."""
+ from .actions.confirmable import ConfirmableAction
+
+ comment = args["content"][:300] + ("..." if len(args["content"]) > 300 else "")
+
+ def execute_comment() -> dict[str, Any]:
+ created = self.todoist_client.add_comment(args["task_id"], args["content"])
+ return {
+ "success": True,
+ "comment_id": created["id"],
+ "task_id": args["task_id"],
+ "message": "Added Todoist task comment.",
+ }
+
+ return self._queue_confirmation(
+ ConfirmableAction(
+ "Add Todoist Comment",
+ f"*Task ID:* {args['task_id']}\n*Comment:*\n{comment}",
+ execute_comment,
+ ),
+ context,
+ "Please confirm adding this Todoist comment.",
+ )
+
+ def _execute_reopen_todoist_task(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Queue reopening a Todoist task for confirmation."""
+ from .actions.confirmable import ConfirmableAction
+
+ def execute_reopen() -> dict[str, Any]:
+ self.todoist_client.reopen_task(args["task_id"])
+ return {
+ "success": True,
+ "task_id": args["task_id"],
+ "message": f"Reopened Todoist task: {args['task_id']}",
+ }
+
+ return self._queue_confirmation(
+ ConfirmableAction(
+ "Reopen Todoist Task",
+ f"*Task ID:* {args['task_id']}",
+ execute_reopen,
+ ),
+ context,
+ "Please confirm reopening this Todoist task.",
+ )
def _execute_search_notion(self, args: dict) -> ToolResult:
"""Search Notion pages and databases."""
@@ -714,8 +1047,14 @@ def _execute_search_notion(self, args: dict) -> ToolResult:
"results": formatted,
})
- def _execute_create_notion_page(self, args: dict) -> ToolResult:
- """Create a new page in a Notion database."""
+ def _execute_create_notion_page(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Queue Notion page creation for confirmation."""
+ from .actions.confirmable import ConfirmableAction
+
# Build properties with title
properties = args.get("properties", {})
# Add title property (Notion databases typically use "Name" or "Title")
@@ -723,30 +1062,251 @@ def _execute_create_notion_page(self, args: dict) -> ToolResult:
"title": [{"text": {"content": args["title"]}}]
}
- page = self.notion_client.create_page(
- database_id=args["database_id"],
- properties=properties,
+ def execute_page() -> dict[str, Any]:
+ page = self.notion_client.create_page(
+ database_id=args["database_id"],
+ properties=properties,
+ )
+ return {
+ "success": True,
+ "page_id": page["id"],
+ "url": page.get("url"),
+ "title": args["title"],
+ "message": f"Page created: {page.get('url', page['id'])}",
+ }
+
+ return self._queue_confirmation(
+ ConfirmableAction(
+ "Create Notion Page",
+ f"*Title:* {args['title']}\n*Database:* {args['database_id']}",
+ execute_page,
+ ),
+ context,
+ "Please confirm creating this Notion page.",
)
- return ToolResult(data={
- "page_id": page["id"],
- "url": page.get("url"),
- "title": args["title"],
- "message": f"Page created: {page.get('url', page['id'])}",
- })
+ def _execute_add_notion_comment(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Queue Notion comment creation for confirmation."""
+ from .actions.confirmable import ConfirmableAction
- def _execute_add_notion_comment(self, args: dict) -> ToolResult:
- """Add a comment to a Notion page."""
- comment = self.notion_client.add_comment(
- page_id=args["page_id"],
- content=args["content"],
+ comment_preview = args["content"][:300]
+ if len(args["content"]) > 300:
+ comment_preview += "..."
+
+ def execute_comment() -> dict[str, Any]:
+ comment = self.notion_client.add_comment(
+ page_id=args["page_id"],
+ content=args["content"],
+ )
+ return {
+ "success": True,
+ "comment_id": comment["id"],
+ "page_id": args["page_id"],
+ "message": "Comment added successfully",
+ }
+
+ return self._queue_confirmation(
+ ConfirmableAction(
+ "Add Notion Comment",
+ f"*Page:* {args['page_id']}\n*Comment:*\n{comment_preview}",
+ execute_comment,
+ ),
+ context,
+ "Please confirm adding this Notion comment.",
)
- return ToolResult(data={
- "comment_id": comment["id"],
- "page_id": args["page_id"],
- "message": "Comment added successfully",
- })
+ def _execute_add_google_doc_comment(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Queue adding a Google Doc comment for confirmation."""
+ from .actions.confirmable import ConfirmableAction
+
+ account = args.get("account") or PRIMARY_ACCOUNT
+ comment_preview = args["content"][:300] + ("..." if len(args["content"]) > 300 else "")
+ preview = (
+ f"*Document:* {args['document_id']}\n"
+ f"*Account:* {account}\n"
+ f"*Comment:*\n{comment_preview}"
+ )
+ if args.get("quoted_text"):
+ preview += f"\n*Anchor text:* {args['quoted_text'][:200]}"
+
+ def execute_comment() -> dict[str, Any]:
+ comment = self.multi_google.add_doc_comment(
+ account=account,
+ document_id=args["document_id"],
+ content=args["content"],
+ quoted_text=args.get("quoted_text"),
+ )
+ return {
+ "success": True,
+ "comment_id": comment["id"],
+ "document_id": args["document_id"],
+ "message": "Added Google Doc comment.",
+ }
+
+ return self._queue_confirmation(
+ ConfirmableAction("Add Google Doc Comment", preview, execute_comment),
+ context,
+ "Please confirm adding this Google Doc comment.",
+ )
+
+ def _execute_reply_google_doc_comment(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Queue replying to a Google Doc comment for confirmation."""
+ from .actions.confirmable import ConfirmableAction
+
+ account = args.get("account") or PRIMARY_ACCOUNT
+ reply_preview = args["content"][:300] + ("..." if len(args["content"]) > 300 else "")
+
+ def execute_reply() -> dict[str, Any]:
+ reply = self.multi_google.reply_to_doc_comment(
+ account=account,
+ document_id=args["document_id"],
+ comment_id=args["comment_id"],
+ content=args["content"],
+ )
+ return {
+ "success": True,
+ "reply_id": reply["id"],
+ "comment_id": args["comment_id"],
+ "message": "Replied to Google Doc comment.",
+ }
+
+ return self._queue_confirmation(
+ ConfirmableAction(
+ "Reply Google Doc Comment",
+ (
+ f"*Document:* {args['document_id']}\n"
+ f"*Comment:* {args['comment_id']}\n"
+ f"*Account:* {account}\n"
+ f"*Reply:*\n{reply_preview}"
+ ),
+ execute_reply,
+ ),
+ context,
+ "Please confirm replying to this Google Doc comment.",
+ )
+
+ def _execute_resolve_google_doc_comment(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Queue resolving a Google Doc comment for confirmation."""
+ from .actions.confirmable import ConfirmableAction
+
+ account = args.get("account") or PRIMARY_ACCOUNT
+
+ def execute_resolve() -> dict[str, Any]:
+ comment = self.multi_google.resolve_doc_comment(
+ account=account,
+ document_id=args["document_id"],
+ comment_id=args["comment_id"],
+ )
+ return {
+ "success": True,
+ "comment_id": comment["id"],
+ "message": "Resolved Google Doc comment.",
+ }
+
+ return self._queue_confirmation(
+ ConfirmableAction(
+ "Resolve Google Doc Comment",
+ (
+ f"*Document:* {args['document_id']}\n"
+ f"*Comment:* {args['comment_id']}\n"
+ f"*Account:* {account}"
+ ),
+ execute_resolve,
+ ),
+ context,
+ "Please confirm resolving this Google Doc comment.",
+ )
+
+ def _execute_get_proactive_settings(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Get proactive settings for the current Slack user."""
+ if context is None:
+ return ToolResult(success=False, error="Missing conversation context.")
+ settings = self.proactive_settings.get(context.user_id)
+ return ToolResult(data=settings.to_dict())
+
+ def _execute_update_proactive_settings(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Queue proactive settings update for confirmation."""
+ from .actions.confirmable import ConfirmableAction
+
+ if context is None:
+ return ToolResult(success=False, error="Missing conversation context.")
+
+ allowed = {
+ "calendar_reminders_enabled",
+ "email_alerts_enabled",
+ "daily_briefing_enabled",
+ "reminder_minutes_before",
+ "briefing_hour",
+ "briefing_minute",
+ "briefing_days",
+ "important_contacts",
+ "alert_keywords",
+ "quiet_hours_start",
+ "quiet_hours_end",
+ }
+ updates = {k: v for k, v in args.items() if k in allowed and v is not None}
+ if not updates:
+ return ToolResult(success=False, error="No proactive setting updates were provided.")
+
+ def _validate_hour(name: str) -> None:
+ if name in updates and not 0 <= int(updates[name]) <= 23:
+ raise ValueError(f"{name} must be between 0 and 23")
+
+ try:
+ _validate_hour("briefing_hour")
+ _validate_hour("quiet_hours_start")
+ _validate_hour("quiet_hours_end")
+ if "briefing_minute" in updates and not 0 <= int(updates["briefing_minute"]) <= 59:
+ raise ValueError("briefing_minute must be between 0 and 59")
+ if "reminder_minutes_before" in updates and int(updates["reminder_minutes_before"]) < 0:
+ raise ValueError("reminder_minutes_before must be non-negative")
+ if "briefing_days" in updates and any(day < 0 or day > 6 for day in updates["briefing_days"]):
+ raise ValueError("briefing_days values must be between 0 and 6")
+ except ValueError as e:
+ return ToolResult(success=False, error=str(e))
+
+ preview = "\n".join(f"*{key}:* {value}" for key, value in updates.items())
+
+ def execute_update() -> dict[str, Any]:
+ settings = self.proactive_settings.get(context.user_id)
+ for key, value in updates.items():
+ setattr(settings, key, value)
+ self.proactive_settings.save(settings)
+ return {
+ "success": True,
+ "settings": settings.to_dict(),
+ "message": "Updated proactive notification settings.",
+ }
+
+ return self._queue_confirmation(
+ ConfirmableAction("Update Proactive Settings", preview, execute_update),
+ context,
+ "Please confirm updating your proactive notification settings.",
+ )
def _execute_search_zotero_papers(self, args: dict) -> ToolResult:
"""Search papers in Zotero library."""
@@ -878,8 +1438,14 @@ def _execute_get_zotero_collection(self, args: dict) -> ToolResult:
"papers": formatted,
})
- def _execute_add_zotero_paper(self, args: dict) -> ToolResult:
- """Add a paper to Zotero by DOI or URL."""
+ def _execute_add_zotero_paper(
+ self,
+ args: dict,
+ context: "ConversationContext | None" = None,
+ ) -> ToolResult:
+ """Queue adding a paper to Zotero for confirmation."""
+ from .actions.confirmable import ConfirmableAction
+
identifier = args["identifier"].strip()
collection = args.get("collection") or ZOTERO_DEFAULT_COLLECTION
@@ -890,23 +1456,29 @@ def _execute_add_zotero_paper(self, args: dict) -> ToolResult:
identifier.lower().startswith("doi:")
)
- try:
+ def execute_add() -> dict[str, Any]:
if is_doi:
item = self.zotero_client.add_item_by_doi(identifier, collection)
else:
item = self.zotero_client.add_item_by_url(identifier, collection)
- return ToolResult(data={
+ return {
+ "success": True,
"key": item["key"],
"title": item["title"],
"collection": collection,
"message": f"Paper added to Zotero: {item['title']}",
- })
- except Exception as e:
- return ToolResult(
- success=False,
- error=f"Failed to add paper: {str(e)}",
- )
+ }
+
+ return self._queue_confirmation(
+ ConfirmableAction(
+ "Add Zotero Paper",
+ f"*Identifier:* {identifier}\n*Collection:* {collection or '(default)'}",
+ execute_add,
+ ),
+ context,
+ "Please confirm adding this paper to Zotero.",
+ )
class AgentExecutor:
@@ -953,7 +1525,9 @@ def run(
ExecutionResult with response and tool call history.
"""
# Build system prompt with current date and user context
- current_date = datetime.now(timezone.utc).strftime("%A, %B %d, %Y")
+ current_date = datetime.now(get_user_timezone()).strftime(
+ "%A, %B %d, %Y %I:%M %p %Z"
+ )
email_send_policy = (
"If you need to send an email, use SendEmailTool which requires explicit Slack confirmation."
if ENABLE_DIRECT_EMAIL_SEND
@@ -1061,15 +1635,16 @@ def run(
})
if (
- tool_name == "SendEmailTool"
- and result.success
+ result.success
and isinstance(result.data, dict)
and result.data.get("requires_confirmation")
):
confirmation = result.data.get("confirmation", {})
return ExecutionResult(
response=confirmation.get(
- "text", "Please confirm sending this email."
+ "text", result.data.get(
+ "message", "Please confirm this action."
+ )
),
tool_calls=tool_calls_history,
iterations=iterations,
@@ -1134,7 +1709,9 @@ def run_streaming(
ExecutionResult with final response and tool call history.
"""
# Build system prompt with current date and user context
- current_date = datetime.now(timezone.utc).strftime("%A, %B %d, %Y")
+ current_date = datetime.now(get_user_timezone()).strftime(
+ "%A, %B %d, %Y %I:%M %p %Z"
+ )
email_send_policy = (
"If you need to send an email, use SendEmailTool which requires explicit Slack confirmation."
if ENABLE_DIRECT_EMAIL_SEND
@@ -1311,14 +1888,15 @@ def run_streaming(
})
if (
- tool_name == "SendEmailTool"
- and result.success
+ result.success
and isinstance(result.data, dict)
and result.data.get("requires_confirmation")
):
confirmation = result.data.get("confirmation", {})
response_text = confirmation.get(
- "text", "Please confirm sending this email."
+ "text", result.data.get(
+ "message", "Please confirm this action."
+ )
)
yield StreamEvent(
event_type=StreamEventType.DONE,
@@ -1419,7 +1997,8 @@ def _build_messages(
messages.append({"role": role, "content": content})
# Add current message
- messages.append({"role": "user", "content": current_message})
+ if not messages or messages[-1] != {"role": "user", "content": current_message}:
+ messages.append({"role": "user", "content": current_message})
return messages
@@ -1469,7 +2048,8 @@ def _extract_memories(
messages.append({"role": role, "content": content})
# Add current exchange
- messages.append({"role": "user", "content": user_message})
+ if not messages or messages[-1] != {"role": "user", "content": user_message}:
+ messages.append({"role": "user", "content": user_message})
messages.append({"role": "assistant", "content": assistant_response})
# Auto-extract memories via Mem0
diff --git a/src/bot/heartbeat.py b/src/bot/heartbeat.py
index d3b860b..1b29a1c 100644
--- a/src/bot/heartbeat.py
+++ b/src/bot/heartbeat.py
@@ -1,15 +1,17 @@
"""Heartbeat system for proactive notifications."""
+import json
import logging
import time
from datetime import datetime, timedelta, timezone
from typing import TYPE_CHECKING, Any
+from anthropic import Anthropic
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
-from ..config import SLACK_AUTHORIZED_USERS, get_user_timezone
-from .formatters import format_briefing, format_calendar_events
+from ..config import ANTHROPIC_API_KEY, AGENT_MODEL, SLACK_AUTHORIZED_USERS, get_user_timezone
+from .formatters import format_briefing, format_calendar_events, markdown_to_slack
from .proactive_settings import ProactiveSettingsStore, UserProactiveSettings
if TYPE_CHECKING:
@@ -488,6 +490,9 @@ def send_daily_briefings(self) -> int:
def _send_daily_briefing(self, settings: UserProactiveSettings) -> bool:
"""Send a daily briefing to a user.
+ Uses an LLM to generate a detailed, natural-language briefing from
+ raw data — matching the quality of interactive briefing requests.
+
Args:
settings: User's proactive settings.
@@ -502,34 +507,13 @@ def _send_daily_briefing(self, settings: UserProactiveSettings) -> bool:
# Generate briefing data
briefing = self._generate_briefing()
- # Format the briefing
- formatted = format_briefing(briefing)
-
- # Add greeting
- hour = datetime.now().hour
- if hour < 12:
- greeting = "Good morning!"
- elif hour < 17:
- greeting = "Good afternoon!"
- else:
- greeting = "Good evening!"
-
- greeting_block = {
- "type": "section",
- "text": {
- "type": "mrkdwn",
- "text": f":wave: *{greeting}* Here's your daily briefing:",
- },
- }
-
- blocks = [greeting_block]
- if "blocks" in formatted:
- blocks.extend(formatted["blocks"])
+ # Use LLM to produce a detailed briefing (same quality as interactive)
+ briefing_text = self._format_briefing_with_llm(briefing)
self.slack_client.chat_postMessage(
channel=channel_id,
- text=f"{greeting} Here's your daily briefing.",
- blocks=blocks,
+ text=briefing_text,
+ mrkdwn=True,
)
logger.info(f"Sent daily briefing to {settings.user_id}")
@@ -542,6 +526,37 @@ def _send_daily_briefing(self, settings: UserProactiveSettings) -> bool:
logger.error(f"Error sending daily briefing: {e}")
return False
+ def _format_briefing_with_llm(self, briefing: dict[str, Any]) -> str:
+ """Use LLM to generate a detailed briefing from raw data.
+
+ Args:
+ briefing: Raw briefing data dictionary.
+
+ Returns:
+ Formatted briefing text in Slack mrkdwn.
+ """
+ prompt = f"""Generate a detailed daily briefing from this data. Write it in Slack mrkdwn format.
+
+Be thorough — include ALL events with times, ALL email account counts, ALL open PRs and issues
+with titles and repos, and ALL overdue tasks with projects and due dates. Add context where
+useful (e.g. note back-to-back meetings, highlight overdue items, mention if inbox is clear).
+
+Briefing data:
+{json.dumps(briefing, default=str, indent=2)}"""
+
+ try:
+ client = Anthropic(api_key=ANTHROPIC_API_KEY)
+ response = client.messages.create(
+ model=AGENT_MODEL,
+ max_tokens=2048,
+ messages=[{"role": "user", "content": prompt}],
+ )
+ return markdown_to_slack(response.content[0].text)
+ except Exception as e:
+ logger.warning(f"LLM briefing generation failed, falling back to static: {e}")
+ formatted = format_briefing(briefing)
+ return formatted.get("text", "Daily Briefing")
+
def _generate_briefing(self) -> dict[str, Any]:
"""Generate briefing data.
@@ -586,8 +601,20 @@ def _generate_briefing(self) -> dict[str, Any]:
except Exception as e:
logger.error(f"Error getting Todoist overdue tasks for briefing: {e}", exc_info=True)
+ # Release connections to prevent socket exhaustion
+ try:
+ self.multi_google.close()
+ except Exception:
+ pass
+
return briefing
+ def cleanup_connections(self):
+ """Periodically release Google API connections to prevent socket exhaustion."""
+ if self._multi_google is not None:
+ self._multi_google.close()
+ self._multi_google = None
+
def _get_dm_channel(self, user_id: str) -> str | None:
"""Get or open a DM channel with a user.
diff --git a/src/bot/security.py b/src/bot/security.py
index 20f3e2d..a331f9f 100644
--- a/src/bot/security.py
+++ b/src/bot/security.py
@@ -10,7 +10,7 @@
import time
from collections import defaultdict
from dataclasses import dataclass, field
-from datetime import datetime, timedelta
+from datetime import datetime
from enum import Enum
from typing import Any
@@ -24,7 +24,22 @@ def _normalize_action_type(action_type: str) -> str:
value = re.sub(r"_+", "_", value).strip("_")
aliases = {
"create_email_draft": "create_draft",
+ "create_calendar_event": "create_event",
+ "update_calendar_event": "update_event",
+ "delete_calendar_event": "delete_event",
"create_github_issue": "create_issue",
+ "create_todoist_task": "create_task",
+ "complete_todoist_task": "complete_task",
+ "update_todoist_task": "update_task",
+ "add_todoist_comment": "add_comment",
+ "reopen_todoist_task": "reopen_task",
+ "create_notion_page": "create_page",
+ "add_notion_comment": "add_comment",
+ "add_google_doc_comment": "add_comment",
+ "reply_google_doc_comment": "add_comment",
+ "resolve_google_doc_comment": "resolve_comment",
+ "update_proactive_settings": "update_settings",
+ "add_zotero_paper": "add_reference",
"send_email": "send_message",
}
return aliases.get(value, value)
@@ -362,7 +377,19 @@ def validate_action(
# Actions that require extra validation
sensitive_actions = {
"create_draft": "Creates an email draft",
+ "create_event": "Creates a calendar event",
+ "update_event": "Updates a calendar event",
+ "delete_event": "Deletes a calendar event",
"create_issue": "Creates a GitHub issue",
+ "create_task": "Creates a Todoist task",
+ "complete_task": "Completes a Todoist task",
+ "update_task": "Updates a Todoist task",
+ "reopen_task": "Reopens a Todoist task",
+ "create_page": "Creates a Notion page",
+ "add_comment": "Adds a comment",
+ "resolve_comment": "Resolves a comment",
+ "update_settings": "Updates notification settings",
+ "add_reference": "Adds a Zotero reference",
"send_message": "Sends a message",
}
@@ -487,10 +514,10 @@ def get_security_guard() -> SecurityGuard:
global _security_guard
if _security_guard is None:
from ..config import (
- SECURITY_LEVEL,
+ RATE_LIMIT_BLOCK_DURATION,
RATE_LIMIT_REQUESTS,
RATE_LIMIT_WINDOW,
- RATE_LIMIT_BLOCK_DURATION,
+ SECURITY_LEVEL,
)
_security_guard = SecurityGuard(
level=SecurityLevel(SECURITY_LEVEL),
diff --git a/src/bot/tools.py b/src/bot/tools.py
index 8f74bbc..ec03c1a 100644
--- a/src/bot/tools.py
+++ b/src/bot/tools.py
@@ -5,19 +5,18 @@
"""
from datetime import datetime, timedelta
-from enum import Enum
from typing import Any, Literal
from pydantic import BaseModel, Field
from ..config import (
ENABLE_DIRECT_EMAIL_SEND,
- PRIMARY_ACCOUNT,
ZOTERO_DEFAULT_COLLECTION,
get_accounts_description,
get_user_timezone,
)
+
class ToolResult(BaseModel):
"""Standard result format for tool execution."""
@@ -81,7 +80,7 @@ class SearchDriveTool(BaseModel):
# --- Calendar Tools ---
class GetCalendarEventsTool(BaseModel):
- """Get calendar events for a specific date from all Google calendars."""
+ """Get calendar events for a date, including current-time-aware upcoming and next-event fields."""
date: str = Field(
default="today",
@@ -142,6 +141,40 @@ class CreateCalendarEventTool(BaseModel):
)
+class UpdateCalendarEventTool(BaseModel):
+ """Update an existing calendar event, such as moving it, renaming it, changing location, or adding attendees."""
+
+ event_id: str = Field(description="Calendar event ID")
+ account: str | None = Field(
+ default=None,
+ description="Google account containing the event. " + get_accounts_description(),
+ )
+ calendar_id: str = Field(default="primary", description="Calendar ID, defaults to primary")
+ title: str | None = Field(default=None, description="New event title/summary")
+ date: str | None = Field(
+ default=None,
+ description="New start date: 'today', 'tomorrow', day name, or ISO format",
+ )
+ time: str | None = Field(default=None, description="New start time, e.g. '2pm' or '14:00'")
+ duration_minutes: int | None = Field(default=None, description="New duration in minutes")
+ location: str | None = Field(default=None, description="New location")
+ description: str | None = Field(default=None, description="New description")
+ attendees: list[str] | None = Field(default=None, description="Replacement attendee list")
+ send_notifications: bool = Field(default=True, description="Notify attendees of changes")
+
+
+class DeleteCalendarEventTool(BaseModel):
+ """Cancel/delete an existing calendar event."""
+
+ event_id: str = Field(description="Calendar event ID")
+ account: str | None = Field(
+ default=None,
+ description="Google account containing the event. " + get_accounts_description(),
+ )
+ calendar_id: str = Field(default="primary", description="Calendar ID, defaults to primary")
+ send_notifications: bool = Field(default=True, description="Send cancellation notifications")
+
+
# --- Email Tools ---
class GetUnreadCountsTool(BaseModel):
@@ -289,6 +322,30 @@ class CompleteTodoistTaskTool(BaseModel):
task_id: str = Field(description="The task ID to complete")
+class UpdateTodoistTaskTool(BaseModel):
+ """Update a Todoist task's content, description, due date, priority, or labels."""
+
+ task_id: str = Field(description="The task ID to update")
+ content: str | None = Field(default=None, description="New task title/content")
+ description: str | None = Field(default=None, description="New task description")
+ due: str | None = Field(default=None, description="New due date in natural language")
+ priority: int | None = Field(default=None, description="Priority 1-4 where 4 is urgent")
+ labels: list[str] | None = Field(default=None, description="Replacement labels")
+
+
+class AddTodoistCommentTool(BaseModel):
+ """Add a comment to a Todoist task."""
+
+ task_id: str = Field(description="The task ID to comment on")
+ content: str = Field(description="Comment text")
+
+
+class ReopenTodoistTaskTool(BaseModel):
+ """Reopen a completed Todoist task."""
+
+ task_id: str = Field(description="The task ID to reopen")
+
+
# --- Notion Tools ---
class SearchNotionTool(BaseModel):
@@ -316,6 +373,67 @@ class AddNotionCommentTool(BaseModel):
content: str = Field(description="Comment text")
+# --- Google Docs Tools ---
+
+class AddGoogleDocCommentTool(BaseModel):
+ """Add a comment to a Google Doc."""
+
+ document_id: str = Field(description="Google Doc document ID from the URL")
+ content: str = Field(description="Comment text")
+ quoted_text: str | None = Field(default=None, description="Optional quoted text to anchor the comment")
+ account: str | None = Field(
+ default=None,
+ description="Google account to use. " + get_accounts_description(),
+ )
+
+
+class ReplyGoogleDocCommentTool(BaseModel):
+ """Reply to an existing Google Doc comment."""
+
+ document_id: str = Field(description="Google Doc document ID")
+ comment_id: str = Field(description="Comment ID to reply to")
+ content: str = Field(description="Reply text")
+ account: str | None = Field(
+ default=None,
+ description="Google account to use. " + get_accounts_description(),
+ )
+
+
+class ResolveGoogleDocCommentTool(BaseModel):
+ """Resolve an existing Google Doc comment."""
+
+ document_id: str = Field(description="Google Doc document ID")
+ comment_id: str = Field(description="Comment ID to resolve")
+ account: str | None = Field(
+ default=None,
+ description="Google account to use. " + get_accounts_description(),
+ )
+
+
+# --- Proactive Settings Tools ---
+
+class GetProactiveSettingsTool(BaseModel):
+ """Get current proactive notification settings for this Slack user."""
+
+ pass
+
+
+class UpdateProactiveSettingsTool(BaseModel):
+ """Update proactive notification settings for this Slack user."""
+
+ calendar_reminders_enabled: bool | None = Field(default=None, description="Enable/disable calendar reminders")
+ email_alerts_enabled: bool | None = Field(default=None, description="Enable/disable important email alerts")
+ daily_briefing_enabled: bool | None = Field(default=None, description="Enable/disable daily briefings")
+ reminder_minutes_before: int | None = Field(default=None, description="Calendar reminder lead time in minutes")
+ briefing_hour: int | None = Field(default=None, description="Daily briefing hour, 0-23")
+ briefing_minute: int | None = Field(default=None, description="Daily briefing minute, 0-59")
+ briefing_days: list[int] | None = Field(default=None, description="Briefing days, Monday=0 through Sunday=6")
+ important_contacts: list[str] | None = Field(default=None, description="Important email sender substrings")
+ alert_keywords: list[str] | None = Field(default=None, description="Important email subject keywords")
+ quiet_hours_start: int | None = Field(default=None, description="Quiet-hours start hour, 0-23")
+ quiet_hours_end: int | None = Field(default=None, description="Quiet-hours end hour, 0-23")
+
+
# --- Zotero Tools ---
class SearchZoteroPapersTool(BaseModel):
@@ -380,6 +498,8 @@ def _build_all_tools() -> list[type[BaseModel]]:
GetCalendarEventsTool,
CheckAvailabilityTool,
CreateCalendarEventTool,
+ UpdateCalendarEventTool,
+ DeleteCalendarEventTool,
GetUnreadCountsTool,
CreateEmailDraftTool,
GetGitHubPRsTool,
@@ -392,9 +512,17 @@ def _build_all_tools() -> list[type[BaseModel]]:
GetTodoistTasksTool,
CreateTodoistTaskTool,
CompleteTodoistTaskTool,
+ UpdateTodoistTaskTool,
+ AddTodoistCommentTool,
+ ReopenTodoistTaskTool,
SearchNotionTool,
CreateNotionPageTool,
AddNotionCommentTool,
+ AddGoogleDocCommentTool,
+ ReplyGoogleDocCommentTool,
+ ResolveGoogleDocCommentTool,
+ GetProactiveSettingsTool,
+ UpdateProactiveSettingsTool,
SearchZoteroPapersTool,
GetZoteroPaperTool,
ListRecentPapersTool,
@@ -419,6 +547,8 @@ def _build_all_tools() -> list[type[BaseModel]]:
"GetCalendarEventsTool": "get_calendar_events",
"CheckAvailabilityTool": "check_availability",
"CreateCalendarEventTool": "create_calendar_event",
+ "UpdateCalendarEventTool": "update_calendar_event",
+ "DeleteCalendarEventTool": "delete_calendar_event",
"GetUnreadCountsTool": "get_unread_counts",
"CreateEmailDraftTool": "create_email_draft",
# Kept for defensive handling if older prompts/caches reference it.
@@ -433,9 +563,17 @@ def _build_all_tools() -> list[type[BaseModel]]:
"GetTodoistTasksTool": "get_todoist_tasks",
"CreateTodoistTaskTool": "create_todoist_task",
"CompleteTodoistTaskTool": "complete_todoist_task",
+ "UpdateTodoistTaskTool": "update_todoist_task",
+ "AddTodoistCommentTool": "add_todoist_comment",
+ "ReopenTodoistTaskTool": "reopen_todoist_task",
"SearchNotionTool": "search_notion",
"CreateNotionPageTool": "create_notion_page",
"AddNotionCommentTool": "add_notion_comment",
+ "AddGoogleDocCommentTool": "add_google_doc_comment",
+ "ReplyGoogleDocCommentTool": "reply_google_doc_comment",
+ "ResolveGoogleDocCommentTool": "resolve_google_doc_comment",
+ "GetProactiveSettingsTool": "get_proactive_settings",
+ "UpdateProactiveSettingsTool": "update_proactive_settings",
"SearchZoteroPapersTool": "search_zotero_papers",
"GetZoteroPaperTool": "get_zotero_paper",
"ListRecentPapersTool": "list_recent_papers",
diff --git a/src/config.py b/src/config.py
index 446b187..a12bb05 100644
--- a/src/config.py
+++ b/src/config.py
@@ -104,6 +104,7 @@ def get_google_credentials_path() -> Path:
# Todoist Configuration
TODOIST_API_KEY = get_env("TODOIST_API_KEY")
+TODOIST_PROJECT_ID = get_env("TODOIST_PROJECT_ID")
# Zotero Configuration
ZOTERO_API_KEY = get_env("ZOTERO_API_KEY")
@@ -150,8 +151,10 @@ def get_accounts_description() -> str:
# User timezone (IANA name, e.g., "America/Los_Angeles")
USER_TIMEZONE = get_env("USER_TIMEZONE", "America/Los_Angeles")
-# Minimum interval between Slack message updates (in seconds) to avoid rate limiting
-STREAMING_UPDATE_INTERVAL = float(get_env("STREAMING_UPDATE_INTERVAL", "0.5"))
+# Minimum interval between Slack message updates (in seconds) to avoid rate limiting.
+# Slack redraws edited messages, so sentence-level updates generally feel smoother
+# than token-level repainting.
+STREAMING_UPDATE_INTERVAL = float(get_env("STREAMING_UPDATE_INTERVAL", "1.25"))
# Database paths
KNOWLEDGE_GRAPH_DB = PROJECT_ROOT / get_env("KNOWLEDGE_GRAPH_DB", "data/knowledge_graph.db")
@@ -267,7 +270,3 @@ def validate_config() -> list[str]:
issues.append("SLACK_AUTHORIZED_USERS not set (bot will reject all users)")
return issues
-
-
-# Initialize directories on import
-ensure_directories()
diff --git a/src/ideaspark/__init__.py b/src/ideaspark/__init__.py
new file mode 100644
index 0000000..8a43324
--- /dev/null
+++ b/src/ideaspark/__init__.py
@@ -0,0 +1 @@
+"""IdeaSpark — daily AI × cancer research idea generator."""
diff --git a/src/ideaspark/agent.py b/src/ideaspark/agent.py
new file mode 100644
index 0000000..59aa696
--- /dev/null
+++ b/src/ideaspark/agent.py
@@ -0,0 +1,494 @@
+"""IdeaSpark agent — daily AI × cancer research idea generation."""
+
+import json
+import logging
+from datetime import datetime
+
+import anthropic
+
+from src.config import ANTHROPIC_API_KEY, AGENT_MODEL, get_user_timezone
+from src.ideaspark.corpus import PaperCorpus
+from src.ideaspark.deep_research import DeepResearcher
+from src.ideaspark.literature import LiteratureMonitor
+from src.ideaspark.memory import IdeaMemory
+
+logger = logging.getLogger(__name__)
+
+# ── Rotating thematic schedule ────────────────────────────────────────
+
+# Each theme is a dict with three string keys:
+#   "name"        — label written to the logs, stored with the idea, and printed
+#                   in the prompt header.
+#   "query"       — space-separated keywords; passed to the corpus search and,
+#                   split on whitespace, used as the keyword set when filtering
+#                   new literature.
+#   "description" — anchor/reach instructions interpolated into the generation
+#                   prompt.
+# List order matters: get_todays_theme() indexes into it by idea count.
+THEMES = [
+    {
+        "name": "Genomic FMs × unexpected domains",
+        "query": "foundation model genomics microbiology ecology neuroscience agriculture evolution antibiotic resistance",
+        "description": (
+            "Anchor on a genomic FM (Evo 2, Enformer, Borzoi, Nucleotide Transformer 3, Caduceus) "
+            "and reach into a domain FAR from cancer: microbiology, ecology, neuroscience, "
+            "agriculture, evolutionary biology, infectious disease, conservation. Where would "
+            "large-scale genomic models create breakthroughs that specialists in those fields "
+            "couldn't achieve on their own?"
+        ),
+    },
+    {
+        "name": "Liquid biopsy tech × non-cancer applications",
+        "query": "cell-free RNA cfRNA biomarker neurodegeneration autoimmune transplant organ injury pregnancy infection",
+        "description": (
+            "Anchor on Hani's liquid biopsy capabilities (Exai-1, oncRNA, cfRNA profiling). "
+            "Reach into non-cancer clinical domains: neurodegeneration, organ transplant rejection, "
+            "autoimmune disease, infectious disease monitoring, pregnancy complications, "
+            "mental health biomarkers, aging. Where does cfRNA give an edge no one is exploiting?"
+        ),
+    },
+    {
+        "name": "Perturbation biology × systems outside oncology",
+        "query": "perturbation CRISPR screen drug response immunology neuroscience development regeneration stem cell",
+        "description": (
+            "Anchor on perturbation capabilities (STATE, Tahoe-100M, GENEVA, CRISPR screening). "
+            "Reach into immunology, neuroscience, developmental biology, regenerative medicine, "
+            "stem cell engineering, or metabolic disease. Where would massive perturbation atlases "
+            "reshape understanding in a field that hasn't had access to this scale of data?"
+        ),
+    },
+    {
+        "name": "RNA biology × synthetic biology & engineering",
+        "query": "RNA structure synthetic biology gene circuit riboswitch biosensor RNA device metabolic engineering",
+        "description": (
+            "Anchor on RNA biology tools (SwitchSeeker, Mach-1, SHAPE-FM, RiNALMo). "
+            "Reach into synthetic biology, biosensor design, metabolic engineering, gene circuits, "
+            "biomanufacturing, or environmental monitoring. Where can deep RNA structural "
+            "understanding enable engineered biological systems outside therapeutics?"
+        ),
+    },
+    {
+        "name": "Single-cell AI × clinical & population science",
+        "query": "single-cell clinical trial epidemiology population health aging public health biobank",
+        "description": (
+            "Anchor on single-cell AI (scBaseCount, STATE, Tahoe-x1, scFoundation). "
+            "Reach into clinical trial design, epidemiology, population health, aging research, "
+            "biobanking, or health disparities. How can cell-level AI models transform "
+            "large-cohort studies or clinical decision-making at scale?"
+        ),
+    },
+    {
+        "name": "Bio FMs × physical sciences & computation",
+        "query": "foundation model physics materials protein design robotics optimization simulation quantum",
+        "description": (
+            "Anchor on any Goodarzi lab FM or dataset. Reach into physics-inspired methods, "
+            "materials science, robot-scientist systems, active learning, simulation, "
+            "optimal experimental design, or information theory. Where do ideas from "
+            "physical sciences or CS theory create new paradigms for biological modeling?"
+        ),
+    },
+    {
+        "name": "Cancer data × global health & equity",
+        "query": "global health equity low resource diagnostics point-of-care Africa Asia Latin America",
+        "description": (
+            "Anchor on any Goodarzi lab tool, model, or dataset. Reach into global health, "
+            "point-of-care diagnostics, low-resource settings, neglected diseases, health equity, "
+            "or frugal innovation. How can cutting-edge AI and omics tools be adapted or "
+            "transferred to address health challenges in underserved populations?"
+        ),
+    },
+]
+
+
+def get_todays_theme(idea_count: int) -> dict:
+    """Return the theme for the given idea count, wrapping around THEMES.
+
+    The theme advances by one for every idea generated and starts over once
+    the end of the list is reached.
+    """
+    return THEMES[idea_count % len(THEMES)]
+
+
+# ── Idea generation prompt ────────────────────────────────────────────
+
+# System prompt for the idea-generation call; IdeaSparkAgent.generate_idea passes
+# it as the `system` argument. It is a plain (non-f) string, so nothing is
+# interpolated at runtime — every fact below is hard-coded.
+# NOTE(review): the biography contains a dated statement ("becoming full
+# Professor July 2026") that will go stale and need a manual update.
+SYSTEM_PROMPT = """You are IdeaSpark, a research ideation agent for Hani Goodarzi's lab.
+
+Hani is a Core Investigator at Arc Institute, Associate Professor at UCSF (becoming full Professor July 2026), and AI Research Lead at Arc Computational Tech Center. His lab works at the intersection of RNA biology, cancer genomics, AI/ML, single-cell omics, and virtual cell models.
+
+Key active projects: Evo 2 (40B DNA foundation model), Mach-1/1.5 (RNA foundation models), CodonFM (codon-resolution FMs with NVIDIA), Orion (generative AI for oncRNA cancer detection), Exai-1 (multimodal cfRNA foundation model), scBaseCount (AI-curated single-cell repo), STATE (perturbation prediction), Tahoe-100M (largest single-cell drug perturbation atlas), GENEVA (molecular phenotyping), SwitchSeeker (RNA structural switches).
+
+Companies: Exai Bio (liquid biopsy), Tahoe Therapeutics (single-cell drug perturbation), Therna Biosciences (programmable RNA therapeutics).
+
+Foundation models in scope:
+- DNA/genomic FMs: Evo 2, Enformer, Borzoi, Nucleotide Transformer 3, Caduceus
+- Single-cell FMs: STATE, Tahoe-x1, scFoundation
+- RNA FMs: Mach-1, CodonFM, RiNALMo, SHAPE-FM (unpublished, Goodarzi lab)
+- Protein FMs: Boltz-2, ESM/ESM2 (Meta), AlphaFold/AlphaFold3
+- Chemical/drug FMs: MolBERT, ChemBERTa, MolGPT
+- Multi-modal: BiomedCLIP, PLIP
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+CRITICAL INSTRUCTION — IDEA GENERATION PHILOSOPHY
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+Your job is NOT to recombine things Hani already does. He already thinks about those intersections every day. Your job is to PULL HIM INTO UNFAMILIAR TERRITORY.
+
+Each idea must:
+1. ANCHOR on exactly ONE pillar of Hani's work (one paper, one dataset, one capability)
+2. REACH into a field or method Hani does NOT currently work in — the further from his comfort zone the better
+3. The "reach" should come from the new literature or the theme — fields like immunology, neuroscience, ecology, physics, materials science, clinical trial design, epidemiology, synthetic biology, metabolomics, imaging, robotics, etc.
+4. Be specific enough to act on (not vague hand-waving)
+5. Explain WHY the anchor gives Hani a unique edge in this unfamiliar space
+6. The idea should feel slightly uncomfortable — if it's obvious to someone in Hani's lab, it's not far enough
+
+BAD ideas (too close to home):
+- "Use Mach-1 to study splicing in cancer" (he already does this)
+- "Combine Evo 2 with liquid biopsy" (he already thinks about this)
+- "Apply STATE to predict drug responses" (literally the project)
+
+GOOD ideas (one anchor, far reach):
+- "Use Evo 2's genomic representations to predict antibiotic resistance evolution in hospital microbiomes" (anchor: Evo 2, reach: clinical microbiology)
+- "Apply SwitchSeeker's RNA structure methods to discover riboswitches in crop pathogens for agricultural biocontrol" (anchor: SwitchSeeker, reach: agriculture)
+- "Repurpose Tahoe-100M perturbation embeddings as features for predicting clinical trial outcomes" (anchor: Tahoe, reach: clinical trial design)
+
+When suggesting collaborators, prioritize researchers at Arc Institute, Stanford, UCSF, and Berkeley — but specifically researchers whose expertise covers the UNFAMILIAR territory, not Hani's own domain."""
+
+
+def build_generation_prompt(
+    theme: dict,
+    strategy: str,
+    corpus_papers: list[dict],
+    new_papers: list[dict],
+    memory: IdeaMemory,
+    is_stretch: bool = False,
+    research_brief: str = "",
+) -> str:
+    """Build the user prompt for idea generation.
+
+    Args:
+        theme: Theme dict; its "name" and "description" values are interpolated.
+        strategy: "A" selects the Strategy A instructions; any other value
+            selects Strategy B.
+        corpus_papers: Hani's papers; only the first 8 are listed.
+        new_papers: Papers from the target domain; only the first 10 are
+            listed, each with its abstract (if any) cut to 200 characters.
+        memory: Supplies the preferred themes and the running idea count.
+        is_stretch: When true, appends the stretch note and a [STRETCH] marker.
+        research_brief: Optional deep-research text; its section is left out
+            when this is empty.
+
+    Returns:
+        The complete prompt string, ending with the JSON-metadata instructions.
+    """
+
+    # Format corpus papers
+    corpus_section = "\n".join([
+        f"- [{p.get('year', '')}] {p.get('title', '')} ({p.get('journal', '')})"
+        for p in corpus_papers[:8]
+    ])
+
+    # Format new literature (the abstract line is added only when one is present)
+    lit_section = "\n".join([
+        f"- [{p.get('source', '')}] {p.get('title', '')} ({p.get('date', '')})"
+        + (f"\n Abstract: {p.get('abstract', '')[:200]}..." if p.get('abstract') else "")
+        for p in new_papers[:10]
+    ])
+
+    # Preference context
+    pref_context = ""
+    preferred_themes = memory.get_preferred_themes()
+    if preferred_themes:
+        pref_context = f"\nHani has shown preference for ideas in: {', '.join(preferred_themes)}."
+
+    # Strategy description
+    if strategy == "A":
+        strategy_desc = (
+            "Strategy A — Pick ONE of Hani's papers below as your anchor. Then look at the "
+            "new literature and find a paper from a DIFFERENT field that creates an unexpected "
+            "opportunity. The idea should live in the OTHER field, with Hani's anchor "
+            "providing a unique edge that researchers in that field lack."
+        )
+    else:
+        strategy_desc = (
+            "Strategy B — Pick ONE of Hani's capabilities (a model, dataset, or method) as "
+            "your anchor. Then identify an unsolved problem in a field OUTSIDE Hani's current "
+            "work — immunology, neuroscience, ecology, materials, clinical trials, public health, "
+            "synthetic biology, agriculture, etc. — where that anchor could be transformative."
+        )
+
+    stretch_note = ""
+    if is_stretch:
+        stretch_note = (
+            "\n\n⚡ This is a STRETCH idea. Push boundaries — suggest moonshots that may "
+            "require new collaborations, data types, or capabilities outside the current group. "
+            "Still grounded in Hani's expertise, but ambitious."
+        )
+
+    # The number shown in the brief is one past the count of ideas logged so far.
+    idea_number = memory.get_idea_count() + 1
+
+    # Deep research brief section
+    research_section = ""
+    if research_brief:
+        research_section = f"""
+
+### Deep Research Brief (today's exploration of unfamiliar territory):
+{research_brief}
+"""
+
+    # NOTE(review): the "Relevant Papers" template below asks for two Hani
+    # papers, while the section heading says to pick ONE anchor — confirm which
+    # is intended.
+    # The date in the brief header uses the configured user timezone.
+    # Doubled braces ({{ }}) in the JSON example are f-string escapes for
+    # literal braces.
+    prompt = f"""Generate IdeaSpark #{idea_number}.
+
+**Today's Theme:** {theme['name']}
+{theme['description']}
+
+**{strategy_desc}**{stretch_note}{pref_context}
+
+---
+{research_section}
+### Hani's Papers (pick ONE as your anchor — do NOT combine multiple):
+{corpus_section}
+
+### Papers from the Unfamiliar Domain:
+{lit_section}
+
+---
+
+Produce a structured brief in EXACTLY this format (no markdown headers, use the exact labels):
+
+🧬 IdeaSpark #{idea_number} — {datetime.now(get_user_timezone()).strftime('%A, %B %d, %Y')}
+Theme: {theme['name']}
+{"[STRETCH]" if is_stretch else ""}
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+*The Gap*
+[2-3 sentences: what's missing, unsolved, or newly possible]
+
+*Hypothesis*
+[1-2 sentences: the core claim or bet]
+
+*Proposed Approach*
+[3-5 sentences: method sketch — what data, what model, what experiment]
+
+*Why You*
+[1-2 sentences: which specific papers/capabilities make Hani uniquely positioned]
+
+*Key Risk*
+[1 sentence: the thing most likely to make this fail]
+
+*Relevant Papers*
+- [Hani paper 1] — [why relevant]
+- [Hani paper 2] — [why relevant]
+- [New paper 1] — [what it enables]
+
+*Potential Collaborators*
+- [Name, Institution] — [why they're a good fit for this idea]
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+Scores: Novelty [X/5] · Feasibility [X/5] · Impact [X/5]
+Strategy: {"A: your work × new lit" if strategy == "A" else "B: your work × trends"}
+
+Also return a JSON block at the very end (after the brief) with:
+```json
+{{
+ "title": "",
+ "novelty": <1-5>,
+ "feasibility": <1-5>,
+ "impact": <1-5>,
+ "source_papers": ["", ""],
+ "new_papers": [""]
+}}
+```
+"""
+    return prompt
+
+
+# ── Main generation pipeline ──────────────────────────────────────────
+
+class IdeaSparkAgent:
+    """Orchestrates daily idea generation.
+
+    Holds the paper corpus, literature monitor, idea memory, deep researcher
+    and Anthropic client; ``generate_idea`` is the entry point that runs the
+    whole pipeline.
+    """
+
+    def __init__(self):
+        self.corpus = PaperCorpus()
+        self.literature = LiteratureMonitor()
+        self.memory = IdeaMemory()
+        self.researcher = DeepResearcher()
+        self.client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
+
+    def _get_literature(self) -> list[dict]:
+        """Return recent literature, fetching and caching it if today's cache is missing."""
+        if self.literature.has_today_cache():
+            return self.literature.load_cache()
+        papers = self.literature.fetch_all(limit_per_source=50)
+        self.literature.cache_papers(papers)
+        return papers
+
+    def _filter_literature_by_theme(self, papers: list[dict], theme: dict) -> list[dict]:
+        """Score and filter new papers by relevance to theme.
+
+        A paper's score is the number of theme keywords (whitespace-split,
+        lower-cased ``theme["query"]``) that occur as substrings of its title
+        plus abstract. Papers scoring at least 2 are returned best-first (at
+        most 10), each as a copy carrying a ``theme_relevance`` key. If nothing
+        reaches that score, the 10 most recent papers by ``date`` are returned
+        instead. The input dicts are never modified.
+        """
+        theme_keywords = set(theme["query"].lower().split())
+        scored = []
+        for p in papers:
+            text = f"{p.get('title', '')} {p.get('abstract', '')}".lower()
+            overlap = sum(1 for kw in theme_keywords if kw in text)
+            if overlap >= 2:
+                # Copy instead of annotating in place: the input list is the
+                # shared literature cache and is re-filtered on retries.
+                scored.append({**p, "theme_relevance": overlap})
+        if scored:
+            scored.sort(key=lambda x: x["theme_relevance"], reverse=True)
+            return scored[:10]
+        # `or ""` keeps the sort from failing when a paper has date=None.
+        return sorted(papers, key=lambda x: x.get("date") or "", reverse=True)[:10]
+
+    @staticmethod
+    def _score(metadata: dict, key: str, default: int = 3):
+        """Read a numeric score from LLM-produced metadata, falling back to ``default``.
+
+        Numbers pass through unchanged, integer strings such as "4" are
+        converted, and anything else (missing, null, free text) yields
+        ``default`` so the quality gate never compares a non-number.
+        """
+        value = metadata.get(key, default)
+        if isinstance(value, (int, float)) and not isinstance(value, bool):
+            return value
+        try:
+            return int(str(value).strip())
+        except ValueError:
+            return default
+
+    def generate_idea(self, max_retries: int = 3) -> dict | None:
+        """Run the full pipeline: theme → papers → literature → LLM → brief.
+
+        Retry strategy on duplicate/low-quality:
+            Attempt 1: original theme + preferred strategy
+            Attempt 2: same theme, flipped strategy, shuffled corpus
+            Attempt 3+: rotate to the next theme, flipped strategy, shuffled corpus
+
+        Args:
+            max_retries: Maximum number of generation attempts.
+
+        Returns:
+            A dict with keys brief, title, scores, theme, strategy, is_stretch,
+            idea_number and metadata; or None if the Claude call fails or every
+            attempt is rejected.
+        """
+        import random
+
+        idea_count = self.memory.get_idea_count()
+        base_theme = get_todays_theme(idea_count)
+        base_strategy = self.memory.get_preferred_strategy()
+        is_stretch = self.memory.should_be_stretch()
+        if is_stretch:
+            logger.info("STRETCH idea day")
+
+        # Run deep research for today's theme (this is the heavy lift)
+        corpus_papers = self.corpus.search(base_theme["query"], top_k=8)
+        logger.info(f"Running deep research for theme: {base_theme['name']}")
+        research_result = self.researcher.research_theme(
+            theme=base_theme,
+            anchor_papers=corpus_papers,
+        )
+        research_brief = research_result.get("research_brief", "")
+        discovered_papers = research_result.get("papers", [])
+        logger.info(
+            f"Deep research complete: {len(discovered_papers)} papers, "
+            f"brief={len(research_brief)} chars"
+        )
+
+        # Also get standard literature as fallback
+        new_papers = self._get_literature()
+
+        rejected_titles: list[str] = []
+        for attempt in range(1, max_retries + 1):
+            # Vary inputs on retry
+            if attempt == 1:
+                theme = base_theme
+                strategy = base_strategy
+            elif attempt == 2:
+                theme = base_theme
+                strategy = "B" if base_strategy == "A" else "A"
+                logger.info(f"Attempt {attempt}: flipping to strategy {strategy}")
+            else:
+                # NOTE(review): the research brief and discovered papers used
+                # below were produced for base_theme, not for the rotated
+                # theme — confirm this mix is intended.
+                theme_idx = (THEMES.index(base_theme) + 1) % len(THEMES)
+                theme = THEMES[theme_idx]
+                strategy = "B" if base_strategy == "A" else "A"
+                logger.info(f"Attempt {attempt}: rotating to theme '{theme['name']}'")
+
+            logger.info(f"Theme: {theme['name']}, Strategy: {strategy}")
+
+            # Get corpus papers (shuffle on retry so different papers surface)
+            corpus_papers = self.corpus.search(theme["query"], top_k=8)
+            if attempt > 1:
+                random.shuffle(corpus_papers)
+            logger.info(f"Corpus papers: {len(corpus_papers)}")
+
+            # Use deep research papers primarily, standard lit as supplement
+            if discovered_papers:
+                relevant_new = discovered_papers[:10]
+            else:
+                relevant_new = self._filter_literature_by_theme(new_papers, theme)
+            logger.info(f"Papers for prompt: {len(relevant_new)} (deep research: {bool(discovered_papers)})")
+
+            # Build prompt
+            prompt = build_generation_prompt(
+                theme=theme,
+                strategy=strategy,
+                corpus_papers=corpus_papers,
+                new_papers=relevant_new,
+                memory=self.memory,
+                is_stretch=is_stretch,
+                research_brief=research_brief,
+            )
+
+            if rejected_titles:
+                prompt += (
+                    f"\n\nIMPORTANT: Do NOT propose ideas similar to these (already generated): "
+                    f"{', '.join(rejected_titles)}. Find a genuinely different angle, "
+                    f"different biological question, different methodology."
+                )
+
+            # Call Claude. An API failure aborts the whole run rather than
+            # consuming a retry.
+            try:
+                response = self.client.messages.create(
+                    model=AGENT_MODEL,
+                    max_tokens=2000,
+                    system=SYSTEM_PROMPT,
+                    messages=[{"role": "user", "content": prompt}],
+                )
+                full_text = response.content[0].text
+            except Exception as e:
+                logger.error(f"Claude API call failed: {e}")
+                return None
+
+            # Parse response; fall back to neutral metadata when the JSON block
+            # is missing or unusable.
+            brief, metadata = self._parse_response(full_text)
+            if not metadata:
+                metadata = {
+                    "title": f"IdeaSpark #{idea_count + 1}",
+                    "novelty": 3, "feasibility": 3, "impact": 3,
+                    "source_papers": [], "new_papers": [],
+                }
+
+            scores = {
+                "novelty": self._score(metadata, "novelty"),
+                "feasibility": self._score(metadata, "feasibility"),
+                "impact": self._score(metadata, "impact"),
+            }
+
+            # Quality gate.
+            # NOTE(review): this rejects only when ALL three scores are below
+            # 2 — confirm that `all` rather than `any` is intended.
+            if all(v < 2 for v in scores.values()):
+                logger.info(f"Attempt {attempt}: below quality threshold — retrying")
+                rejected_titles.append(str(metadata.get("title", "unknown")))
+                continue
+
+            # Deduplication check — best effort: if embedding or the lookup
+            # fails, the idea is accepted, but the failure is logged rather
+            # than silently dropped.
+            emb = None
+            is_dup = False
+            try:
+                # Reuse the already-loaded corpus for embedding instead of
+                # constructing a new PaperCorpus (which reloads from disk).
+                emb = self.corpus.embed_query(brief[:500])
+                is_dup = self.memory.is_duplicate(emb.tolist())
+            except Exception as e:
+                logger.warning(f"Deduplication check skipped: {e}")
+            if is_dup:
+                logger.info(f"Attempt {attempt}: duplicate detected — retrying")
+                rejected_titles.append(str(metadata.get("title", "unknown")))
+                continue
+
+            # Passed — break out of retry loop
+            break
+        else:
+            # Loop finished without a break: every attempt was rejected.
+            logger.warning(f"Failed to generate unique idea after {max_retries} attempts")
+            return None
+
+        # Log the idea
+        idea_number = idea_count + 1
+        self.memory.log_idea(
+            idea_number=idea_number,
+            theme=theme["name"],
+            strategy=strategy,
+            title=metadata.get("title", ""),
+            brief=brief,
+            scores=scores,
+            source_papers=metadata.get("source_papers", []),
+            new_papers=metadata.get("new_papers", []),
+            is_stretch=is_stretch,
+            embedding=emb.tolist() if emb is not None else None,
+        )
+
+        return {
+            "brief": brief,
+            "title": metadata.get("title", ""),
+            "scores": scores,
+            "theme": theme["name"],
+            "strategy": strategy,
+            "is_stretch": is_stretch,
+            "idea_number": idea_number,
+            "metadata": metadata,
+        }
+
+    def _parse_response(self, text: str) -> tuple[str, dict | None]:
+        """Separate the structured brief from the JSON metadata block.
+
+        The last ```json fence (or, failing that, the last bare fence that
+        opens with "{") marks the start of the metadata. Everything before it
+        is the brief; the fence content up to its closing ``` is parsed as
+        JSON, so trailing prose after the block is ignored.
+
+        Returns:
+            (brief, metadata). metadata is None when there is no block, the
+            block is not valid JSON, or it is not a JSON object.
+        """
+        metadata = None
+        brief = text
+
+        json_start = text.rfind("```json")
+        if json_start == -1:
+            json_start = text.rfind("```\n{")
+
+        if json_start != -1:
+            brief = text[:json_start].strip()
+            fenced = text[json_start:]
+            opening_len = len("```json") if fenced.startswith("```json") else len("```")
+            # Keep only what sits between the opening and closing fences; if
+            # the closing fence is missing, partition() yields the remainder.
+            payload = fenced[opening_len:].partition("```")[0].strip()
+            try:
+                parsed = json.loads(payload)
+            except json.JSONDecodeError:
+                logger.warning("Failed to parse JSON metadata from response")
+            else:
+                if isinstance(parsed, dict):
+                    metadata = parsed
+                else:
+                    logger.warning("JSON metadata block is not an object — ignoring it")
+
+        return brief, metadata
diff --git a/src/ideaspark/corpus.py b/src/ideaspark/corpus.py
new file mode 100644
index 0000000..4f587c7
--- /dev/null
+++ b/src/ideaspark/corpus.py
@@ -0,0 +1,164 @@
+"""Paper corpus management — indexing, embedding, and retrieval of Hani's publications."""
+
+import json
+import logging
+from pathlib import Path
+
+import numpy as np
+from openai import OpenAI
+
+from src.config import OPENAI_API_KEY, EMBEDDING_MODEL, PROJECT_ROOT
+
+logger = logging.getLogger(__name__)
+
+DATA_DIR = PROJECT_ROOT / "data" / "ideaspark"
+CORPUS_PATH = DATA_DIR / "papers_corpus.json"
+
+
+# ── Method + biology tags for categorisation ──────────────────────────
+
+# NOTE(review): neither list is referenced by the code visible in this module;
+# presumably they are the vocabularies used when papers are given the
+# "method_tags" / "biology_tags" values that search_by_tags() reads — confirm.
+
+# Technique / methodology labels.
+METHOD_TAGS = [
+    "foundation model", "deep learning", "single-cell", "CRISPR screen",
+    "liquid biopsy", "RNA structure", "splicing", "perturbation modeling",
+    "generative model", "NLP/LLM", "computer vision", "graph neural network",
+    "sequence model", "codon optimization", "flow cytometry", "mass spec",
+    "spatial transcriptomics", "drug screening", "phylogenetics",
+]
+
+# Biological topic labels.
+BIOLOGY_TAGS = [
+    "RNA regulation", "cancer detection", "drug response", "metastasis",
+    "tRNA biology", "codon usage", "breast cancer", "prostate cancer",
+    "tumor evolution", "RNA therapeutics", "cell-free RNA", "oncRNA",
+    "RBP networks", "splicing dysregulation", "RNA structure switches",
+    "single-cell atlas", "perturbation biology", "translation regulation",
+]
+
+
+class PaperCorpus:
+    """Manages Hani's publication corpus with embeddings for semantic retrieval.
+
+    ``papers`` is a list of paper dicts and ``embeddings`` an array with one
+    row per paper (None when nothing has been embedded). ``search`` maps
+    embedding row indices straight onto ``papers``, so the two must stay
+    aligned: ``add_paper`` and ``build_from_list`` do not touch
+    ``embeddings``, so call ``embed_all`` after changing the paper list.
+    """
+
+    def __init__(self):
+        self.papers: list[dict] = []
+        self.embeddings: np.ndarray | None = None
+        # The OpenAI client is optional: without it the corpus still loads,
+        # but embed_all()/embed_query() raise RuntimeError.
+        self._oai = None
+        if OPENAI_API_KEY:
+            try:
+                self._oai = OpenAI(api_key=OPENAI_API_KEY)
+            except Exception as e:
+                logger.warning(f"Could not init OpenAI client: {e}")
+        self._load()
+
+    # ── persistence ───────────────────────────────────────────────────
+
+    def _load(self):
+        """Load papers and embeddings from disk, if the files exist."""
+        if CORPUS_PATH.exists():
+            # Explicit UTF-8: paper titles and abstracts may contain non-ASCII
+            # text, and the platform default encoding is not always UTF-8.
+            with open(CORPUS_PATH, encoding="utf-8") as f:
+                self.papers = json.load(f)
+        emb_path = DATA_DIR / "paper_embeddings.npy"
+        if emb_path.exists():
+            self.embeddings = np.load(emb_path)
+        logger.info(f"Loaded corpus: {len(self.papers)} papers")
+
+    def save(self):
+        """Write papers (JSON) and, when present, embeddings (.npy) to DATA_DIR."""
+        DATA_DIR.mkdir(parents=True, exist_ok=True)
+        with open(CORPUS_PATH, "w", encoding="utf-8") as f:
+            json.dump(self.papers, f, indent=2)
+        if self.embeddings is not None:
+            np.save(DATA_DIR / "paper_embeddings.npy", self.embeddings)
+        logger.info(f"Saved corpus: {len(self.papers)} papers")
+
+    # ── corpus building ───────────────────────────────────────────────
+
+    def add_paper(self, paper: dict):
+        """Add a paper dict with keys: title, abstract, authors, year, journal, doi, citations."""
+        self.papers.append(paper)
+
+    def build_from_list(self, papers: list[dict]):
+        """Bulk-load a list of paper dicts, replacing the current paper list."""
+        self.papers = papers
+        logger.info(f"Loaded {len(papers)} papers into corpus")
+
+    # ── embedding ─────────────────────────────────────────────────────
+
+    def _text_for_paper(self, p: dict) -> str:
+        """Return the text that is embedded for a paper: title, newline, abstract."""
+        title = p.get("title", "")
+        abstract = p.get("abstract", "")
+        return f"{title}\n{abstract}".strip()
+
+    def embed_all(self):
+        """Compute embeddings for all papers with the configured EMBEDDING_MODEL.
+
+        Raises:
+            RuntimeError: If no OpenAI client is available.
+        """
+        if not self._oai:
+            raise RuntimeError("OPENAI_API_KEY not set — cannot embed")
+
+        texts = [self._text_for_paper(p) for p in self.papers]
+        # batch in chunks of 50
+        all_embs = []
+        for i in range(0, len(texts), 50):
+            batch = texts[i : i + 50]
+            resp = self._oai.embeddings.create(model=EMBEDDING_MODEL, input=batch)
+            all_embs.extend([d.embedding for d in resp.data])
+        self.embeddings = np.array(all_embs, dtype=np.float32)
+        logger.info(f"Embedded {len(self.papers)} papers → shape {self.embeddings.shape}")
+
+    def embed_query(self, query: str) -> np.ndarray:
+        """Embed a single query string.
+
+        Raises:
+            RuntimeError: If no OpenAI client is available.
+        """
+        if not self._oai:
+            raise RuntimeError("OPENAI_API_KEY not set")
+        resp = self._oai.embeddings.create(model=EMBEDDING_MODEL, input=[query])
+        return np.array(resp.data[0].embedding, dtype=np.float32)
+
+    # ── retrieval ─────────────────────────────────────────────────────
+
+    def search(self, query: str, top_k: int = 5) -> list[dict]:
+        """Semantic search over corpus. Returns top-k papers with scores.
+
+        Each result is a copy of the paper dict with a ``relevance_score``
+        (cosine similarity) added. When there are no embeddings, a random
+        sample of up to ``top_k`` papers is returned instead, without scores.
+        """
+        if self.embeddings is None or len(self.embeddings) == 0:
+            logger.warning("No embeddings — returning random sample")
+            import random
+            return random.sample(self.papers, min(top_k, len(self.papers)))
+
+        q_emb = self.embed_query(query)
+        # cosine similarity; a zero norm is replaced by 1.0 to avoid dividing by zero
+        norms = np.linalg.norm(self.embeddings, axis=1) * np.linalg.norm(q_emb)
+        sims = (self.embeddings @ q_emb) / np.where(norms > 0, norms, 1.0)
+
+        idxs = np.argsort(sims)[::-1][:top_k]
+        results = []
+        for idx in idxs:
+            paper = self.papers[idx].copy()
+            paper["relevance_score"] = float(sims[idx])
+            results.append(paper)
+        return results
+
+    def search_by_tags(
+        self,
+        method_tags: list[str] | None = None,
+        biology_tags: list[str] | None = None,
+        top_k: int = 5,
+    ) -> list[dict]:
+        """Filter papers by method and/or biology tags.
+
+        A paper's score is the number of requested tags found in its
+        ``method_tags`` and ``biology_tags`` lists. Papers scoring above zero
+        are returned best-first (at most ``top_k``), each as a copy carrying a
+        ``tag_match_score`` key.
+        """
+        wanted_methods = set(method_tags or ())
+        wanted_biology = set(biology_tags or ())
+        results = []
+        for p in self.papers:
+            score = len(wanted_methods & set(p.get("method_tags", [])))
+            score += len(wanted_biology & set(p.get("biology_tags", [])))
+            if score > 0:
+                paper = p.copy()
+                paper["tag_match_score"] = score
+                results.append(paper)
+        results.sort(key=lambda x: x["tag_match_score"], reverse=True)
+        return results[:top_k]
+
+    # ── stats ─────────────────────────────────────────────────────────
+
+    @property
+    def size(self) -> int:
+        return len(self.papers)
+
+    def summary(self) -> str:
+        """Return a one-line description: paper count, year range, embedding status.
+
+        Papers without a usable year are left out of the range instead of
+        being counted as year 0.
+        """
+        if not self.papers:
+            return "Empty corpus"
+        years = []
+        for p in self.papers:
+            try:
+                year = int(p.get("year") or 0)
+            except (TypeError, ValueError):
+                continue
+            if year > 0:
+                years.append(year)
+        year_range = f"{min(years)}–{max(years)}" if years else "years unknown"
+        return (
+            f"{len(self.papers)} papers, "
+            f"{year_range}, "
+            f"embeddings={'yes' if self.embeddings is not None else 'no'}"
+        )
diff --git a/src/ideaspark/deep_research.py b/src/ideaspark/deep_research.py
new file mode 100644
index 0000000..de9ed77
--- /dev/null
+++ b/src/ideaspark/deep_research.py
@@ -0,0 +1,273 @@
+"""Deep research module — multi-turn Claude exploration of unfamiliar domains.
+
+Before generating an idea, the agent conducts a research session:
+1. Given a theme (with its unfamiliar domain), generate targeted search queries
+2. Pull papers from PubMed, bioRxiv, arXiv for the UNFAMILIAR domain
+3. Have Claude synthesize findings into a research brief
+4. Feed the brief + Hani's anchor papers into the idea generator
+
+This makes each idea grounded in real, current work from the target domain.
+"""
+
+import json
+import logging
+import time
+from datetime import datetime, timedelta
+
+import anthropic
+import requests
+
+from src.config import ANTHROPIC_API_KEY, AGENT_MODEL
+
+logger = logging.getLogger(__name__)
+
+# ── Search helpers ───────────────────────────────────────────────────
+
+def _pubmed_search(query: str, max_results: int = 20, days: int = 90) -> list[dict]:
+    """Look up recent PubMed records for ``query``.
+
+    Runs an E-utilities ``esearch`` restricted to publication dates within the
+    last ``days`` days, then an ``esummary`` for the returned PMIDs. Each hit
+    becomes a dict with ``source``, ``title``, ``authors`` (first three names,
+    comma-joined), ``date``, ``journal`` and ``pmid``. Any failure is logged
+    and produces an empty list.
+    """
+    eutils = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
+    window_end = datetime.now()
+    window_start = window_end - timedelta(days=days)
+    try:
+        search_resp = requests.get(
+            f"{eutils}/esearch.fcgi"
+            f"?db=pubmed&retmode=json&retmax={max_results}"
+            f"&datetype=pdat&mindate={window_start.strftime('%Y/%m/%d')}"
+            f"&maxdate={window_end.strftime('%Y/%m/%d')}"
+            f"&term={requests.utils.quote(query)}",
+            timeout=30,
+        )
+        search_resp.raise_for_status()
+        pmids = search_resp.json().get("esearchresult", {}).get("idlist", [])
+        if not pmids:
+            return []
+        time.sleep(0.4)  # pause between the two E-utilities calls
+
+        summary_resp = requests.get(
+            f"{eutils}/esummary.fcgi?db=pubmed&retmode=json&id={','.join(pmids)}",
+            timeout=30,
+        )
+        summary_resp.raise_for_status()
+        records = summary_resp.json().get("result", {})
+
+        found = []
+        for pmid in pmids:
+            record = records.get(pmid, {})
+            if pmid == "uids" or not record:
+                continue
+            names = [author.get("name", "") for author in record.get("authors", [])]
+            found.append({
+                "source": "pubmed",
+                "title": record.get("title", ""),
+                "authors": ", ".join(names[:3]),
+                "date": record.get("pubdate", ""),
+                "journal": record.get("fulljournalname", ""),
+                "pmid": pmid,
+            })
+        return found
+    except Exception as e:
+        logger.warning(f"PubMed deep search failed: {e}")
+        return []
+
+
+def _biorxiv_search(query_terms: list[str], max_results: int = 20, days: int = 60) -> list[dict]:
+    """Keyword-filter recent bioRxiv preprints.
+
+    Downloads one page of the bioRxiv ``details`` listing for the last ``days``
+    days (cursor 0) and keeps records whose title or abstract contains any of
+    ``query_terms`` (case-insensitive substring match). At most ``max_results``
+    papers are returned. Failures are logged and yield whatever was collected
+    so far.
+
+    NOTE(review): the trailing ``/100`` in the URL looks intended as a page
+    size — confirm against the bioRxiv API documentation.
+    """
+    # Empty terms would match every record, so drop them up front.
+    terms_lower = [t.lower() for t in query_terms if t]
+    if not terms_lower or max_results <= 0:
+        # Nothing can match; skip the network round-trip entirely.
+        return []
+
+    end = datetime.now()
+    start = end - timedelta(days=days)
+    date_range = f"{start.strftime('%Y-%m-%d')}/{end.strftime('%Y-%m-%d')}"
+
+    papers = []
+    try:
+        url = f"https://api.biorxiv.org/details/biorxiv/{date_range}/0/100"
+        resp = requests.get(url, timeout=30)
+        resp.raise_for_status()
+        collection = resp.json().get("collection", [])
+
+        for item in collection:
+            # ``or ""`` also covers explicit nulls, which would otherwise make
+            # the slice below raise and abort the whole scan.
+            title = item.get("title") or ""
+            abstract = item.get("abstract") or ""
+            haystack = f"{title} {abstract}".lower()
+            if not any(term in haystack for term in terms_lower):
+                continue
+            papers.append({
+                "source": "biorxiv",
+                "title": title,
+                "abstract": abstract[:300],
+                "authors": item.get("authors", ""),
+                "date": item.get("date", ""),
+                "category": item.get("category", ""),
+            })
+            if len(papers) >= max_results:
+                break  # enough hits; no need to scan the rest
+    except Exception as e:
+        logger.warning(f"bioRxiv deep search failed: {e}")
+
+    return papers[:max_results]
+
+
+# ── Deep research pipeline ───────────────────────────────────────────
+
+QUERY_GEN_PROMPT = """You are a research librarian helping a computational biologist explore an UNFAMILIAR domain.
+
+Theme for today's exploration: {theme_name}
+Description: {theme_description}
+
+The researcher (Hani Goodarzi) is an expert in: RNA biology, cancer genomics, AI/ML foundation models, single-cell omics, liquid biopsy.
+
+His ANCHOR for today is one of these papers/capabilities:
+{anchor_summary}
+
+Your task: generate 4-6 TARGETED PubMed search queries that will find the most interesting recent work in the UNFAMILIAR parts of this theme. The queries should:
+1. Focus on the destination domain (NOT on cancer or RNA biology — Hani already knows that)
+2. Include methodological terms that would catch papers where Hani's tools could apply
+3. Be specific enough to return high-quality results (not thousands of generic hits)
+4. Cover different angles of the unfamiliar domain
+
+Also generate 2-3 keyword lists for bioRxiv filtering (bioRxiv doesn't support complex queries).
+
+Return ONLY a JSON object:
+```json
+{{
+ "pubmed_queries": ["query1", "query2", ...],
+ "biorxiv_keywords": [["term1", "term2"], ["term3", "term4"], ...]
+}}
+```"""
+
+SYNTHESIS_PROMPT = """You are a research analyst preparing a briefing for a computational biologist who is EXPLORING OUTSIDE HIS COMFORT ZONE.
+
+The researcher (Hani Goodarzi) is expert in: RNA biology, cancer genomics, AI/ML foundation models, single-cell omics, liquid biopsy. He is NOT an expert in the domain below.
+
+Today's theme: {theme_name}
+{theme_description}
+
+Here are papers found in the UNFAMILIAR domain:
+
+{papers_formatted}
+
+Your task: write a research brief (500-800 words) that:
+1. Identifies the 3-4 most exciting open problems or recent breakthroughs in this domain
+2. For each, explains WHY someone with Hani's specific toolkit might have a unique angle
+3. Highlights any paper that seems ripe for cross-pollination with genomic/RNA/single-cell AI methods
+4. Notes specific datasets, challenges, or collaborator types in this domain
+5. Is honest about what you DON'T know — flag gaps where deeper reading would help
+
+Write it as a crisp briefing, not a lit review. Be opinionated about what's most promising."""
+
+
+class DeepResearcher:
+ """Conducts multi-step research into unfamiliar domains before idea generation."""
+
+    def __init__(self):
+        """Create the Anthropic client used for query generation and synthesis."""
+        self.client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
+
+    def research_theme(
+        self,
+        theme: dict,
+        anchor_papers: list[dict],
+    ) -> dict:
+        """Run deep research for a theme. Returns research brief + discovered papers.
+
+        Steps:
+        1. Generate targeted search queries via Claude
+        2. Execute searches against PubMed + bioRxiv
+        3. Synthesize findings into a research brief
+
+        Args:
+            theme: Theme record. ``name`` and ``description`` are read for
+                logging and prompts; ``query`` is read only by the fallback.
+            anchor_papers: Anchor paper dicts; the first five are summarised
+                (title, journal, year) for the query-generation prompt.
+
+        Returns:
+            Dict with ``research_brief`` (str), ``papers`` (de-duplicated list
+            of paper dicts) and ``queries_used`` (the cleaned query set; only
+            the first five PubMed queries and first three keyword lists are
+            actually executed).
+        """
+        logger.info(f"Deep research: {theme['name']}")
+
+        # Format anchor papers
+        anchor_summary = "\n".join(
+            f"- {p.get('title', '')} ({p.get('journal', '')}, {p.get('year', '')})"
+            for p in anchor_papers[:5]
+        )
+
+        # Step 1: Generate search queries, tolerating malformed model output
+        queries = self._clean_queries(self._generate_queries(theme, anchor_summary))
+        if not (queries["pubmed_queries"] or queries["biorxiv_keywords"]):
+            logger.warning("Query generation failed, using theme keywords as fallback")
+            queries = {
+                "pubmed_queries": [theme["query"]],
+                "biorxiv_keywords": [theme["query"].split()[:3]],
+            }
+
+        # Step 2: Execute searches
+        all_papers = []
+
+        for q in queries["pubmed_queries"][:5]:
+            all_papers.extend(_pubmed_search(q, max_results=15, days=90))
+            time.sleep(0.5)
+
+        for kw_list in queries["biorxiv_keywords"][:3]:
+            all_papers.extend(_biorxiv_search(kw_list, max_results=10, days=60))
+            time.sleep(0.5)
+
+        # Deduplicate on the first 60 characters of the lower-cased title
+        seen_titles = set()
+        unique_papers = []
+        for p in all_papers:
+            title_key = p["title"].lower()[:60]
+            if title_key not in seen_titles:
+                seen_titles.add(title_key)
+                unique_papers.append(p)
+
+        logger.info(f"Deep research found {len(unique_papers)} unique papers")
+
+        # Step 3: Synthesize
+        brief = self._synthesize(theme, unique_papers)
+
+        return {
+            "research_brief": brief,
+            "papers": unique_papers,
+            "queries_used": queries,
+        }
+
+    @staticmethod
+    def _clean_queries(raw: dict | None) -> dict:
+        """Coerce model output into ``pubmed_queries: [str]`` / ``biorxiv_keywords: [[str]]``."""
+        raw = raw if isinstance(raw, dict) else {}
+
+        pubmed = raw.get("pubmed_queries") or []
+        if isinstance(pubmed, str):
+            pubmed = [pubmed]
+        keywords = raw.get("biorxiv_keywords") or []
+        if isinstance(keywords, str):
+            keywords = [keywords]
+
+        keyword_lists = []
+        for group in keywords if isinstance(keywords, (list, tuple)) else []:
+            if isinstance(group, str):
+                # A bare string would otherwise be iterated character by
+                # character by the keyword filter.
+                group = [group]
+            if not isinstance(group, (list, tuple)):
+                continue
+            terms = [t for t in group if isinstance(t, str) and t.strip()]
+            if terms:
+                keyword_lists.append(terms)
+
+        return {
+            "pubmed_queries": [
+                q for q in (pubmed if isinstance(pubmed, (list, tuple)) else [])
+                if isinstance(q, str) and q.strip()
+            ],
+            "biorxiv_keywords": keyword_lists,
+        }
+
+    def _generate_queries(self, theme: dict, anchor_summary: str) -> dict | None:
+        """Request search queries from Claude and parse the JSON object in the reply.
+
+        Returns the decoded object, or ``None`` when the API call fails, the
+        reply contains no ``{...}`` span, or that span is not valid JSON.
+        """
+        prompt = QUERY_GEN_PROMPT.format(
+            theme_name=theme["name"],
+            theme_description=theme["description"],
+            anchor_summary=anchor_summary,
+        )
+        user_message = {"role": "user", "content": prompt}
+        try:
+            response = self.client.messages.create(
+                model=AGENT_MODEL,
+                max_tokens=800,
+                messages=[user_message],
+            )
+            reply = response.content[0].text
+
+            # The reply may wrap the JSON in other text, so take the span from
+            # the first "{" to the last "}".
+            first_brace = reply.find("{")
+            last_brace = reply.rfind("}")
+            if first_brace < 0 or last_brace < first_brace:
+                return None
+            return json.loads(reply[first_brace : last_brace + 1])
+        except Exception as e:
+            logger.warning(f"Query generation failed: {e}")
+        return None
+
+    def _synthesize(self, theme: dict, papers: list[dict]) -> str:
+        """Turn the discovered papers into a research brief written by Claude.
+
+        Only the first 25 papers are included in the prompt. Returns a
+        placeholder string when ``papers`` is empty or the API call fails.
+        """
+        if not papers:
+            return "(No papers found in deep research — falling back to broad theme description.)"
+
+        entries = []
+        for p in papers[:25]:  # cap at 25 to stay within context
+            entry = (
+                f"[{p.get('source', '')}] {p.get('title', '')}\n"
+                f" Authors: {p.get('authors', 'N/A')}\n"
+                f" Date: {p.get('date', 'N/A')} | Journal: {p.get('journal', p.get('category', 'N/A'))}\n"
+            )
+            if p.get("abstract"):
+                entry += f" Abstract: {p['abstract'][:250]}..."
+            entries.append(entry)
+        papers_formatted = "\n\n".join(entries)
+
+        prompt = SYNTHESIS_PROMPT.format(
+            theme_name=theme["name"],
+            theme_description=theme["description"],
+            papers_formatted=papers_formatted,
+        )
+
+        try:
+            response = self.client.messages.create(
+                model=AGENT_MODEL,
+                max_tokens=1500,
+                messages=[{"role": "user", "content": prompt}],
+            )
+        except Exception as e:
+            logger.warning(f"Synthesis failed: {e}")
+            return "(Synthesis failed — using raw paper list.)"
+        try:
+            return response.content[0].text
+        except Exception as e:
+            logger.warning(f"Synthesis failed: {e}")
+            return "(Synthesis failed — using raw paper list.)"
diff --git a/src/ideaspark/literature.py b/src/ideaspark/literature.py
new file mode 100644
index 0000000..8e27fe0
--- /dev/null
+++ b/src/ideaspark/literature.py
@@ -0,0 +1,239 @@
+"""Literature monitoring — pull recent papers from bioRxiv, arXiv, PubMed."""
+
+import json
+import logging
+import time
+from datetime import datetime, timedelta
+from pathlib import Path
+
+import requests
+
+from src.config import PROJECT_ROOT
+
+logger = logging.getLogger(__name__)
+
+CACHE_DIR = PROJECT_ROOT / "data" / "ideaspark" / "literature_cache"
+
+
+class LiteratureMonitor:
+ """Fetch and cache recent preprints and papers from multiple sources."""
+
+    def __init__(self, window_days: int = 30):
+        """Store the look-back window (in days) and make sure ``CACHE_DIR`` exists."""
+        self.window_days = window_days
+        CACHE_DIR.mkdir(parents=True, exist_ok=True)
+
+ # ── bioRxiv ───────────────────────────────────────────────────────
+
+    def fetch_biorxiv(self, categories: list[str] | None = None, limit: int = 100) -> list[dict]:
+        """Fetch recent bioRxiv preprints via the API.
+
+        Categories: genomics, bioinformatics, cancer_biology, systems_biology,
+        molecular_biology, cell_biology, genetics
+
+        The request URL does not depend on the category, so the listing for the
+        ``window_days`` window is downloaded once and filtered locally.
+        Passing an empty list disables category filtering.
+
+        Returns at most ``limit`` paper dicts, grouped in the order the
+        categories were given. A failed request is logged and yields an empty
+        list.
+
+        NOTE(review): the trailing ``/50`` in the URL looks intended as a page
+        size — confirm against the bioRxiv API documentation.
+        """
+        if categories is None:
+            categories = [
+                "genomics", "bioinformatics", "cancer_biology",
+                "systems_biology", "molecular_biology",
+            ]
+
+        end = datetime.now()
+        start = end - timedelta(days=self.window_days)
+        date_range = f"{start.strftime('%Y-%m-%d')}/{end.strftime('%Y-%m-%d')}"
+        url = f"https://api.biorxiv.org/details/biorxiv/{date_range}/0/50"
+
+        try:
+            resp = requests.get(url, timeout=30)
+            resp.raise_for_status()
+            collection = resp.json().get("collection", [])
+        except Exception as e:
+            logger.warning(f"bioRxiv fetch failed: {e}")
+            collection = []
+
+        # Position of each requested category, used to keep the caller's
+        # category order in the output.
+        rank = {cat: pos for pos, cat in enumerate(dict.fromkeys(categories))}
+
+        matched = []
+        for item in collection:
+            key = (item.get("category") or "").lower().replace(" ", "_")
+            if rank and key not in rank:
+                continue
+            matched.append((rank.get(key, 0), {
+                "source": "biorxiv",
+                "title": item.get("title", ""),
+                "abstract": item.get("abstract", ""),
+                "authors": item.get("authors", ""),
+                "doi": item.get("doi", ""),
+                "date": item.get("date", ""),
+                "category": item.get("category", ""),
+                "url": f"https://doi.org/{item.get('doi', '')}",
+            }))
+        # Stable sort: within a category the API's own order is preserved.
+        matched.sort(key=lambda pair: pair[0])
+        papers = [paper for _, paper in matched]
+
+        logger.info(f"bioRxiv: fetched {len(papers)} papers")
+        return papers[:limit]
+
+ # ── arXiv ─────────────────────────────────────────────────────────
+
+    def fetch_arxiv(self, categories: list[str] | None = None, limit: int = 100) -> list[dict]:
+        """Fetch recent arXiv preprints via the Atom API.
+
+        Categories: cs.LG, cs.AI, cs.CL, q-bio.GN, q-bio.QM
+
+        Requests the ``limit`` most recently submitted entries across the given
+        categories (OR-combined). Each paper dict carries ``source``, ``title``,
+        ``abstract``, ``authors``, ``date`` (first ten characters of the
+        published timestamp), ``category`` (first three category terms) and
+        ``url``. Failures are logged and yield the papers parsed so far.
+
+        NOTE(review): unlike the other fetchers, this one does not apply
+        ``window_days``.
+        """
+        if categories is None:
+            categories = ["cs.LG", "cs.AI", "cs.CL", "q-bio.GN", "q-bio.QM"]
+
+        cat_query = "+OR+".join(f"cat:{c}" for c in categories)
+        url = (
+            "https://export.arxiv.org/api/query"
+            f"?search_query={cat_query}"
+            "&sortBy=submittedDate&sortOrder=descending"
+            f"&max_results={limit}"
+        )
+
+        papers = []
+        try:
+            resp = requests.get(url, timeout=30)
+            resp.raise_for_status()
+
+            # Simple XML parsing (avoid heavy dependency)
+            import xml.etree.ElementTree as ET
+
+            # Parse the raw bytes so the XML declaration, not a guess based on
+            # HTTP headers, decides the text encoding.
+            root = ET.fromstring(resp.content)
+            ns = {"atom": "http://www.w3.org/2005/Atom"}
+
+            for entry in root.findall("atom:entry", ns):
+                # Titles and summaries can contain line breaks; collapse every
+                # run of whitespace to a single space.
+                title = " ".join(entry.findtext("atom:title", "", ns).split())
+                abstract = " ".join(entry.findtext("atom:summary", "", ns).split())
+                authors = [a.findtext("atom:name", "", ns) for a in entry.findall("atom:author", ns)]
+                published = entry.findtext("atom:published", "", ns)[:10]
+                # First link advertised as HTML, or "" when there is none.
+                link = next(
+                    (
+                        node.get("href", "")
+                        for node in entry.findall("atom:link", ns)
+                        if node.get("type") == "text/html"
+                    ),
+                    "",
+                )
+
+                arxiv_cats = [c.get("term", "") for c in entry.findall("atom:category", ns)]
+
+                papers.append({
+                    "source": "arxiv",
+                    "title": title,
+                    "abstract": abstract,
+                    "authors": ", ".join(authors),
+                    "date": published,
+                    "category": ", ".join(arxiv_cats[:3]),
+                    "url": link,
+                })
+
+        except Exception as e:
+            logger.warning(f"arXiv fetch failed: {e}")
+
+        logger.info(f"arXiv: fetched {len(papers)} papers")
+        return papers[:limit]
+
+ # ── PubMed ────────────────────────────────────────────────────────
+
+    def fetch_pubmed(self, queries: list[str] | None = None, limit: int = 50) -> list[dict]:
+        """Fetch recent PubMed articles via E-utilities.
+
+        Runs one ``esearch`` per query over the ``window_days`` window, merges
+        the PMIDs (first-seen order, duplicates dropped), then requests
+        summaries for at most ``limit`` of them. The default queries target
+        AI/ML intersections with biology, single-cell, RNA and perturbation
+        screens.
+
+        Returns paper dicts with ``source``, ``title``, ``abstract`` (always
+        empty here), ``authors`` (first five), ``date``, ``journal``, ``pmid``,
+        ``doi`` and ``url``. Failed searches are logged and skipped; a failed
+        summary request is logged and yields the papers built so far.
+        """
+        if queries is None:
+            queries = [
+                "(foundation model OR language model) AND (biology OR genomics OR protein)",
+                "(single cell) AND (deep learning OR neural network OR foundation model)",
+                "(RNA) AND (machine learning OR deep learning) AND (structure OR therapeutics)",
+                "(perturbation OR CRISPR screen) AND (machine learning OR prediction)",
+            ]
+
+        end = datetime.now()
+        start = end - timedelta(days=self.window_days)
+        mindate = start.strftime("%Y/%m/%d")
+        maxdate = end.strftime("%Y/%m/%d")
+
+        # dict keys keep first-seen order, so the ``limit`` cut-off below is
+        # deterministic; a set would hand back an arbitrary subset.
+        seen_pmids: dict[str, None] = {}
+        for query in queries:
+            try:
+                search_url = (
+                    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
+                    f"?db=pubmed&retmode=json&retmax={limit}"
+                    f"&datetype=pdat&mindate={mindate}&maxdate={maxdate}"
+                    f"&term={requests.utils.quote(query)}"
+                )
+                resp = requests.get(search_url, timeout=30)
+                resp.raise_for_status()
+                data = resp.json()
+                pmids = data.get("esearchresult", {}).get("idlist", [])
+                seen_pmids.update(dict.fromkeys(pmids))
+                time.sleep(0.5)
+            except Exception as e:
+                logger.warning(f"PubMed search failed for query '{query[:40]}...': {e}")
+
+        if not seen_pmids:
+            return []
+
+        # Fetch summaries
+        papers = []
+        pmid_list = list(seen_pmids)[:limit]
+        try:
+            fetch_url = (
+                "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
+                f"?db=pubmed&retmode=json&id={','.join(pmid_list)}"
+            )
+            resp = requests.get(fetch_url, timeout=30)
+            resp.raise_for_status()
+            records = resp.json().get("result", {})
+
+            for pmid in pmid_list:
+                info = records.get(pmid, {})
+                if not info or pmid == "uids":
+                    continue
+                authors = [a.get("name", "") for a in info.get("authors", [])]
+                papers.append({
+                    "source": "pubmed",
+                    "title": info.get("title", ""),
+                    "abstract": "",  # summaries don't include abstracts; fetched separately if needed
+                    "authors": ", ".join(authors[:5]),
+                    "date": info.get("pubdate", ""),
+                    "journal": info.get("fulljournalname", ""),
+                    "pmid": pmid,
+                    "doi": next(
+                        (aid.get("value", "") for aid in info.get("articleids", []) if aid.get("idtype") == "doi"),
+                        "",
+                    ),
+                    "url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
+                })
+        except Exception as e:
+            logger.warning(f"PubMed fetch failed: {e}")
+
+        logger.info(f"PubMed: fetched {len(papers)} papers")
+        return papers
+
+ # ── combined fetch ────────────────────────────────────────────────
+
+    def fetch_all(self, limit_per_source: int = 50) -> list[dict]:
+        """Query bioRxiv, arXiv and PubMed in turn and concatenate the results."""
+        combined = [
+            *self.fetch_biorxiv(limit=limit_per_source),
+            *self.fetch_arxiv(limit=limit_per_source),
+            *self.fetch_pubmed(limit=limit_per_source),
+        ]
+        logger.info(f"Total new literature: {len(combined)} papers")
+        return combined
+
+ # ── caching ───────────────────────────────────────────────────────
+
+    def cache_papers(self, papers: list[dict], date_str: str | None = None):
+        """Write ``papers`` as JSON to the cache file for ``date_str`` (today by default)."""
+        day = date_str if date_str is not None else datetime.now().strftime("%Y-%m-%d")
+        target = CACHE_DIR / f"{day}.json"
+        with open(target, "w") as handle:
+            json.dump(papers, handle, indent=2)
+        logger.info(f"Cached {len(papers)} papers to {target}")
+
+    def load_cache(self, date_str: str | None = None) -> list[dict]:
+        """Read the cached papers for ``date_str`` (today by default); ``[]`` if no file exists."""
+        day = date_str if date_str is not None else datetime.now().strftime("%Y-%m-%d")
+        source = CACHE_DIR / f"{day}.json"
+        if not source.exists():
+            return []
+        with open(source) as handle:
+            return json.load(handle)
+
+    def has_today_cache(self) -> bool:
+        """Report whether a cache file for today's date already exists."""
+        today = datetime.now().strftime("%Y-%m-%d")
+        return (CACHE_DIR / f"{today}.json").exists()
diff --git a/src/ideaspark/memory.py b/src/ideaspark/memory.py
new file mode 100644
index 0000000..1df14c0
--- /dev/null
+++ b/src/ideaspark/memory.py
@@ -0,0 +1,233 @@
+"""Feedback loop and preference learning for IdeaSpark."""
+
+import json
+import logging
+from datetime import datetime
+from pathlib import Path
+
+import numpy as np
+
+from src.config import PROJECT_ROOT
+
+logger = logging.getLogger(__name__)
+
+DATA_DIR = PROJECT_ROOT / "data" / "ideaspark"
+IDEA_LOG_PATH = DATA_DIR / "idea_log.json"
+PREFS_PATH = DATA_DIR / "preferences.json"
+EMBEDDINGS_PATH = DATA_DIR / "idea_embeddings.npy"
+
+
+class IdeaMemory:
+ """Tracks pitched ideas, feedback, and learned preferences."""
+
+    def __init__(self):
+        """Start from empty defaults, then overlay whatever ``_load`` finds on disk."""
+        self.idea_log: list[dict] = []
+        self.preferences: dict = {
+            "theme_weights": {},  # theme → cumulative score
+            "method_affinities": {},  # method keyword → score
+            "biology_affinities": {},  # biology keyword → score
+            "strategy_weights": {"A": 0.0, "B": 0.0},
+            "total_ideas": 0,
+            "total_fire": 0,
+            "total_thumbsdown": 0,
+        }
+        self._embeddings: np.ndarray | None = None  # (N, dim) array, lazy-loaded
+        self._load()
+
+ # ── persistence ───────────────────────────────────────────────────
+
+    def _load(self):
+        """Load the idea log, preferences and embedding matrix from ``DATA_DIR``.
+
+        Files that do not exist leave the in-memory defaults untouched. Stored
+        preferences are merged over the defaults, so keys missing from an older
+        file (for example a counter) still exist afterwards.
+        """
+        DATA_DIR.mkdir(parents=True, exist_ok=True)
+        if IDEA_LOG_PATH.exists():
+            with open(IDEA_LOG_PATH) as f:
+                self.idea_log = json.load(f)
+            # Migrate: strip any leftover embeddings from JSON
+            for entry in self.idea_log:
+                entry.pop("embedding", None)
+        if PREFS_PATH.exists():
+            with open(PREFS_PATH) as f:
+                stored = json.load(f)
+            for key, value in stored.items():
+                default = self.preferences.get(key)
+                if isinstance(default, dict) and isinstance(value, dict):
+                    # Keep default sub-keys such as the "A"/"B" strategy weights.
+                    default.update(value)
+                else:
+                    self.preferences[key] = value
+        if EMBEDDINGS_PATH.exists():
+            self._embeddings = np.load(EMBEDDINGS_PATH)
+
+    def save(self):
+        """Write the idea log and preferences as JSON and, if present, the embeddings as ``.npy``."""
+        json_targets = (
+            (IDEA_LOG_PATH, self.idea_log),
+            (PREFS_PATH, self.preferences),
+        )
+        for path, payload in json_targets:
+            with open(path, "w") as handle:
+                json.dump(payload, handle, indent=2)
+        if self._embeddings is None:
+            return
+        np.save(EMBEDDINGS_PATH, self._embeddings)
+
+    def _append_embedding(self, embedding: list[float] | np.ndarray):
+        """Add ``embedding`` as a new float32 row of the in-memory embedding matrix."""
+        row = np.array(embedding, dtype=np.float32).reshape(1, -1)
+        existing = self._embeddings
+        self._embeddings = row if existing is None else np.vstack([existing, row])
+
+ # ── idea logging ──────────────────────────────────────────────────
+
+    def log_idea(
+        self,
+        idea_number: int,
+        theme: str,
+        strategy: str,
+        title: str,
+        brief: str,
+        scores: dict,
+        source_papers: list[str],
+        new_papers: list[str],
+        is_stretch: bool = False,
+        slack_ts: str | None = None,
+        embedding: list[float] | None = None,
+    ):
+        """Append a pitched idea to the log, persist everything, and return the new entry.
+
+        The entry is timestamped with the current local time and starts with
+        ``reaction`` set to ``None``. An embedding row is stored only when
+        ``embedding`` is given, so embedding rows are not guaranteed to line up
+        with log positions.
+        """
+        entry = dict(
+            id=idea_number,
+            date=datetime.now().isoformat(),
+            theme=theme,
+            strategy=strategy,
+            title=title,
+            brief=brief,
+            scores=scores,
+            source_papers=source_papers,
+            new_papers=new_papers,
+            is_stretch=is_stretch,
+            slack_ts=slack_ts,
+            reaction=None,  # filled in by feedback collection
+        )
+        self.idea_log.append(entry)
+        if embedding is not None:
+            self._append_embedding(embedding)
+        self.preferences["total_ideas"] = len(self.idea_log)
+        self.save()
+        return entry
+
+ # ── feedback processing ───────────────────────────────────────────
+
+    def record_feedback(self, idea_id: int, reaction: str):
+        """Record a feedback reaction for a given idea.
+
+        reaction: "fire" (🔥), "thinking" (🤔), "thumbsdown" (👎), or None
+
+        👎 triggers full deletion — idea is purged from log and embeddings.
+
+        Reporting the reaction an idea already has is a no-op, so repeated
+        collection of the same reaction does not inflate preference weights.
+
+        Returns True when an idea with ``idea_id`` exists, False otherwise.
+        """
+        for idx, entry in enumerate(self.idea_log):
+            if entry["id"] != idea_id:
+                continue
+            if reaction == "thumbsdown":
+                # Update weights first: the entry is gone after deletion.
+                self._update_preferences(entry, reaction)
+                self._delete_idea(idx)
+                logger.info(f"Idea #{idea_id} thumbed down — deleted from log and embeddings")
+                return True
+            if entry.get("reaction") == reaction:
+                # Already recorded; counting it again would double the delta.
+                return True
+            entry["reaction"] = reaction
+            self._update_preferences(entry, reaction)
+            self.save()
+            return True
+        return False
+
+    def _delete_idea(self, idx: int):
+        """Drop the log entry at ``idx`` and the embedding row at the same index, then save.
+
+        The embedding matrix is reset to ``None`` once its last row is removed.
+        NOTE(review): this assumes log positions and embedding rows line up —
+        confirm for ideas that were logged without an embedding.
+        """
+        del self.idea_log[idx]
+        embeddings = self._embeddings
+        if embeddings is not None and idx < len(embeddings):
+            remaining = np.delete(embeddings, idx, axis=0)
+            self._embeddings = None if len(remaining) == 0 else remaining
+        self.preferences["total_ideas"] = len(self.idea_log)
+        self.save()
+
+    def _update_preferences(self, entry: dict, reaction: str):
+        """Apply one reaction to the counters, theme weight and strategy weight.
+
+        "fire" adds 1.0 and "thumbsdown" subtracts 1.0; any other reaction
+        leaves the preferences untouched.
+        """
+        effects = {
+            "fire": ("total_fire", 1.0),
+            "thumbsdown": ("total_thumbsdown", -1.0),
+        }
+        if reaction not in effects:
+            return  # thinking or None
+        counter_key, delta = effects[reaction]
+        self.preferences[counter_key] += 1
+
+        # theme weight
+        theme_weights = self.preferences["theme_weights"]
+        theme = entry.get("theme", "")
+        theme_weights[theme] = theme_weights.get(theme, 0.0) + delta
+
+        # strategy weight (only strategies that already have a weight)
+        strategy_weights = self.preferences["strategy_weights"]
+        strategy = entry.get("strategy", "")
+        if strategy in strategy_weights:
+            strategy_weights[strategy] += delta
+
+ # ── deduplication ─────────────────────────────────────────────────
+
+    def is_duplicate(self, embedding: list[float] | np.ndarray, threshold: float = 0.88) -> bool:
+        """Report whether ``embedding`` is closer than ``threshold`` to any stored idea.
+
+        Closeness is cosine similarity against every stored embedding row.
+        With no stored embeddings the answer is always False.
+        """
+        past = self._embeddings
+        if past is None or len(past) == 0:
+            return False
+
+        query = np.array(embedding, dtype=np.float32)
+        # Cosine similarity against all rows at once; the epsilon keeps the
+        # division defined for zero-norm vectors.
+        denom = np.linalg.norm(past, axis=1) * np.linalg.norm(query) + 1e-9
+        sims = past @ query / denom
+        best = int(np.argmax(sims))
+        best_sim = float(sims[best])
+        if not best_sim > threshold:
+            return False
+
+        # Find the idea number for logging (index may not align perfectly
+        # if some ideas lacked embeddings, but best effort)
+        idea_id = self.idea_log[best]["id"] if best < len(self.idea_log) else "?"
+        logger.info(f"Duplicate detected: similarity {best_sim:.3f} with idea #{idea_id}")
+        return True
+
+ # ── preference queries ────────────────────────────────────────────
+
+    def get_preferred_themes(self, top_k: int = 3) -> list[str]:
+        """List up to ``top_k`` theme names, highest cumulative feedback score first."""
+        weights = self.preferences.get("theme_weights", {})
+        if not weights:
+            return []
+        ranked = sorted(weights, key=lambda name: weights[name], reverse=True)
+        return ranked[:top_k]
+
+    def get_preferred_strategy(self) -> str:
+        """Pick the strategy with the higher cumulative score.
+
+        On a tie the choice alternates with the parity of ``total_ideas``:
+        "A" when it is even, "B" when it is odd.
+        """
+        weights = self.preferences.get("strategy_weights", {})
+        score_a = weights.get("A", 0)
+        score_b = weights.get("B", 0)
+        if score_a > score_b:
+            return "A"
+        if score_b > score_a:
+            return "B"
+        # alternate based on total ideas
+        even_total = self.preferences.get("total_ideas", 0) % 2 == 0
+        return "A" if even_total else "B"
+
+    def get_idea_count(self) -> int:
+        """Return the number of entries currently in the idea log."""
+        return len(self.idea_log)
+
+    def should_be_stretch(self) -> bool:
+        """Return True when the log holds a positive multiple of seven ideas (~1 in 7)."""
+        logged = len(self.idea_log)
+        if logged <= 0:
+            return False
+        return logged % 7 == 0
+
+    def generate_meta_summary(self) -> str | None:
+        """After 30+ ideas, summarize preference patterns.
+
+        Returns a Markdown block with reaction totals, the 🔥 hit rate over the
+        ideas still in the log, and the five themes with the most 🔥 reactions.
+        Returns None while fewer than 30 ideas are logged.
+        """
+        total = len(self.idea_log)
+        if total < 30:
+            return None
+
+        fire_ideas = [e for e in self.idea_log if e.get("reaction") == "fire"]
+        # Thumbed-down ideas are deleted from the log, so counting log entries
+        # alone always reports 0; the running counter keeps the real total.
+        logged_down = sum(1 for e in self.idea_log if e.get("reaction") == "thumbsdown")
+        down_count = max(logged_down, self.preferences.get("total_thumbsdown", 0))
+
+        fire_themes = {}
+        for e in fire_ideas:
+            t = e.get("theme", "unknown")
+            fire_themes[t] = fire_themes.get(t, 0) + 1
+
+        lines = [
+            f"## IdeaSpark Meta-Summary (after {total} ideas)",
+            f"- 🔥 {len(fire_ideas)} | 👎 {down_count} | Total {total}",
+            f"- Hit rate: {len(fire_ideas)/total*100:.0f}%",
+            "",
+            "### Top themes (by 🔥 count):",
+        ]
+        for theme, count in sorted(fire_themes.items(), key=lambda x: x[1], reverse=True)[:5]:
+            lines.append(f" - {theme}: {count} 🔥")
+
+        return "\n".join(lines)
diff --git a/src/ideaspark/notion_archive.py b/src/ideaspark/notion_archive.py
new file mode 100644
index 0000000..09fffa5
--- /dev/null
+++ b/src/ideaspark/notion_archive.py
@@ -0,0 +1,124 @@
+"""Notion archiving — push 🔥-rated ideas to the existing Project ideas database."""
+
+import logging
+from datetime import datetime
+
+from src.config import NOTION_API_KEY
+
+logger = logging.getLogger(__name__)
+
+# Existing "Project ideas" database on Notion
+# https://www.notion.so/arcinstitute/Project-ideas-1e6062f5e0e8809598d8f4775fd6aa59
+NOTION_DB_ID = "202062f5-e0e8-8089-9e81-c3e2fe2edd87"
+
+# Data source (collection) ID for direct API calls
+NOTION_DATA_SOURCE_ID = "202062f5-e0e8-800c-8374-000ba5b9424e"
+
+
+def _get_notion_client():
+    """Return a Notion ``Client``, or ``None`` when archiving cannot work.
+
+    ``notion_client`` is imported on demand. A missing package or an unset
+    ``NOTION_API_KEY`` is logged as a warning and disables archiving.
+    """
+    try:
+        from notion_client import Client
+
+        if NOTION_API_KEY:
+            return Client(auth=NOTION_API_KEY)
+        logger.warning("NOTION_API_KEY not set — archiving disabled")
+    except ImportError:
+        logger.warning("notion-client not installed — archiving disabled")
+    return None
+
+
+def _chunk_rich_text(text: str, limit: int = 2000) -> list[dict]:
+    """Cut ``text`` into consecutive rich_text items of at most ``limit`` characters each."""
+    starts = range(0, len(text), limit)
+    return [{"text": {"content": text[start : start + limit]}} for start in starts]
+
+
+def _score_to_priority(scores: dict) -> str:
+    """Map the mean of the numeric scores to "High" (>= 4), "Medium" (>= 3) or "Low".
+
+    Values that cannot be read as numbers (for example ``None``) are left out
+    of the mean instead of raising. With no usable score the result is "Low".
+    """
+    values = []
+    for raw in scores.values():
+        try:
+            values.append(float(raw))
+        except (TypeError, ValueError):
+            continue  # missing or non-numeric score
+    avg = sum(values) / len(values) if values else 0.0
+    if avg >= 4:
+        return "High"
+    elif avg >= 3:
+        return "Medium"
+    return "Low"
+
+
+def _build_notes(
+    brief: str,
+    theme: str,
+    strategy: str,
+    scores: dict,
+    is_stretch: bool,
+    idea_number: int,
+    date_str: str,
+) -> str:
+    """Return the Notes text: an IdeaSpark metadata header followed by ``brief``.
+
+    The header lists idea number, date, optional stretch tag, theme, strategy
+    label and the novelty/feasibility/impact scores ("?" when a score is
+    missing), closed by a 40-character rule and a blank line.
+    """
+    strategy_label = "B: papers × trends"
+    if strategy == "A":
+        strategy_label = "A: papers × new lit"
+    stretch_tag = " [STRETCH]" if is_stretch else ""
+    novelty, feasibility, impact = (
+        scores.get(key, "?") for key in ("novelty", "feasibility", "impact")
+    )
+    header_lines = [
+        f"IdeaSpark #{idea_number} — {date_str}{stretch_tag}",
+        f"Theme: {theme}",
+        f"Strategy: {strategy_label}",
+        f"Scores: N={novelty}/5 · F={feasibility}/5 · I={impact}/5",
+        "━" * 40,
+    ]
+    return "\n".join(header_lines) + "\n\n" + brief
+
+
+def archive_idea(
+    idea_number: int,
+    title: str,
+    date_str: str,
+    theme: str,
+    strategy: str,
+    scores: dict,
+    brief: str,
+    is_stretch: bool = False,
+    database_id: str = NOTION_DB_ID,
+) -> str | None:
+    """Create a page for a 🔥-rated idea in the Project ideas Notion database.
+
+    Property mapping:
+        Name → idea title
+        Notes → full brief + metadata (theme, strategy, scores)
+        Wet/Dry → "Dry" (or "Hybrid" for stretch)
+        priority → High/Medium/Low from avg score
+        Lead → not set here
+
+    Returns the new page id, or ``None`` when no client is available or the
+    create call fails (the failure is logged).
+    """
+    client = _get_notion_client()
+    if not client:
+        return None
+
+    notes_text = _build_notes(
+        brief=brief,
+        theme=theme,
+        strategy=strategy,
+        scores=scores,
+        is_stretch=is_stretch,
+        idea_number=idea_number,
+        date_str=date_str,
+    )
+    wet_dry = "Dry"
+    if is_stretch:
+        wet_dry = "Hybrid"
+    priority = _score_to_priority(scores)
+
+    try:
+        properties = {
+            "Name": {"title": [{"text": {"content": title}}]},
+            "Notes": {"rich_text": _chunk_rich_text(notes_text)},
+            "Wet/Dry": {"select": {"name": wet_dry}},
+            "priority": {"select": {"name": priority}},
+        }
+        page = client.pages.create(
+            parent={"database_id": database_id},
+            properties=properties,
+        )
+        logger.info(f"Archived idea #{idea_number} to Notion: {page['id']}")
+        return page["id"]
+    except Exception as e:
+        logger.error(f"Failed to archive idea to Notion: {e}")
+        return None
diff --git a/src/ideaspark/todoist_archive.py b/src/ideaspark/todoist_archive.py
new file mode 100644
index 0000000..012a85a
--- /dev/null
+++ b/src/ideaspark/todoist_archive.py
@@ -0,0 +1,128 @@
+"""Todoist archiving — push 🔥-rated ideas to the 'Project ideas' project."""
+
+import logging
+
+import requests
+
+from src.config import TODOIST_API_KEY
+
+logger = logging.getLogger(__name__)
+
+TODOIST_API_BASE = "https://api.todoist.com/api/v1"
+
+# Set via TODOIST_PROJECT_ID env var (see config.py)
+from src.config import TODOIST_PROJECT_ID
+
+
+def _get_headers() -> dict | None:
+    """Build the bearer-token JSON headers, or return ``None`` (with a warning) if no key is set."""
+    if TODOIST_API_KEY:
+        return {
+            "Authorization": f"Bearer {TODOIST_API_KEY}",
+            "Content-Type": "application/json",
+        }
+    logger.warning("TODOIST_API_KEY not set — archiving disabled")
+    return None
+
+
+def _score_to_priority(scores: dict) -> int:
+    """Map average score to Todoist priority (4=urgent, 1=low).
+
+    Todoist priorities are inverted: 4 is highest, 1 is lowest.
+
+    Values that cannot be read as numbers (for example ``None``) are left out
+    of the mean instead of raising. With no usable score the result is 1.
+    """
+    values = []
+    for raw in scores.values():
+        try:
+            values.append(float(raw))
+        except (TypeError, ValueError):
+            continue  # missing or non-numeric score
+    avg = sum(values) / len(values) if values else 0.0
+    if avg >= 4:
+        return 4  # urgent
+    elif avg >= 3:
+        return 3  # high
+    elif avg >= 2:
+        return 2  # medium
+    return 1  # low
+
+
+def _build_description(
+    brief: str,
+    theme: str,
+    strategy: str,
+    scores: dict,
+    is_stretch: bool,
+    idea_number: int,
+    date_str: str,
+) -> str:
+    """Return the task description: an IdeaSpark metadata header followed by ``brief``.
+
+    The header lists idea number, date, optional stretch tag, theme, strategy
+    label and the novelty/feasibility/impact scores ("?" when a score is
+    missing), closed by a 40-character rule and a blank line.
+    """
+    if strategy == "A":
+        strategy_label = "A: papers × new lit"
+    else:
+        strategy_label = "B: papers × trends"
+    stretch_tag = " [STRETCH]" if is_stretch else ""
+
+    def shown(key: str):
+        return scores.get(key, "?")
+
+    parts = [
+        f"IdeaSpark #{idea_number} — {date_str}{stretch_tag}\n",
+        f"Theme: {theme}\n",
+        f"Strategy: {strategy_label}\n",
+        f"Scores: N={shown('novelty')}/5 · F={shown('feasibility')}/5 · I={shown('impact')}/5\n",
+        "━" * 40 + "\n\n",
+        brief,
+    ]
+    return "".join(parts)
+
+
+def archive_idea(
+    idea_number: int,
+    title: str,
+    date_str: str,
+    theme: str,
+    strategy: str,
+    scores: dict,
+    brief: str,
+    is_stretch: bool = False,
+) -> str | None:
+    """Archive a 🔥-rated idea to the 'Project ideas' Todoist project.
+
+    Creates one task whose content is ``title`` and whose description is the
+    IdeaSpark header plus ``brief``. The task is labelled ``ideaspark``, a
+    slug of the theme, and ``stretch`` when applicable.
+
+    Returns the task ID on success, None on failure. Failure covers a missing
+    API key or project id, any HTTP/transport error, and a response body that
+    is not JSON or carries no ``id``; request and response failures are logged
+    as errors.
+    """
+    headers = _get_headers()
+    if not headers:
+        return None
+
+    if not TODOIST_PROJECT_ID:
+        logger.warning("TODOIST_PROJECT_ID not set — archiving disabled")
+        return None
+
+    description = _build_description(
+        brief=brief,
+        theme=theme,
+        strategy=strategy,
+        scores=scores,
+        is_stretch=is_stretch,
+        idea_number=idea_number,
+        date_str=date_str,
+    )
+
+    priority = _score_to_priority(scores)
+
+    # Labels for filtering
+    labels = ["ideaspark", theme.lower().replace(" ", "-")]
+    if is_stretch:
+        labels.append("stretch")
+
+    payload = {
+        "content": title,
+        "description": description,
+        "project_id": TODOIST_PROJECT_ID,
+        "priority": priority,
+        "labels": labels,
+    }
+
+    try:
+        resp = requests.post(
+            f"{TODOIST_API_BASE}/tasks",
+            headers=headers,
+            json=payload,
+            timeout=15,
+        )
+        resp.raise_for_status()
+        task_id = resp.json()["id"]
+    except requests.RequestException as e:
+        logger.error(f"Failed to archive idea to Todoist: {e}")
+        return None
+    except (ValueError, KeyError, TypeError) as e:
+        # The request succeeded but the body was not the expected JSON object;
+        # honour the "None on failure" contract instead of raising.
+        logger.error(f"Failed to archive idea to Todoist: unexpected response ({e!r})")
+        return None
+
+    logger.info(f"Archived idea #{idea_number} to Todoist: {task_id}")
+    return task_id
diff --git a/src/indexers/todoist_indexer.py b/src/indexers/todoist_indexer.py
index 8f15b67..0ee7bef 100644
--- a/src/indexers/todoist_indexer.py
+++ b/src/indexers/todoist_indexer.py
@@ -115,6 +115,7 @@ def index_delta(self, hours_back: int = 24) -> dict[str, Any]:
stats = {
"tasks_synced": 0,
+ "tasks_indexed": 0,
"errors": 0,
}
@@ -128,7 +129,8 @@ def index_delta(self, hours_back: int = 24) -> dict[str, Any]:
for task in tasks:
project_name = project_map.get(task.get("project_id"), "Inbox")
self._index_task(task, project_name, stats)
- stats["tasks_synced"] += 1
+
+ stats["tasks_synced"] = stats["tasks_indexed"]
except Exception as e:
logger.error(f"Error in Todoist delta sync: {e}")
diff --git a/src/integrations/gcalendar.py b/src/integrations/gcalendar.py
index f308f54..1ba68a9 100644
--- a/src/integrations/gcalendar.py
+++ b/src/integrations/gcalendar.py
@@ -35,6 +35,12 @@ def service(self):
self._service = build("calendar", "v3", credentials=creds)
return self._service
+ def close(self):
+     """Close the cached Calendar service, if any, and forget it."""
+     if self._service is None:
+         return
+     self._service.close()
+     self._service = None
+
def list_calendars(self) -> list[dict]:
"""List all calendars the user has access to."""
try:
diff --git a/src/integrations/gdocs.py b/src/integrations/gdocs.py
index 2ed547a..28a0798 100644
--- a/src/integrations/gdocs.py
+++ b/src/integrations/gdocs.py
@@ -28,6 +28,15 @@ def __init__(self, account: str):
self._drive_service = None
self._docs_service = None
+ def close(self):
+     """Close whichever of the Drive and Docs services have been created."""
+     # Drive is handled before Docs; each handle is reset right after it closes.
+     for attr in ("_drive_service", "_docs_service"):
+         service = getattr(self, attr)
+         if service is not None:
+             service.close()
+             setattr(self, attr, None)
+
@property
def drive_service(self):
"""Lazily initialize the Drive service (for comments)."""
diff --git a/src/integrations/gdrive.py b/src/integrations/gdrive.py
index 0c44399..a1acf09 100644
--- a/src/integrations/gdrive.py
+++ b/src/integrations/gdrive.py
@@ -54,6 +54,12 @@ def service(self):
self._service = build("drive", "v3", credentials=creds)
return self._service
+ def close(self):
+     """Close the cached Drive service, if any, and forget it."""
+     if self._service is None:
+         return
+     self._service.close()
+     self._service = None
+
def get_about(self) -> dict:
"""Get user info and storage quota."""
return (
diff --git a/src/integrations/gmail.py b/src/integrations/gmail.py
index 90942e7..5c8852a 100644
--- a/src/integrations/gmail.py
+++ b/src/integrations/gmail.py
@@ -36,6 +36,12 @@ def service(self):
self._service = build("gmail", "v1", credentials=creds)
return self._service
+ def close(self):
+     """Close the cached Gmail service, if any, and forget it."""
+     if self._service is None:
+         return
+     self._service.close()
+     self._service = None
+
def get_profile(self) -> dict:
"""Get user profile information."""
return self.service.users().getProfile(userId="me").execute()
diff --git a/src/integrations/google_multi.py b/src/integrations/google_multi.py
index 89010eb..cc74a47 100644
--- a/src/integrations/google_multi.py
+++ b/src/integrations/google_multi.py
@@ -2,11 +2,12 @@
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
-from datetime import datetime, timedelta
+from datetime import datetime
from typing import Any
from ..config import GOOGLE_ACCOUNTS, GOOGLE_EMAILS, GOOGLE_TIER1, GOOGLE_TIER2, get_user_timezone
from .gcalendar import CalendarClient
+from .gdocs import DocsClient
from .gdrive import DriveClient
from .gmail import GmailClient
from .google_auth import check_all_accounts, get_credentials
@@ -22,6 +23,22 @@ def __init__(self):
self._gmail_clients: dict[str, GmailClient] = {}
self._drive_clients: dict[str, DriveClient] = {}
self._calendar_clients: dict[str, CalendarClient] = {}
+ self._docs_clients: dict[str, DocsClient] = {}
+
+ def close(self):
+     """Close every cached per-account client, then empty all client caches."""
+     caches = (
+         self._gmail_clients,
+         self._drive_clients,
+         self._calendar_clients,
+         self._docs_clients,
+     )
+     # Close everything first; the caches are emptied only once all closes succeeded.
+     for cache in caches:
+         for client in cache.values():
+             client.close()
+     for cache in caches:
+         cache.clear()
def get_gmail_client(self, account: str) -> GmailClient | None:
"""Get or create a Gmail client for an account."""
@@ -50,6 +67,15 @@ def get_calendar_client(self, account: str) -> CalendarClient | None:
return None
return self._calendar_clients.get(account)
+ def get_docs_client(self, account: str) -> DocsClient | None:
+     """Return the cached Docs client for *account*, creating it on first use.
+
+     Returns None when no client is cached and no credentials are available.
+     """
+     if account in self._docs_clients:
+         return self._docs_clients.get(account)
+     if not get_credentials(account):
+         return None
+     self._docs_clients[account] = DocsClient(account)
+     return self._docs_clients.get(account)
+
def get_authenticated_accounts(self) -> list[str]:
"""Get list of accounts with valid credentials."""
status = check_all_accounts()
@@ -529,3 +555,90 @@ def create_calendar_event(
location=location,
send_notifications=send_notifications,
)
+
+ def update_calendar_event(
+     self,
+     account: str,
+     event_id: str,
+     calendar_id: str = "primary",
+     send_notifications: bool = True,
+     **updates: Any,
+ ) -> dict[str, Any]:
+     """Update an event through the given account's Calendar client.
+
+     Extra keyword arguments are forwarded unchanged to the client's
+     ``update_event``.
+
+     Raises:
+         ValueError: If no Calendar client is available for the account.
+     """
+     calendar = self.get_calendar_client(account)
+     if calendar:
+         return calendar.update_event(
+             event_id=event_id,
+             calendar_id=calendar_id,
+             send_notifications=send_notifications,
+             **updates,
+         )
+     raise ValueError(f"No Calendar client available for account: {account}")
+
+ def delete_calendar_event(
+     self,
+     account: str,
+     event_id: str,
+     calendar_id: str = "primary",
+     send_notifications: bool = True,
+ ) -> None:
+     """Delete an event through the given account's Calendar client.
+
+     Raises:
+         ValueError: If no Calendar client is available for the account.
+     """
+     calendar = self.get_calendar_client(account)
+     if not calendar:
+         raise ValueError(f"No Calendar client available for account: {account}")
+
+     delete_kwargs = {
+         "event_id": event_id,
+         "calendar_id": calendar_id,
+         "send_notifications": send_notifications,
+     }
+     calendar.delete_event(**delete_kwargs)
+
+ def add_doc_comment(
+     self,
+     account: str,
+     document_id: str,
+     content: str,
+     quoted_text: str | None = None,
+ ) -> dict[str, Any]:
+     """Add a comment to a Google Doc through the given account's Docs client.
+
+     Raises:
+         ValueError: If no Docs client is available for the account.
+     """
+     docs = self.get_docs_client(account)
+     if docs:
+         return docs.add_comment(
+             document_id=document_id,
+             content=content,
+             quoted_text=quoted_text,
+         )
+     raise ValueError(f"No Docs client available for account: {account}")
+
+ def reply_to_doc_comment(
+     self,
+     account: str,
+     document_id: str,
+     comment_id: str,
+     content: str,
+ ) -> dict[str, Any]:
+     """Reply to a Google Doc comment through the given account's Docs client.
+
+     Raises:
+         ValueError: If no Docs client is available for the account.
+     """
+     docs = self.get_docs_client(account)
+     if not docs:
+         raise ValueError(f"No Docs client available for account: {account}")
+
+     reply_kwargs = {
+         "document_id": document_id,
+         "comment_id": comment_id,
+         "content": content,
+     }
+     return docs.reply_to_comment(**reply_kwargs)
+
+ def resolve_doc_comment(
+     self,
+     account: str,
+     document_id: str,
+     comment_id: str,
+ ) -> dict[str, Any]:
+     """Resolve a Google Doc comment through the given account's Docs client.
+
+     Raises:
+         ValueError: If no Docs client is available for the account.
+     """
+     docs = self.get_docs_client(account)
+     if docs:
+         return docs.resolve_comment(document_id=document_id, comment_id=comment_id)
+     raise ValueError(f"No Docs client available for account: {account}")
diff --git a/src/integrations/todoist_client.py b/src/integrations/todoist_client.py
index b389bdb..e0913bb 100644
--- a/src/integrations/todoist_client.py
+++ b/src/integrations/todoist_client.py
@@ -10,7 +10,7 @@
logger = logging.getLogger(__name__)
-TODOIST_BASE_URL = "https://api.todoist.com/rest/v2"
+TODOIST_BASE_URL = "https://api.todoist.com/api/v1"
class TodoistClient:
@@ -71,6 +71,61 @@ def _request(
return {}
return response.json()
+ def _extract_results(
+     self,
+     payload: dict[str, Any] | list[dict[str, Any]],
+ ) -> tuple[list[dict[str, Any]], str | None]:
+     """Normalize list responses for both legacy and unified API shapes.
+
+     Args:
+         payload: Decoded response body. Either a bare list of objects, a
+             dict holding a "results" list (optionally with "next_cursor"),
+             or a single object.
+
+     Returns:
+         ``(items, next_cursor)``. ``next_cursor`` is only taken from the
+         "results" shape and is None otherwise. An empty dict, which
+         ``_request`` can return in place of a decoded body, yields no
+         items rather than a one-element list holding ``{}``, so the
+         parse helpers never receive an object without its required keys.
+     """
+     if isinstance(payload, list):
+         return payload, None
+
+     # Anything that is not a non-empty dict carries no items.
+     if not isinstance(payload, dict) or not payload:
+         return [], None
+
+     results = payload.get("results")
+     if isinstance(results, list):
+         return results, payload.get("next_cursor")
+
+     # A single object returned where a list was expected.
+     return [payload], None
+
+ def _extract_single(self, payload: dict[str, Any] | list[dict[str, Any]]) -> dict[str, Any]:
+     """Return one object from a response, whatever shape it arrived in.
+
+     A dict with a "results" list or a bare list yields its first element
+     (``{}`` when empty); any other dict is returned as is; everything
+     else yields ``{}``.
+     """
+     if isinstance(payload, dict):
+         wrapped = payload.get("results")
+         if not isinstance(wrapped, list):
+             return payload
+         candidates = wrapped
+     elif isinstance(payload, list):
+         candidates = payload
+     else:
+         return {}
+
+     return candidates[0] if candidates else {}
+
+ def _request_paginated(
+     self,
+     endpoint: str,
+     params: dict[str, Any] | None = None,
+     page_limit: int = 200,
+ ) -> list[dict[str, Any]]:
+     """Fetch all pages from cursor-paginated endpoints.
+
+     Args:
+         endpoint: API path passed to ``_request``.
+         params: Extra query parameters; copied, never mutated. A "cursor"
+             entry is dropped so iteration always starts at the first page.
+         page_limit: Value sent as the "limit" query parameter on each request.
+
+     Returns:
+         The items of every page, concatenated in the order received.
+     """
+     all_results: list[dict[str, Any]] = []
+     query: dict[str, Any] = dict(params or {})
+     query["limit"] = page_limit
+     query.pop("cursor", None)
+     seen_cursors: set[str] = set()
+
+     while True:
+         payload = self._request("GET", endpoint, params=query)
+         results, next_cursor = self._extract_results(payload)
+         all_results.extend(results)
+
+         if not next_cursor:
+             break
+         if next_cursor in seen_cursors:
+             # A cursor that comes back twice would make this loop spin
+             # forever; stop and return what has been collected so far.
+             logger.warning(
+                 f"Todoist pagination for {endpoint} returned a repeated cursor; stopping"
+             )
+             break
+         seen_cursors.add(next_cursor)
+         query["cursor"] = next_cursor
+
+     return all_results
+
def test_connection(self) -> dict[str, Any]:
"""Test the connection to Todoist API.
@@ -96,7 +151,7 @@ def list_projects(self) -> list[dict[str, Any]]:
Returns:
List of project objects.
"""
- result = self._request("GET", "/projects")
+ result = self._request_paginated("/projects")
return [self._parse_project(p) for p in result]
def get_project(self, project_id: str) -> dict[str, Any]:
@@ -109,7 +164,7 @@ def get_project(self, project_id: str) -> dict[str, Any]:
Project object.
"""
result = self._request("GET", f"/projects/{project_id}")
- return self._parse_project(result)
+ return self._parse_project(self._extract_single(result))
# --- Tasks ---
@@ -127,13 +182,15 @@ def list_tasks(
Returns:
List of task objects.
"""
- params = {}
- if project_id:
- params["project_id"] = project_id
if filter:
- params["filter"] = filter
-
- result = self._request("GET", "/tasks", params=params if params else None)
+ result = self._request_paginated("/tasks/filter", params={"query": filter})
+ tasks = [self._parse_task(t) for t in result]
+ if project_id:
+ tasks = [t for t in tasks if t.get("project_id") == project_id]
+ return tasks
+
+ params = {"project_id": project_id} if project_id else {}
+ result = self._request_paginated("/tasks", params=params)
return [self._parse_task(t) for t in result]
def get_task(self, task_id: str) -> dict[str, Any]:
@@ -146,7 +203,7 @@ def get_task(self, task_id: str) -> dict[str, Any]:
Task object.
"""
result = self._request("GET", f"/tasks/{task_id}")
- return self._parse_task(result)
+ return self._parse_task(self._extract_single(result))
def create_task(
self,
@@ -188,7 +245,7 @@ def create_task(
body["labels"] = labels
result = self._request("POST", "/tasks", json=body)
- return self._parse_task(result)
+ return self._parse_task(self._extract_single(result))
def update_task(
self,
@@ -225,7 +282,7 @@ def update_task(
body["labels"] = labels
result = self._request("POST", f"/tasks/{task_id}", json=body)
- return self._parse_task(result)
+ return self._parse_task(self._extract_single(result))
def complete_task(self, task_id: str) -> bool:
"""Mark a task as complete.
@@ -271,7 +328,7 @@ def list_labels(self) -> list[dict[str, Any]]:
Returns:
List of label objects.
"""
- result = self._request("GET", "/labels")
+ result = self._request_paginated("/labels")
return [self._parse_label(l) for l in result]
# --- Comments ---
@@ -285,7 +342,7 @@ def list_comments(self, task_id: str) -> list[dict[str, Any]]:
Returns:
List of comment objects.
"""
- result = self._request("GET", "/comments", params={"task_id": task_id})
+ result = self._request_paginated("/comments", params={"task_id": task_id})
return [self._parse_comment(c) for c in result]
def add_comment(self, task_id: str, content: str) -> dict[str, Any]:
@@ -301,7 +358,7 @@ def add_comment(self, task_id: str, content: str) -> dict[str, Any]:
result = self._request(
"POST", "/comments", json={"task_id": task_id, "content": content}
)
- return self._parse_comment(result)
+ return self._parse_comment(self._extract_single(result))
# --- Parsing Helpers ---
@@ -312,7 +369,9 @@ def _parse_project(self, project: dict) -> dict[str, Any]:
"name": project["name"],
"color": project.get("color"),
"is_favorite": project.get("is_favorite", False),
- "is_inbox_project": project.get("is_inbox_project", False),
+ "is_inbox_project": project.get(
+ "inbox_project", project.get("is_inbox_project", False)
+ ),
"view_style": project.get("view_style", "list"),
"url": project.get("url"),
}
@@ -331,7 +390,7 @@ def _parse_task(self, task: dict) -> dict[str, Any]:
return {
"id": task["id"],
- "content": task["content"],
+ "content": task.get("content") or task.get("title", ""),
"description": task.get("description", ""),
"project_id": task.get("project_id"),
"priority": task.get("priority", 1),
diff --git a/src/integrations/zotero_client.py b/src/integrations/zotero_client.py
index ea30aa7..a43ab44 100644
--- a/src/integrations/zotero_client.py
+++ b/src/integrations/zotero_client.py
@@ -605,12 +605,20 @@ def _extract_doi_from_url(self, url: str) -> str | None:
if match:
return f"10.1038/{match.group(1)}"
+ # ScienceDirect: sciencedirect.com/science/article/pii/S...
+ # Uses PII not DOI in URL — look up via CrossRef
+ if "sciencedirect.com/" in url:
+ match = re.search(r"/pii/(S[\dX]+)", url)
+ if match:
+ doi = self._resolve_pii_to_doi(match.group(1))
+ if doi:
+ return doi
+
# Cell/Elsevier: cell.com/cell/fulltext/S0092-8674(24)00123-4
if "cell.com/" in url:
- match = re.search(r"cell\.com/[^/]+/fulltext/(S[\d-]+\(\d+\)[\d-]+)", url)
- if match:
- # Cell DOIs are complex, try CrossRef lookup by URL
- pass
+ doi = self._extract_doi_from_page(url)
+ if doi:
+ return doi
# Science: science.org/doi/10.1126/science.xxx
if "science.org/doi/" in url:
@@ -654,6 +662,59 @@ def _extract_doi_from_url(self, url: str) -> str | None:
if match:
return match.group(1).rstrip("/")
+ # Last resort: fetch DOI from page meta tags
+ return self._extract_doi_from_page(url)
+
+ def _extract_doi_from_page(self, url: str) -> str | None:
+ """Fetch the DOI from a page's meta tags (citation_doi, dc.identifier)."""
+ import re
+ import httpx
+
+ try:
+ with httpx.Client() as client:
+ resp = client.get(
+ url,
+ headers={"User-Agent": "Mozilla/5.0"},
+ follow_redirects=True,
+ timeout=15.0,
+ )
+ if resp.status_code != 200:
+ return None
+
+ html = resp.text
+ for attr in ("citation_doi", "dc.identifier", "dc.Identifier"):
+ for pattern in [
+ rf' str | None:
+ """Resolve an Elsevier PII to a DOI via CrossRef search."""
+ import httpx
+
+ try:
+ resp = httpx.get(
+ f"https://api.crossref.org/works?query={pii}&rows=1",
+ timeout=10.0,
+ )
+ if resp.status_code == 200:
+ items = resp.json().get("message", {}).get("items", [])
+ if items:
+ return items[0].get("DOI")
+ except Exception as e:
+ logger.debug(f"CrossRef PII lookup failed for {pii}: {e}")
+
return None
def _resolve_pmid_to_doi(self, pmid: str) -> str | None:
diff --git a/src/semantic/embedder.py b/src/semantic/embedder.py
index 5d9d0ad..1c8809c 100644
--- a/src/semantic/embedder.py
+++ b/src/semantic/embedder.py
@@ -4,7 +4,6 @@
import json
import logging
from pathlib import Path
-from typing import Any
from openai import OpenAI
from tenacity import retry, stop_after_attempt, wait_exponential
@@ -101,7 +100,7 @@ def embed_batch(self, texts: list[str]) -> list[list[float]]:
if to_embed:
new_embeddings = self._embed_batch(to_embed)
- for idx, embedding in zip(to_embed_indices, new_embeddings):
+ for idx, embedding in zip(to_embed_indices, new_embeddings, strict=True):
results.append((idx, embedding))
# Cache result
@@ -175,15 +174,21 @@ def _cache_key(self, text: str) -> str:
def _load_cache(self) -> None:
"""Load embedding cache from disk."""
- cache_file = self.cache_dir / "cache_index.json"
- if cache_file.exists():
+ loaded = 0
+ for entry_file in self.cache_dir.glob("*.json"):
+ if entry_file.name == "cache_index.json":
+ continue
try:
- with open(cache_file) as f:
- index = json.load(f)
- # Only load index, embeddings loaded on demand
- logger.info(f"Loaded embedding cache index with {len(index)} entries")
+ with open(entry_file) as f:
+ embedding = json.load(f)
+ if isinstance(embedding, list):
+ self._cache[entry_file.stem] = embedding
+ loaded += 1
except Exception as e:
- logger.warning(f"Error loading cache index: {e}")
+ logger.warning(f"Error loading embedding cache entry {entry_file.name}: {e}")
+
+ if loaded:
+ logger.info(f"Loaded {loaded} embedding cache entries")
def _save_cache_entry(self, key: str, embedding: list[float]) -> None:
"""Save a single cache entry to disk."""
diff --git a/src/semantic/semantic_indexer.py b/src/semantic/semantic_indexer.py
index 5d82692..e23608d 100644
--- a/src/semantic/semantic_indexer.py
+++ b/src/semantic/semantic_indexer.py
@@ -277,6 +277,7 @@ def _chunk_content(self, content: dict) -> list[Chunk]:
base_metadata = self._sanitize_metadata({
"source_id": content["id"],
"source_type": content_type,
+ "source": content.get("source"),
"title": title,
"source_account": content.get("source_account"),
"timestamp": str(content.get("timestamp")) if content.get("timestamp") else None,
@@ -340,7 +341,7 @@ def _embed_and_store_chunks(
documents = []
metadatas = []
- for chunk, embedding in zip(batch_chunks, embeddings):
+ for chunk, _embedding in zip(batch_chunks, embeddings, strict=True):
source_id = chunk.metadata.get("source_id", "unknown")
chunk_id = f"{source_id}:chunk:{chunk.chunk_index}"
diff --git a/tests/test_executor.py b/tests/test_executor.py
index 99e0a39..44fa4f6 100644
--- a/tests/test_executor.py
+++ b/tests/test_executor.py
@@ -1,21 +1,22 @@
"""Tests for agent executor."""
-from datetime import datetime, timezone
-from unittest.mock import MagicMock, patch, AsyncMock
+from datetime import datetime, timedelta
+from unittest.mock import MagicMock, patch
+from zoneinfo import ZoneInfo
import pytest
+from src.bot.conversation import ConversationContext
from src.bot.executor import (
+ MAX_ITERATIONS,
+ SYSTEM_PROMPT,
AgentExecutor,
- ToolExecutor,
ExecutionResult,
StreamEvent,
StreamEventType,
- MAX_ITERATIONS,
- SYSTEM_PROMPT,
+ ToolExecutor,
)
from src.bot.tools import ToolResult
-from src.bot.conversation import ConversationContext
class TestToolExecutor:
@@ -86,6 +87,41 @@ def test_execute_get_calendar_events(self, mock_google, executor):
assert result.success is True
assert result.data["event_count"] == 1
+ assert result.data["upcoming_event_count"] == 1
+ assert result.data["next_event"]["summary"] == "Team Meeting"
+
+ @patch("src.bot.executor.ToolExecutor.multi_google", new_callable=MagicMock)
+ def test_execute_get_calendar_events_filters_past_events(
+     self,
+     mock_google,
+     executor,
+ ):
+     """Past events count toward the total but not toward upcoming events."""
+     current = datetime.now(ZoneInfo("America/Los_Angeles"))
+     hour = timedelta(hours=1)
+     finished = {
+         "id": "past",
+         "summary": "Already Happened",
+         "start": current - 2 * hour,
+         "end": current - hour,
+     }
+     pending = {
+         "id": "future",
+         "summary": "Next Meeting",
+         "start": current + hour,
+         "end": current + 2 * hour,
+     }
+     mock_google.get_all_calendars_for_date.return_value = [finished, pending]
+
+     outcome = executor.execute("GetCalendarEventsTool", {"date": "today"})
+
+     assert outcome.success is True
+     assert outcome.data["event_count"] == 2
+     assert outcome.data["upcoming_event_count"] == 1
+     assert outcome.data["next_event"]["summary"] == "Next Meeting"
+     upcoming_titles = [event["summary"] for event in outcome.data["upcoming_events"]]
+     assert upcoming_titles == ["Next Meeting"]
@patch("src.bot.executor.ToolExecutor.multi_google", new_callable=MagicMock)
def test_execute_check_availability(self, mock_google, executor):
@@ -103,6 +139,56 @@ def test_execute_check_availability(self, mock_google, executor):
assert result.success is True
assert result.data["free_slot_count"] == 2
+ def test_execute_update_calendar_event_queues_confirmation(self, executor):
+     """A calendar update is queued as a pending action, not applied directly."""
+     ctx = ConversationContext(user_id="U1", channel_id="C1")
+     tool_input = {
+         "event_id": "event123",
+         "account": "arc",
+         "calendar_id": "primary",
+         "title": "Updated Meeting",
+         "date": "tomorrow",
+         "time": "2pm",
+         "duration_minutes": 30,
+     }
+
+     outcome = executor.execute("UpdateCalendarEventTool", tool_input, context=ctx)
+
+     assert outcome.success is True
+     assert outcome.data["requires_confirmation"] is True
+     assert "confirmation" in outcome.data
+     assert ctx.pending_action is not None
+
+ def test_execute_update_calendar_event_requires_updates(self, executor):
+     """A calendar update that names no changed fields is rejected."""
+     ctx = ConversationContext(user_id="U1", channel_id="C1")
+     tool_input = {"event_id": "event123", "account": "arc"}
+
+     outcome = executor.execute("UpdateCalendarEventTool", tool_input, context=ctx)
+
+     assert outcome.success is False
+     assert "No calendar event updates" in outcome.error
+
+ def test_execute_delete_calendar_event_queues_confirmation(self, executor):
+     """A calendar cancellation is queued as a pending action."""
+     ctx = ConversationContext(user_id="U1", channel_id="C1")
+     tool_input = {"event_id": "event123", "account": "arc"}
+
+     outcome = executor.execute("DeleteCalendarEventTool", tool_input, context=ctx)
+
+     assert outcome.success is True
+     assert outcome.data["requires_confirmation"] is True
+     assert ctx.pending_action is not None
+
@patch("src.bot.executor.ToolExecutor.multi_google", new_callable=MagicMock)
def test_execute_get_unread_counts(self, mock_google, executor):
"""Test executing unread counts tool."""
@@ -117,20 +203,21 @@ def test_execute_get_unread_counts(self, mock_google, executor):
assert result.data["total_unread"] == 15
assert result.data["by_account"]["arc"] == 5
- @patch("src.bot.executor.ToolExecutor.multi_google", new_callable=MagicMock)
- def test_execute_create_email_draft(self, mock_google, executor):
- """Test executing create email draft tool."""
- mock_google.create_draft.return_value = {"id": "draft123"}
+ def test_execute_create_email_draft(self, executor):
+ """Test create email draft queues a confirmation action."""
+ context = ConversationContext(user_id="U1", channel_id="C1")
result = executor.execute("CreateEmailDraftTool", {
"to": "test@example.com",
"subject": "Test Subject",
"body": "Test body",
"account": "arc",
- })
+ }, context=context)
assert result.success is True
- assert result.data["draft_id"] == "draft123"
+ assert result.data["requires_confirmation"] is True
+ assert "confirmation" in result.data
+ assert context.pending_action is not None
def test_execute_send_email_is_blocked(self, executor):
"""Test direct send-email tool is blocked defensively."""
@@ -212,23 +299,169 @@ def test_execute_search_github_code_in_repo(self, mock_github, executor):
mock_github.search_code_in_repo.assert_called_once()
assert result.success is True
- @patch("src.bot.executor.ToolExecutor.github_client", new_callable=MagicMock)
- def test_execute_create_github_issue(self, mock_github, executor):
- """Test executing create GitHub issue tool."""
- mock_github.create_issue.return_value = {
- "number": 42,
- "html_url": "https://github.com/owner/repo/issues/42",
- }
+ def test_execute_create_github_issue(self, executor):
+ """Test create GitHub issue queues a confirmation action."""
+ context = ConversationContext(user_id="U1", channel_id="C1")
result = executor.execute("CreateGitHubIssueTool", {
"repo": "owner/repo",
"title": "New Issue",
"body": "Issue description",
"labels": ["bug"],
- })
+ }, context=context)
+
+ assert result.success is True
+ assert result.data["requires_confirmation"] is True
+ assert "confirmation" in result.data
+ assert context.pending_action is not None
+
+ @patch("src.bot.executor.ToolExecutor.todoist_client", new_callable=MagicMock)
+ def test_execute_update_todoist_task_queues_confirmation(self, mock_todoist, executor):
+     """A Todoist task update is queued as a pending action."""
+     mock_todoist.get_task.return_value = {"id": "task123", "content": "Old task"}
+     ctx = ConversationContext(user_id="U1", channel_id="C1")
+     tool_input = {
+         "task_id": "task123",
+         "content": "New task",
+         "due": "tomorrow",
+         "priority": 4,
+     }
+
+     outcome = executor.execute("UpdateTodoistTaskTool", tool_input, context=ctx)
+
+     assert outcome.success is True
+     assert outcome.data["requires_confirmation"] is True
+     assert ctx.pending_action is not None
+
+ def test_execute_add_todoist_comment_queues_confirmation(self, executor):
+     """Adding a Todoist comment is queued as a pending action."""
+     ctx = ConversationContext(user_id="U1", channel_id="C1")
+     tool_input = {"task_id": "task123", "content": "Follow up with Alice."}
+
+     outcome = executor.execute("AddTodoistCommentTool", tool_input, context=ctx)
+
+     assert outcome.success is True
+     assert outcome.data["requires_confirmation"] is True
+     assert ctx.pending_action is not None
+
+ def test_execute_reopen_todoist_task_queues_confirmation(self, executor):
+ """Test reopening Todoist tasks is confirmation-gated."""
+ context = ConversationContext(user_id="U1", channel_id="C1")
+
+ result = executor.execute(
+ "ReopenTodoistTaskTool",
+ {"task_id": "task123"},
+ context=context,
+ )
assert result.success is True
- assert result.data["issue_number"] == 42
+ assert result.data["requires_confirmation"] is True
+ assert context.pending_action is not None
+
+ def test_execute_add_google_doc_comment_queues_confirmation(self, executor):
+     """Adding a Google Doc comment is queued as a pending action."""
+     ctx = ConversationContext(user_id="U1", channel_id="C1")
+     tool_input = {
+         "document_id": "doc123",
+         "content": "Please clarify this section.",
+         "quoted_text": "ambiguous sentence",
+         "account": "arc",
+     }
+
+     outcome = executor.execute("AddGoogleDocCommentTool", tool_input, context=ctx)
+
+     assert outcome.success is True
+     assert outcome.data["requires_confirmation"] is True
+     assert ctx.pending_action is not None
+
+ def test_execute_reply_google_doc_comment_queues_confirmation(self, executor):
+     """Replying to a Google Doc comment is queued as a pending action."""
+     ctx = ConversationContext(user_id="U1", channel_id="C1")
+     tool_input = {
+         "document_id": "doc123",
+         "comment_id": "comment123",
+         "content": "Resolved in the latest draft.",
+         "account": "arc",
+     }
+
+     outcome = executor.execute("ReplyGoogleDocCommentTool", tool_input, context=ctx)
+
+     assert outcome.success is True
+     assert outcome.data["requires_confirmation"] is True
+     assert ctx.pending_action is not None
+
+ def test_execute_resolve_google_doc_comment_queues_confirmation(self, executor):
+     """Resolving a Google Doc comment is queued as a pending action."""
+     ctx = ConversationContext(user_id="U1", channel_id="C1")
+     tool_input = {
+         "document_id": "doc123",
+         "comment_id": "comment123",
+         "account": "arc",
+     }
+
+     outcome = executor.execute("ResolveGoogleDocCommentTool", tool_input, context=ctx)
+
+     assert outcome.success is True
+     assert outcome.data["requires_confirmation"] is True
+     assert ctx.pending_action is not None
+
+ def test_execute_get_proactive_settings_requires_context(self, executor):
+     """Reading proactive settings without a conversation context fails."""
+     outcome = executor.execute("GetProactiveSettingsTool", {})
+
+     assert outcome.success is False
+     assert "Missing conversation context" in outcome.error
+
+ @patch("src.bot.executor.ToolExecutor.proactive_settings", new_callable=MagicMock)
+ def test_execute_get_proactive_settings(self, mock_settings_store, executor):
+     """Settings are looked up by the context's user ID and returned as data."""
+     stored = MagicMock()
+     stored.to_dict.return_value = {
+         "user_id": "U1",
+         "daily_briefing_enabled": True,
+     }
+     mock_settings_store.get.return_value = stored
+     ctx = ConversationContext(user_id="U1", channel_id="C1")
+
+     outcome = executor.execute("GetProactiveSettingsTool", {}, context=ctx)
+
+     assert outcome.success is True
+     assert outcome.data["user_id"] == "U1"
+     mock_settings_store.get.assert_called_once_with("U1")
+
+ def test_execute_update_proactive_settings_queues_confirmation(self, executor):
+     """A proactive-settings update is queued as a pending action."""
+     ctx = ConversationContext(user_id="U1", channel_id="C1")
+     tool_input = {
+         "daily_briefing_enabled": False,
+         "briefing_hour": 8,
+         "quiet_hours_start": 22,
+         "quiet_hours_end": 7,
+     }
+
+     outcome = executor.execute("UpdateProactiveSettingsTool", tool_input, context=ctx)
+
+     assert outcome.success is True
+     assert outcome.data["requires_confirmation"] is True
+     assert ctx.pending_action is not None
@patch("src.bot.executor.ToolExecutor.query_engine", new_callable=MagicMock)
def test_execute_find_person(self, mock_engine, executor):
@@ -522,7 +755,7 @@ def test_run_with_user_memory(self, mock_anthropic, mock_context):
mock_memory.get_context_summary.return_value = "User prefers Arc email"
executor = AgentExecutor(api_key="test-key", user_memory=mock_memory)
- result = executor.run("Check my email", mock_context)
+ executor.run("Check my email", mock_context)
# Verify memory was queried
mock_memory.get_context_summary.assert_called_once_with(mock_context.user_id)