From 2b21637b5acbf4df6d88b32dc7930d749452518f Mon Sep 17 00:00:00 2001
From: Lucas Parzych <lucas_parzych@intuit.com>
Date: Wed, 18 Mar 2026 11:20:16 -0700
Subject: [PATCH 01/13] chore: plan for refactor

---
 ...message_ids_refactor_plan_9eb1b08f.plan.md | 336 ++++++++++++++++++
 1 file changed, 336 insertions(+)
 create mode 100644 .cursor/plans/message_ids_refactor_plan_9eb1b08f.plan.md

diff --git a/.cursor/plans/message_ids_refactor_plan_9eb1b08f.plan.md b/.cursor/plans/message_ids_refactor_plan_9eb1b08f.plan.md
new file mode 100644
index 000000000..50ff977a7
--- /dev/null
+++ b/.cursor/plans/message_ids_refactor_plan_9eb1b08f.plan.md
@@ -0,0 +1,336 @@
+---
+name: message_ids refactor plan
+overview: Overhaul MIRIX message management by removing the message_ids JSON array from Agent, eliminating redundant system message storage, and introducing in-memory message accumulation for agent steps. This resolves scaling bottlenecks, removes write contention, and eliminates wasteful write-then-delete churn.
+todos:
+  - id: in-memory-accumulator
+    content: Refactor Agent.step() and inner_step() to accumulate messages in-memory during the step loop instead of persisting after each inner_step
+    status: pending
+  - id: system-message-from-agent
+    content: Construct system message on the fly from agent_state.system instead of storing it as a Message row
+    status: pending
+  - id: orm-changes
+    content: Add message_type to Message ORM; remove message_ids from Agent ORM; change messages relationship to lazy=noload
+    status: pending
+  - id: schema-changes
+    content: Update Pydantic schemas for Agent (remove message_ids) and Message (add message_type)
+    status: pending
+  - id: agent-manager-rewrite
+    content: Rewrite AgentManager message methods to use query-based retrieval ordered by created_at, id
+    status: pending
+  - id: message-manager-updates
+    content: Add query-based fetch and bulk soft-delete methods; remove detached message cleanup
+    status: pending
+  - id: llm-api-layer
+    content: Update Anthropic and other LLM clients that assume messages[0] is the system message
+    status: pending
+  - id: retention-config
+    content: Add message_retention_count to Client ORM/schema; implement retention enforcement at end of step()
+    status: pending
+  - id: cleanup-managers
+    content: Replace message_ids manipulation in UserManager, ClientManager with bulk message soft-delete
+    status: pending
+  - id: api-client-sdk
+    content: Update REST API, server, SDK, and client layers to remove message_ids references
+    status: pending
+  - id: ecms-sync
+    content: Remove message_ids from ECMS IPSR agents entity
+    status: pending
+  - id: migration
+    content: Create Alembic migration to add message_type, message_retention_count, indexes, soft-delete system messages, and drop message_ids
+    status: pending
+  - id: tests
+    content: Update and add tests for new message management patterns
+    status: pending
+isProject: false
+---
+
+# Proposal: MIRIX Message Management Overhaul
+
+## 1. Problem
+
+MIRIX manages agent conversation history through a `message_ids` JSON column on the `agents` table. This is a flat array of message IDs representing the agent's "in-context memory." The design has several scaling and correctness problems:
+
+**Scaling bottleneck.** One meta-agent (and its sub-agents) exists per client, shared across all end-users. The `message_ids` array on a single agent row accumulates message IDs for every user. Every message operation (append, trim, clear) requires a read-modify-write of this array on the agent row, creating write contention when multiple workers process messages for different users concurrently.
+
+**Lost message state updates under concurrent load.** Because the `message_ids` array is read-modify-written as a whole, concurrent processing of messages for different users on the same agent causes lost updates. Worker A reads `message_ids`, Worker B reads the same `message_ids`, both append their respective message IDs, and whichever writes last silently overwrites the other's changes. This is a classic lost-update anomaly. Under production load — where a single agent processes messages for hundreds of millions of users simultaneously — this means message references are silently dropped, leading to missing conversation context, orphaned message rows, and non-deterministic agent behavior.
+
+**Eager loading hazard.** The `messages` relationship on the Agent ORM uses `lazy="selectin"`, which means loading an agent eagerly loads *all* of its messages into memory. For an agent serving millions of users, this is a ticking time bomb.
+
+**Redundant system message storage.** The system prompt is stored twice: once in `agent.system` (a column on the agent row) and once as a `Message` row at position 0 of `message_ids`. The code reads the system prompt from the message row, enriches it with memories, and sends it to the LLM. The `agent.system` column is the source of truth but the message row is what gets used at runtime.
+
+**Write-then-delete churn.** For the ECMS memory extraction path (the production use case), every agent step persists messages to the database, then immediately deletes them when `CLEAR_HISTORY_AFTER_MEMORY_UPDATE` fires. Sub-agents each store their own copies of the input messages, their LLM responses, tool results, and heartbeat messages -- all of which are deleted moments later. This is pure I/O overhead.
+
+## 2. Changes
+
+This proposal makes four interconnected changes:
+
+### 2.1 Store Message History In-Memory only
+
+Today, each call to `inner_step()` persists all new messages to the database via `append_to_in_context_messages`, then the next `inner_step()` reads them back via `get_in_context_messages` to build the LLM context. For memory extraction agents, these messages are written and then mostly deleted when `CLEAR_HISTORY_AFTER_MEMORY_UPDATE` fires.
+
+Each sub-agent (episodic, semantic, etc.) stores **its own redundant copy** of the input messages, LLM responses, tool results, heartbeat messages and message summaries in its `message_id` property.
+
+After a memory agent finishes, the history clearing logic resets the `message_ids` to only include: the system message & the current message-set that just processed. The heartbeats, tool calls, and any previously processed messages are cleared.
+
+**Change:**
+
+The `step()` loop will maintain an in-memory message list. Each `inner_step()` appends to this list instead of writing to the database. Messages are only persisted at the end of the `step()` loop, and only when the client's retention policy calls for it (see section 2.4)
+
+This eliminates:
+
+- All write-then-delete churn for the ECMS path
+- Redundant message copies across sub-agents (each sub-agent currently stores its own copy of the input)
+- The `save_agent()` call that writes `message_ids` after every step
+- The `delete_detached_messages_for_agent()` cleanup
+
+Observability is not affected: the `steps` table and LangFuse traces still capture all LLM interactions.
+
+For chaining (multiple steps in one `step()` call), the in-memory list grows across steps within the same invocation. The LLM sees the full conversation history without any database round-trips between steps.
+
+
+
+### 2.2 Store System Message Exclusively in Agent State
+
+Today, the system prompt is stored as a `Message` row with `role="system"` at position 0 of `message_ids`. The code in `inner_step()` reads it back, enriches it with retrieved memories, and mutates it in-memory before sending to the LLM. The `rebuild_system_prompt` method creates a new Message row and swaps `message_ids[0]` every time the prompt changes.
+
+**Change:**
+
+The system prompt will live exclusively in `agent_state.system`. When building the LLM message list, `inner_step()` constructs a system `Message` object on the fly from `agent_state.system`, enriches it with memories, and prepends it. No system message is stored in the messages table.
+
+This eliminates:
+
+- The duplicate storage of the system prompt
+- The `rebuild_system_prompt` dance of creating a new message row and swapping array positions
+- The convention that `messages[0]` is always the system message (a source of fragile assumptions across the codebase)
+- The `get_system_message()` method (callers read `agent_state.system` directly)
+
+
+
+### 2.3 Remove `message_ids` From Agent
+
+The `message_ids` JSON column on the `agents` table will be removed entirely. This is the main goal of this plan. For agent types that persist messages (retention > 0), conversation history is retrieved by querying the `messages` table directly, scoped by `(agent_id, user_id)` and ordered by `created_at`.
+
+**Ordering strategy:** `ORDER BY created_at, id`
+
+- `created_at` reflects processing order, which is what the LLM actually saw. Kafka already guarantees in-order delivery per user (messages are partitioned by `user_id`), so processing order matches real-world order.
+- `id` is the tiebreaker for deterministic ordering when timestamps match (a practically impossible edge-case).
+
+The `messages` relationship on the Agent ORM also changes from `lazy="selectin"` to `lazy="noload"` to prevent accidental eager loading.
+
+
+
+### 2.4 Configurable Message Retention Per Client
+
+Today, the history clearing behavior is hardcoded: after memory extraction, keep the system message + one "last edited memory item" summary + the most recent input message-set. Different clients have different needs:
+
+- A **batch client** (like ECMS) that sends an entire conversation thread as a single `save` call has no use for retained messages. It wants `N=0`.
+- An **interactive agent** that processes messages one at a time may benefit from seeing what it did in the last few invocations. It wants `N=5` or similar.
+
+**Change:** Add a `message_retention_count` field to the Client model. This integer controls how many recent **input message-sets** are retained in the database after processing.
+
+A **message-set** is defined as the input messages from a single `step()` invocation — i.e., the conversation content that was sent to the agent for processing. It does not include the agent's internal working messages (assistant responses, tool calls, tool results, heartbeats). Those exist only in the in-memory accumulator during the step and are not persisted.
+
+- `message_retention_count = 0` -- No messages persisted. All agent work is in-memory only. (Default for memory extraction clients.)
+- `message_retention_count = N` -- Keep the N most recent input message-sets per `(agent_id, user_id)`. Older sets are soft-deleted.
+
+When retention >= 1, the start of a `step()` invocation loads the retained input message-sets from the DB into the in-memory accumulator, giving the agent context about what it processed recently. If the accumulated in-memory context grows large (high N or large messages), summarization compresses older messages in the in-memory list. The post-summary state is what gets persisted at the end.
+
+This replaces:
+
+- The `CLEAR_HISTORY_AFTER_MEMORY_UPDATE` environment variable (a global boolean)
+- The hardcoded "keep system message + last edited item" behavior
+- The per-agent-type branching logic that builds the "last edited memory item" summary
+
+**Note:** The MIRIX chat agent (`chat_agent` type) is a known casualty of this change. It requires retention of full step outputs (including assistant responses, tool calls, and tool results) for conversational continuity. This will be addressed in a follow-up.
+
+**Schema change:** Add `message_retention_count` (nullable Integer, default 0) to `[mirix/orm/client.py](mirix/orm/client.py)` and `[mirix/schemas/client.py](mirix/schemas/client.py)`.
+
+### 2.5 Add `message_type` to Messages
+
+A `message_type` column (`String`, nullable) is added to the `messages` table. It distinguishes message origins: `"original"` for normal messages, `"summary"` for messages created by the summarizer. This replaces the implicit convention of identifying summaries by their content.
+
+Note: `occurred_at` is **not** added to the messages table. The real-world timestamp of a conversation is already stored where it matters — on the memory records themselves (episodic events, raw memories, etc.). Message ordering uses `created_at` (processing order), which is correct because Kafka guarantees in-order delivery per user and the LLM's context should reflect what it actually saw, not a reconstructed timeline.
+
+## 3. How It Works End-to-End
+
+### ECMS Memory Extraction (message_retention_count = 0)
+
+```
+1. POST /memory/add → put_messages() → Kafka
+2. Worker consumes → _process_message_async() → server.send_messages()
+3. MetaMemoryAgent.step():
+   - Constructs system message in-memory from agent_state.system
+   - Sends [system_msg, input_msg] to LLM
+   - LLM returns trigger_memory_update(["episodic", "semantic"])
+   - Accumulates assistant response + tool result in-memory (not persisted)
+4. trigger_memory_update() runs sub-agents in parallel:
+   - EpisodicMemoryAgent.step():
+     - Constructs system message in-memory
+     - Sends [system_msg, input_copy] to LLM
+     - LLM returns episodic_memory_insert(...)
+     - Tool executes → writes to episodic_events table (this IS persisted)
+     - Accumulates messages in-memory (not persisted)
+   - SemanticMemoryAgent.step(): same pattern
+5. retention_count=0 → no messages written to the messages table
+6. No agent row updates
+7. Kafka offset committed
+```
+
+### Interactive Agent (message_retention_count = 3)
+
+```
+1. Agent processes a message via step()
+2. Load retained input message-sets from DB into in-memory accumulator
+   → agent sees up to 3 prior input message-sets as context
+3. Construct system message from agent_state.system
+4. Send [system_msg, retained_inputs..., new_input] to LLM
+5. LLM responds, agent executes tools, all intermediate messages
+   accumulated in-memory (not persisted)
+6. At end of step(): persist the new input message-set to the messages table
+7. Enforce retention: soft-delete input message-sets older than the 3 most recent
+```
+
+### MIRIX Chat Agent (known broken — follow-up)
+
+The chat agent requires retention of full step outputs (assistant responses, tool calls, tool results) for conversational continuity. This is not supported by the input-message-set-only retention model. The chat agent will be addressed in a follow-up change.
+
+### Summarization (retention >= 1 with high N)
+
+When retention is high, the in-memory accumulator can grow large (retained input message-sets from prior invocations + new messages from the current step). If the token count exceeds the memory pressure threshold, summarization compresses the in-memory list:
+
+1. Detect memory pressure after an `inner_step()` completes
+2. Calculate cutoff using token counts on the in-memory list (same logic as today)
+3. Send older messages from the in-memory list to the LLM summarizer
+4. Replace the summarized messages in the in-memory list with a single summary message (`message_type="summary"`)
+5. Continue the `step()` loop with the reduced in-memory list
+6. At end of `step()`, persist the post-summary state to the DB (same as normal retention flow)
+
+No mid-step DB writes are needed. The summarizer works entirely on the in-memory list, and the final state — including the summary message — is what gets persisted at the end.
+
+For retention = 0, summarization never fires — there are no retained messages to accumulate, and memory extraction agents run a single step.
+
+## 4. Edge Cases and Special Considerations
+
+**In-memory message loss on crash.** If a worker crashes mid-step, in-memory messages are lost. For memory agents this is fine -- the messages were going to be deleted anyway, and the Kafka offset hasn't been committed (Phase 2 will add manual commit). For chat agents, this is a behavior change: today a crash mid-step leaves partial messages in the DB. With this change, a crash loses the entire step's messages. This is arguably better (no partial state) and aligns with the Kafka retry strategy in Phase 2.
+
+**Timestamp ties.** Two messages with the same `created_at` are disambiguated by `id`. In practice, messages within a step are created sequentially and differ by microseconds. The `id` tiebreaker gives a stable order for the rare tie case.
+
+**Chat agent is broken.** The MIRIX chat agent requires retention of full step outputs (assistant responses, tool calls, tool results) for conversational continuity. The input-message-set-only retention model does not support this. This is a known, accepted trade-off — the chat agent will be fixed in a follow-up.
+
+**Anthropic client assumption.** The Anthropic LLM client asserts `messages[0].role == "system"`. The caller (`inner_step`) will prepend the system message before passing to the LLM client, so this assertion continues to hold. The change is that the system message comes from `agent_state.system` rather than from a DB row.
+
+**Removal of "last edited memory item" summary.** Today, the history clearing code builds a per-agent-type summary message (e.g., "Last edited memory item: [Episodic Event ID]: ...") and keeps it as the sole retained message. With configurable retention, a client using `message_retention_count=1` retains the raw messages from the last invocation, which contain the same information in the tool results. The synthetic summary construction (lines 1273-1391 in `agent.py`) is removed. If the synthetic summary format is specifically needed, it can be reintroduced as an optional post-processing step, but the raw tool results are arguably more useful since they contain the full structured data.
+
+## 5. How This Sets Up Phase 2 (Kafka Durability, Idempotency, Retries)
+
+This refactor is Phase 1. Phase 2 will add manual Kafka offset commit, retry limits, and a dead-letter queue. The changes in this refactor are specifically designed to make Phase 2 straightforward.
+
+### 6.1 No Partial State Left Behind
+
+**Today's problem.** If a worker crashes mid-step, you get partial state: some messages are persisted in the messages table, some aren't; `message_ids` on the agent row may or may not have been updated; some memory inserts (episodic events, etc.) may have succeeded, others not. The Kafka offset is already auto-committed, so the message won't be retried.
+
+**After this refactor.** A crash mid-step leaves zero message state in the DB (for retention=0 clients). The only side effects are the actual memory writes (episodic events, semantic items, etc.). When Phase 2 switches to manual Kafka offset commit, a crash means the offset isn't committed, so the message gets redelivered. The retry sees a clean slate in the messages table — no partial message state to conflict with.
+
+### 6.2 No Agent Row Contention
+
+**Today's problem.** Every message operation does a read-modify-write on the agent row's `message_ids`. If two workers process messages for different users on the same agent concurrently, they race on the same row. With manual Kafka commit + retries, this gets worse — a retried message could interleave with a new message's processing.
+
+**After this refactor.** The agent row is never updated during message processing. Workers operating on different users are completely independent — they only touch the messages table, scoped by `(agent_id, user_id)`. Retries don't conflict with concurrent processing.
+
+### 6.3 Memory Writes Become the Idempotency Boundary
+
+With messages out of the picture, the only persistent side effects of processing a Kafka message are the actual memory writes:
+
+- `episodic_events` table inserts
+- `semantic_memory_items` table inserts
+- `resource_memory_items` table inserts
+- `procedural_memory_items` table inserts
+- `knowledge_vault_items` table inserts
+
+For Phase 2, these are the operations that need idempotency keys. A natural key would be the Kafka message offset + partition, or a hash of `(user_id, input_content)`. If a retry attempts to insert a memory that already exists (same idempotency key), it's a no-op.
+
+This refactor doesn't add idempotency keys yet, but it dramatically simplifies where they need to go. Instead of needing idempotency across messages table + agent row + memory tables, you only need it on the memory tables.
+
+## 6. Files to Modify
+
+### Agent Execution (`[mirix/agent/agent.py](mirix/agent/agent.py)`)
+
+The heaviest changes. Key modifications:
+
+- `inner_step()`: construct system message from `agent_state.system`; append new messages to an in-memory list instead of calling `append_to_in_context_messages`; load `in_context_messages` from the in-memory list (for chaining) or from DB query (for first step with retention > 0)
+- `step()`: maintain the in-memory message accumulator; at end of loop, check client's `message_retention_count` to decide whether/how much to persist; enforce retention limit by soft-deleting excess message-sets
+- `_handle_ai_response()`: remove the entire `should_clear_history` / `CLEAR_HISTORY_AFTER_MEMORY_UPDATE` block and the per-agent-type "last edited memory item" logic. Retention is now handled uniformly at the end of `step()` based on the client config.
+- `save_agent()`: remove `message_ids` write (this function may become a no-op or be removed)
+- `summarize_messages_inplace()`: rewrite to operate on the in-memory message list directly instead of reading/writing the DB. Calculate cutoff, call LLM summarizer, replace old messages with summary message in the list. No DB operations mid-step.
+
+### Agent Manager (`[mirix/services/agent_manager.py](mirix/services/agent_manager.py)`)
+
+Rewrite or remove message-related methods:
+
+- `get_in_context_messages()` -- query `messages` table by `(agent_id, user_id)`, no system message
+- `get_system_message()` -- return `agent_state.system` directly, or remove
+- `append_to_in_context_messages()` -- just create message rows (no agent row update). Only called for persistence at end of step.
+- `prepend_to_in_context_messages()` -- remove (summarizer creates messages directly; no need to "prepend" when ordering is by `created_at`)
+- `set_in_context_messages()` -- remove entirely
+- `trim_older_in_context_messages()` -- soft-delete older messages via query
+- `trim_all_in_context_messages_except_system()` -- rename to `clear_user_messages()`, soft-delete via query
+- `reset_messages()` -- soft-delete user's messages directly
+- `rebuild_system_prompt()` -- just update `agent.system` column
+- `_generate_initial_message_sequence()` -- no longer creates a system message row
+- Remove `message_ids` from `_update_agent()` and Redis cache serialization
+
+### Message Manager (`[mirix/services/message_manager.py](mirix/services/message_manager.py)`)
+
+- Add `get_messages_for_agent_user(agent_id, user_id)` -- query ordered by `created_at, id`
+- Add `soft_delete_user_messages(agent_id, user_id)` -- bulk soft-delete
+- Remove `delete_detached_messages_for_agent()` and `cleanup_all_detached_messages()`
+
+### ORM Models
+
+- `[mirix/orm/message.py](mirix/orm/message.py)` -- Add `message_type` column; add composite index on `(agent_id, user_id, is_deleted)`
+- `[mirix/orm/agent.py](mirix/orm/agent.py)` -- Remove `message_ids` column; change `messages` relationship to `lazy="noload"`
+- `[mirix/orm/client.py](mirix/orm/client.py)` -- Add `message_retention_count` column (nullable Integer, default 0)
+- `[mirix/orm/sqlalchemy_base.py](mirix/orm/sqlalchemy_base.py)` -- Remove `message_ids` from Redis cache serialization
+
+### Pydantic Schemas
+
+- `[mirix/schemas/agent.py](mirix/schemas/agent.py)` -- Remove `message_ids` from `AgentState` and `UpdateAgent`
+- `[mirix/schemas/message.py](mirix/schemas/message.py)` -- Add `message_type` field
+- `[mirix/schemas/client.py](mirix/schemas/client.py)` -- Add `message_retention_count` field
+
+### LLM API Layer
+
+- `[mirix/llm_api/anthropic_client.py](mirix/llm_api/anthropic_client.py)` -- Currently asserts `messages[0].role == "system"` and extracts it to a top-level param. Update to handle system message prepended by the caller.
+- `[mirix/llm_api/anthropic.py](mirix/llm_api/anthropic.py)` -- Same pattern.
+
+### Cleanup Managers
+
+- `[mirix/services/user_manager.py](mirix/services/user_manager.py)` -- Replace `agent.message_ids = [agent.message_ids[0]]` with bulk message soft-delete by `user_id`
+- `[mirix/services/client_manager.py](mirix/services/client_manager.py)` -- Same, by `client_id`
+
+### API / Client / SDK
+
+- `[mirix/server/rest_api.py](mirix/server/rest_api.py)` -- Remove `message_ids` from `UpdateAgentRequest`
+- `[mirix/server/server.py](mirix/server/server.py)` -- Update if `save_agent` changes
+- `[mirix/client/client.py](mirix/client/client.py)`, `[mirix/client/remote_client.py](mirix/client/remote_client.py)`, `[mirix/local_client/local_client.py](mirix/local_client/local_client.py)`, `[mirix/sdk.py](mirix/sdk.py)` -- Remove `message_ids` references
+
+### ECMS
+
+- `[context-and-memory-service/common/ipsr/entities/agents.py](context-and-memory-service/common/ipsr/entities/agents.py)` -- Remove `message_ids` from IPSR agents entity
+
+### Database Migration
+
+Alembic migration:
+
+1. Add `message_type` (nullable `String`) to `messages`
+2. Add `message_retention_count` (nullable `Integer`, default `0`) to `clients`
+3. Add composite index on `(agent_id, user_id, is_deleted)` to `messages`
+4. Soft-delete existing system messages (`role = 'system'`)
+5. Drop `message_ids` from `agents`
+
+### Tests
+
+- `[tests/test_message_handling.py](tests/test_message_handling.py)` -- Update for query-based retrieval
+- `[tests/test_agent_prompt_update.py](tests/test_agent_prompt_update.py)` -- Update for system message from `agent_state.system`
+- New tests for in-memory accumulation, in-memory summarization, and retention count behavior (0, N)
+

From a110d5b8429d358f1e4cc2a9af4b22c45a6e991d Mon Sep 17 00:00:00 2001
From: Jianhe Liao <jianhe_liao@intuit.com>
Date: Fri, 20 Mar 2026 09:38:26 -0700
Subject: [PATCH 02/13] Fix the missing greenlet issue (#66)

* Fix the missing greenlet issue

* Fix the Github CI pipeline errors

* Further fix Github CI pipeline errors on Redis test
---
 mirix/agent/agent.py                          |  16 +-
 mirix/agent/meta_agent.py                     |   4 +-
 mirix/agent/temporary_message_accumulator.py  |   6 +-
 mirix/agent/upload_manager.py                 |  26 +-
 mirix/client/remote_client.py                 |  35 +-
 mirix/constants.py                            |   4 +-
 mirix/database/filter_tags_query.py           |  37 +-
 mirix/database/redis_client.py                |  12 +-
 mirix/functions/function_sets/memory_tools.py |  11 +-
 mirix/functions/helpers.py                    |   4 +-
 mirix/functions/mcp_client/base_client.py     |   2 +-
 mirix/functions/mcp_client/gmail_client.py    | 115 ++----
 mirix/functions/mcp_client/manager.py         |   8 +-
 mirix/llm_api/anthropic_client.py             |   8 +-
 mirix/llm_api/azure_openai.py                 |   8 +-
 mirix/llm_api/azure_openai_client.py          |   5 +-
 mirix/llm_api/google_ai.py                    |   8 +-
 mirix/llm_api/google_ai_client.py             |  12 +-
 mirix/llm_api/helpers.py                      |   7 +-
 mirix/llm_api/llm_api_tools.py                |  59 +--
 mirix/llm_api/llm_client_base.py              |   8 +-
 mirix/llm_api/openai_client.py                |   5 +-
 mirix/local_client/local_client.py            |  66 +---
 mirix/orm/agent.py                            |  16 +-
 mirix/orm/base.py                             |   5 +-
 mirix/orm/block.py                            |   3 +-
 mirix/orm/custom_columns.py                   |   2 +-
 mirix/orm/episodic_memory.py                  |  28 ++
 mirix/orm/knowledge_vault.py                  |  25 ++
 mirix/orm/procedural_memory.py                |  24 ++
 mirix/orm/resource_memory.py                  |  24 ++
 mirix/orm/semantic_memory.py                  |  26 ++
 mirix/orm/sqlalchemy_base.py                  |  23 +-
 mirix/queue/kafka_queue.py                    |   9 +-
 mirix/queue/manager.py                        |   1 -
 mirix/queue/memory_queue.py                   |  21 +-
 mirix/queue/message_pb2.py                    |  64 ++--
 mirix/queue/message_pb2.pyi                   |  83 +++-
 mirix/queue/message_pb2_grpc.py               |  16 +-
 mirix/queue/queue_util.py                     |   2 -
 mirix/queue/worker.py                         |  16 +-
 mirix/schemas/memory.py                       |   2 +-
 mirix/schemas/message.py                      |   6 +-
 mirix/schemas/mirix_base.py                   |   3 +-
 .../schemas/openai/chat_completion_request.py |   2 -
 mirix/schemas/openai/chat_completions.py      |   2 -
 mirix/schemas/providers.py                    |   2 +-
 mirix/sdk.py                                  |  11 +-
 mirix/server/rest_api.py                      | 114 ++----
 mirix/server/server.py                        |  12 +-
 mirix/services/admin_user_manager.py          |  62 +--
 mirix/services/agent_manager.py               |  71 +++-
 mirix/services/block_manager.py               |   3 +-
 mirix/services/client_manager.py              |  77 +---
 mirix/services/cloud_file_mapping_manager.py  |  68 +---
 mirix/services/episodic_memory_manager.py     |  60 ++-
 mirix/services/file_manager.py                |  68 +---
 .../services/helpers/agent_manager_helper.py  |   5 +-
 mirix/services/knowledge_vault_manager.py     |  20 +-
 mirix/services/mcp_tool_registry.py           |  12 +-
 mirix/services/message_manager.py             |  12 +-
 mirix/services/organization_manager.py        |   8 +-
 mirix/services/procedural_memory_manager.py   |  22 +-
 mirix/services/provider_manager.py            |  12 +-
 mirix/services/raw_memory_manager.py          |  22 +-
 mirix/services/resource_memory_manager.py     |  40 +-
 mirix/services/semantic_memory_manager.py     |  30 +-
 mirix/services/tool_execution_sandbox.py      |  60 ++-
 mirix/services/tool_manager.py                |  16 +-
 mirix/services/user_manager.py                |  26 +-
 mirix/services/utils.py                       |   6 +-
 mirix/utils.py                                |   7 +-
 poetry.lock                                   | 357 +++++++++++-------
 pyproject.toml                                |  48 ++-
 tests/test_orm_to_pydantic_safe.py            | 347 +++++++++++++++++
 75 files changed, 1384 insertions(+), 1083 deletions(-)
 create mode 100644 tests/test_orm_to_pydantic_safe.py

diff --git a/mirix/agent/agent.py b/mirix/agent/agent.py
index 0fbbeb141..312efc65d 100644
--- a/mirix/agent/agent.py
+++ b/mirix/agent/agent.py
@@ -7,9 +7,9 @@
 from datetime import datetime
 from typing import Callable, List, Optional, Tuple, Union
 
+import httpx
 import numpy as np
 import pytz
-import httpx
 
 from mirix.agent.tool_validators import validate_tool_args
 from mirix.constants import (
@@ -68,6 +68,7 @@
 from mirix.services.semantic_memory_manager import SemanticMemoryManager
 from mirix.services.step_manager import StepManager
 from mirix.services.tool_execution_sandbox import ToolExecutionSandbox
+from mirix.services.user_manager import UserManager
 from mirix.settings import settings, summarizer_settings
 from mirix.system import (
     get_contine_chaining,
@@ -333,10 +334,7 @@ async def update_memory_if_changed(self, new_memory: Memory) -> bool:
                 auto_create_from_default=False,  # Don't auto-create here, only in step()
             )
             self.blocks_in_memory = Memory(
-                blocks=[
-                    await self.block_manager.get_block_by_id(block.id, user=self.user)
-                    for block in blocks_result
-                ]
+                blocks=[await self.block_manager.get_block_by_id(block.id, user=self.user) for block in blocks_result]
             )
 
             # NOTE: don't do this since re-buildin the memory is handled at the start of the step
@@ -1239,12 +1237,12 @@ async def _handle_ai_response(
                 if CLEAR_HISTORY_AFTER_MEMORY_UPDATE and not self.agent_state.is_type(AgentType.chat_agent):
                     if not chaining:
                         should_clear_history = True
-                        self.logger.info(f"should_clear_history=True (chaining=False)")
+                        self.logger.info("should_clear_history=True (chaining=False)")
                     else:
                         for func_name in executed_function_names:
                             if func_name == "finish_memory_update":
                                 should_clear_history = True
-                                self.logger.info(f"should_clear_history=True (finish_memory_update called)")
+                                self.logger.info("should_clear_history=True (finish_memory_update called)")
                                 break
                 else:
                     self.logger.debug(
@@ -1740,7 +1738,9 @@ async def build_system_prompt_with_memories(
 
         # Prepare embedding for semantic search
         if key_words != "" and search_method == "embedding":
-            embedded_text = await (await embedding_model(self.agent_state.embedding_config)).get_text_embedding(key_words)
+            embedded_text = await (await embedding_model(self.agent_state.embedding_config)).get_text_embedding(
+                key_words
+            )
             embedded_text = np.array(embedded_text)
             embedded_text = np.pad(
                 embedded_text,
diff --git a/mirix/agent/meta_agent.py b/mirix/agent/meta_agent.py
index 6392bd8bb..36a59d000 100644
--- a/mirix/agent/meta_agent.py
+++ b/mirix/agent/meta_agent.py
@@ -502,9 +502,7 @@ async def update_embedding_config(self, embedding_config: EmbeddingConfig):
 
         actor = None
         if self.client_id:
-            actor = await self.server.client_manager.get_client_by_id(
-                self.client_id
-            )
+            actor = await self.server.client_manager.get_client_by_id(self.client_id)
 
         for agent_state in self.memory_agent_states.get_all_agent_states_list():
             if agent_state is not None:
diff --git a/mirix/agent/temporary_message_accumulator.py b/mirix/agent/temporary_message_accumulator.py
index 2b6e86bef..0f30400ba 100644
--- a/mirix/agent/temporary_message_accumulator.py
+++ b/mirix/agent/temporary_message_accumulator.py
@@ -14,6 +14,8 @@
 from mirix.constants import CHAINING_FOR_MEMORY_UPDATE, CHAINING_FOR_META_AGENT
 from mirix.voice_utils import convert_base64_to_audio_segment, process_voice_files
 
+logger = logging.getLogger(__name__)
+
 
 def get_image_mime_type(image_path):
     """Get MIME type for image files."""
@@ -704,7 +706,9 @@ async def _cleanup_processed_content(self, ready_to_process, user_message_added)
                     for file_ref in item["image_uris"]:
                         if hasattr(file_ref, "name"):
                             try:
-                                await self.client.server.cloud_file_mapping_manager.set_processed(cloud_file_id=file_ref.name)
+                                await self.client.server.cloud_file_mapping_manager.set_processed(
+                                    cloud_file_id=file_ref.name
+                                )
                             except Exception:
                                 pass
 
diff --git a/mirix/agent/upload_manager.py b/mirix/agent/upload_manager.py
index f13be9bf2..2100bda40 100644
--- a/mirix/agent/upload_manager.py
+++ b/mirix/agent/upload_manager.py
@@ -39,9 +39,12 @@ async def _compress_image(self, image_path, quality=85, max_size=(1920, 1080)):
         if shutil.which("vipsthumbnail"):
             try:
                 process = await asyncio.create_subprocess_exec(
-                    "vipsthumbnail", image_path,
-                    "--size", f"{max_size[0]}x{max_size[1]}",
-                    "-o", f"{compressed_path}[Q={quality},optimize-coding,strip]",
+                    "vipsthumbnail",
+                    image_path,
+                    "--size",
+                    f"{max_size[0]}x{max_size[1]}",
+                    "-o",
+                    f"{compressed_path}[Q={quality},optimize-coding,strip]",
                     stdout=asyncio.subprocess.PIPE,
                     stderr=asyncio.subprocess.PIPE,
                 )
@@ -52,13 +55,15 @@ async def _compress_image(self, image_path, quality=85, max_size=(1920, 1080)):
 
                 logger.warning(
                     "vipsthumbnail failed for %s (rc=%d): %s",
-                    image_path, process.returncode,
+                    image_path,
+                    process.returncode,
                     stderr_bytes.decode() if stderr_bytes else "",
                 )
             except Exception as e:
                 logger.warning(
                     "vipsthumbnail error for %s: %s; falling back to Pillow",
-                    image_path, e,
+                    image_path,
+                    e,
                 )
 
         # Fallback: Pillow in a child process (still async, separate process)
@@ -74,7 +79,9 @@ async def _compress_image(self, image_path, quality=85, max_size=(1920, 1080)):
                 f"quality={quality},optimize=True)"
             )
             process = await asyncio.create_subprocess_exec(
-                sys.executable, "-c", script,
+                sys.executable,
+                "-c",
+                script,
                 stdout=asyncio.subprocess.PIPE,
                 stderr=asyncio.subprocess.PIPE,
             )
@@ -85,7 +92,8 @@ async def _compress_image(self, image_path, quality=85, max_size=(1920, 1080)):
 
             logger.error(
                 "Pillow subprocess failed for %s (rc=%d): %s",
-                image_path, process.returncode,
+                image_path,
+                process.returncode,
                 stderr_bytes.decode() if stderr_bytes else "",
             )
             return None
@@ -115,6 +123,7 @@ async def _upload_single_file(self, upload_uuid, filename, timestamp, compressed
             upload_file = compressed_file if compressed_file and os.path.exists(compressed_file) else filename
 
             import time
+
             upload_start_time = time.time()
             file_ref = await self.google_client.aio.files.upload(file=upload_file)
             upload_duration = time.time() - upload_start_time
@@ -215,6 +224,7 @@ async def wait_for_upload(self, placeholder, timeout=30):
             return placeholder
 
         import time
+
         start_time = time.time()
         while time.time() - start_time < timeout:
             upload_status = await self.get_upload_status(placeholder)
@@ -245,7 +255,7 @@ async def cleanup_resolved_upload(self, placeholder):
     async def get_upload_status_summary(self):
         """Get a summary of current upload statuses."""
         async with self._upload_lock:
-            summary = {}
+            summary: dict[str, int] = {}
             for uid, info in self._upload_status.items():
                 status = info.get("status", "unknown")
                 summary[status] = summary.get(status, 0) + 1
diff --git a/mirix/client/remote_client.py b/mirix/client/remote_client.py
index 200bbd78d..df0ec5a7a 100644
--- a/mirix/client/remote_client.py
+++ b/mirix/client/remote_client.py
@@ -15,28 +15,23 @@
 from mirix.client.client import AbstractClient
 from mirix.constants import FUNCTION_RETURN_CHAR_LIMIT
 from mirix.log import get_logger
-from mirix.schemas.agent import AgentState, AgentType, CreateAgent, CreateMetaAgent
-from mirix.schemas.block import Block, BlockUpdate, CreateBlock, Human, Persona
+from mirix.schemas.agent import AgentState, AgentType
+from mirix.schemas.block import Block, Human, Persona
 from mirix.schemas.embedding_config import EmbeddingConfig
 from mirix.schemas.environment_variables import (
     SandboxEnvironmentVariable,
-    SandboxEnvironmentVariableCreate,
-    SandboxEnvironmentVariableUpdate,
 )
-from mirix.schemas.file import FileMetadata
 from mirix.schemas.llm_config import LLMConfig
 from mirix.schemas.memory import ArchivalMemorySummary, Memory, RecallMemorySummary
-from mirix.schemas.message import Message, MessageCreate
+from mirix.schemas.message import Message
 from mirix.schemas.mirix_response import MirixResponse
 from mirix.schemas.organization import Organization
 from mirix.schemas.sandbox_config import (
     E2BSandboxConfig,
     LocalSandboxConfig,
     SandboxConfig,
-    SandboxConfigCreate,
-    SandboxConfigUpdate,
 )
-from mirix.schemas.tool import Tool, ToolCreate, ToolUpdate
+from mirix.schemas.tool import Tool
 from mirix.schemas.tool_rule import BaseToolRule
 
 logger = get_logger(__name__)
@@ -103,7 +98,7 @@ async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
                 last_exc = exc
                 if attempt == self._max_retries:
                     raise
-            delay = self._backoff_factor * (2 ** attempt)
+            delay = self._backoff_factor * (2**attempt)
             await asyncio.sleep(delay)
         raise last_exc  # type: ignore[misc]
 
@@ -349,9 +344,7 @@ async def create_or_get_user(
         if not (headers and "X-API-Key" in headers) and (org_id or self.org_id):
             request_data["org_id"] = org_id or self.org_id
 
-        response = await self._request(
-            "POST", "/users/create_or_get", json=request_data, headers=headers
-        )
+        response = await self._request("POST", "/users/create_or_get", json=request_data, headers=headers)
         if isinstance(response, dict) and "id" in response:
             if self.debug:
                 logger.debug("User ready: %s", response["id"])
@@ -408,9 +401,7 @@ async def _request(
             if json:
                 logger.debug("[MirixClient] Request body: %s", json)
 
-        response = await self._client.request(
-            method=method, url=url, json=json, params=params, headers=headers
-        )
+        response = await self._client.request(method=method, url=url, json=json, params=params, headers=headers)
         try:
             response.raise_for_status()
         except httpx.HTTPStatusError as e:
@@ -418,9 +409,7 @@ async def _request(
                 error_detail = response.json().get("detail", str(e))
             except Exception:
                 error_detail = str(e)
-            raise httpx.HTTPStatusError(
-                error_detail, request=e.request, response=e.response
-            ) from e
+            raise httpx.HTTPStatusError(error_detail, request=e.request, response=e.response) from e
         if response.content:
             return response.json()
         return None
@@ -671,7 +660,9 @@ async def get_archival_memory_summary(
         data = await self._request("GET", f"/agents/{agent_id}/memory/archival", headers=headers)
         return ArchivalMemorySummary(**data)
 
-    async def get_recall_memory_summary(self, agent_id: str, headers: Optional[Dict[str, str]] = None) -> RecallMemorySummary:
+    async def get_recall_memory_summary(
+        self, agent_id: str, headers: Optional[Dict[str, str]] = None
+    ) -> RecallMemorySummary:
         """Get recall memory summary."""
         data = await self._request("GET", f"/agents/{agent_id}/memory/recall", headers=headers)
         return RecallMemorySummary(**data)
@@ -771,9 +762,7 @@ async def send_message(
         if not use_cache:
             request_data["use_cache"] = use_cache
 
-        data = await self._request(
-            "POST", f"/agents/{resolved_agent_id}/messages", json=request_data, headers=headers
-        )
+        data = await self._request("POST", f"/agents/{resolved_agent_id}/messages", json=request_data, headers=headers)
         return MirixResponse(**data)
 
     async def user_message(
diff --git a/mirix/constants.py b/mirix/constants.py
index 1dfedd582..da61be898 100644
--- a/mirix/constants.py
+++ b/mirix/constants.py
@@ -122,9 +122,9 @@
     "semantic_memory_update",
     "check_semantic_memory",
 ]
-CHAT_AGENT_TOOLS = []
+CHAT_AGENT_TOOLS: list[str] = []
 EXTRAS_TOOLS = ["web_search", "fetch_and_read_pdf"]
-MCP_TOOLS = []
+MCP_TOOLS: list[str] = []
 META_MEMORY_TOOLS = ["trigger_memory_update"]
 SEARCH_MEMORY_TOOLS = ["search_in_memory", "list_memory_within_timerange"]
 UNIVERSAL_MEMORY_TOOLS = [
diff --git a/mirix/database/filter_tags_query.py b/mirix/database/filter_tags_query.py
index 775eeb055..41d655e9a 100644
--- a/mirix/database/filter_tags_query.py
+++ b/mirix/database/filter_tags_query.py
@@ -17,11 +17,10 @@
 
 import json
 import re
-from typing import Any, Dict, List, Optional, Tuple, Type
+from typing import Any, Dict, List, Optional, Tuple
 
 from sqlalchemy import cast, or_, text, type_coerce
 from sqlalchemy.dialects.postgresql import JSONB
-from sqlalchemy.orm import Query
 
 SUPPORTED_OPERATORS = frozenset({"$contains", "$exists", "$in"})
 
@@ -36,14 +35,12 @@ def _validate_operator(value: dict) -> str:
         )
     if len(ops) > 1:
         raise ValueError(
-            f"filter_tags value dict has multiple operator keys: {ops!r}. "
-            f"Only one operator per key is supported."
+            f"filter_tags value dict has multiple operator keys: {ops!r}. " f"Only one operator per key is supported."
         )
     op = ops[0]
     if op not in SUPPORTED_OPERATORS:
         raise ValueError(
-            f"Unknown filter_tags operator '{op}'. "
-            f"Supported operators: {', '.join(sorted(SUPPORTED_OPERATORS))}"
+            f"Unknown filter_tags operator '{op}'. " f"Supported operators: {', '.join(sorted(SUPPORTED_OPERATORS))}"
         )
     return op
 
@@ -94,9 +91,7 @@ def apply_filter_tags_sqlalchemy(
         if _is_operator_dict(value):
             query = query.where(_resolve_operator_sqla(key, value, model_class))
         else:
-            query = query.where(
-                model_class.filter_tags[key].as_string() == str(value)
-            )
+            query = query.where(model_class.filter_tags[key].as_string() == str(value))
 
     return query
 
@@ -104,10 +99,7 @@ def apply_filter_tags_sqlalchemy(
 def _apply_scopes_sqla(query, model_class, scopes: List[str]):
     """Apply scope authorization filter for SQLAlchemy."""
     if scopes:
-        scope_conditions = [
-            model_class.filter_tags["scope"].as_string() == scope
-            for scope in scopes
-        ]
+        scope_conditions = [model_class.filter_tags["scope"].as_string() == scope for scope in scopes]
         return query.where(or_(*scope_conditions))
     return query.where(text("1 = 0"))
 
@@ -119,9 +111,7 @@ def _resolve_operator_sqla(key: str, value: dict, model_class):
     if op == "$contains":
         # Pass the dict directly — type_coerce lets psycopg2 serialize it once.
         # Using json.dumps + cast would double-encode the string.
-        return cast(model_class.filter_tags, JSONB).contains(
-            type_coerce({key: [value["$contains"]]}, JSONB)
-        )
+        return cast(model_class.filter_tags, JSONB).contains(type_coerce({key: [value["$contains"]]}, JSONB))
     elif op == "$exists":
         condition = cast(model_class.filter_tags, JSONB).has_key(key)  # noqa: W601
         if not value["$exists"]:
@@ -131,15 +121,14 @@ def _resolve_operator_sqla(key: str, value: dict, model_class):
         vals = value["$in"]
         if not isinstance(vals, list) or not vals:
             return text("1 = 0")
-        return model_class.filter_tags[key].as_string().in_(
-            [str(v) for v in vals]
-        )
+        return model_class.filter_tags[key].as_string().in_([str(v) for v in vals])
 
 
 # ---------------------------------------------------------------------------
 # Raw SQL builder (for BM25 full-text search paths)
 # ---------------------------------------------------------------------------
 
+
 def build_filter_tags_raw_sql(
     filter_tags: Optional[Dict[str, Any]],
     scopes: Optional[List[str]] = None,
@@ -189,9 +178,7 @@ def _build_scopes_raw_sql(scopes: List[str]) -> Tuple[List[str], Dict[str, Any]]
 
     if scopes:
         placeholders = [f":scope_{i}" for i in range(len(scopes))]
-        clauses.append(
-            f"filter_tags->>'scope' IN ({', '.join(placeholders)})"
-        )
+        clauses.append(f"filter_tags->>'scope' IN ({', '.join(placeholders)})")
         for i, scope in enumerate(scopes):
             params[f"scope_{i}"] = scope
     else:
@@ -200,9 +187,7 @@ def _build_scopes_raw_sql(scopes: List[str]) -> Tuple[List[str], Dict[str, Any]]
     return clauses, params
 
 
-def _resolve_operator_raw_sql(
-    key: str, value: dict
-) -> Tuple[str, Dict[str, Any]]:
+def _resolve_operator_raw_sql(key: str, value: dict) -> Tuple[str, Dict[str, Any]]:
     """Resolve a single $ operator into a raw SQL clause + params."""
     op = _validate_operator(value)
     params: Dict[str, Any] = {}
@@ -237,6 +222,7 @@ def _resolve_operator_raw_sql(
 # Redis support
 # ---------------------------------------------------------------------------
 
+
 def can_redis_handle(filter_tags: Optional[Dict[str, Any]]) -> bool:
     """
     Check whether all filter_tags values can be handled by Redis TAG queries.
@@ -274,6 +260,7 @@ def build_filter_tags_redis(
     Callers should check can_redis_handle() first; this function only
     handles scalar values and scopes.
     """
+
     def escape_tag_value(val: str) -> str:
         special_chars = r'[\-:.()\[\]{}"\',<>;!@#$%^&*+=~]'
         return re.sub(special_chars, lambda m: f"\\{m.group(0)}", str(val))
diff --git a/mirix/database/redis_client.py b/mirix/database/redis_client.py
index 8b47325dc..ef4aa5aa1 100644
--- a/mirix/database/redis_client.py
+++ b/mirix/database/redis_client.py
@@ -853,9 +853,7 @@ async def delete(self, key: str) -> bool:
             logger.error("Failed to delete key %s: %s", key, e)
             return False
 
-    def _build_filter_tags_query(
-        self, filter_tags: Dict[str, Any], scopes: Optional[List[str]] = None
-    ) -> str:
+    def _build_filter_tags_query(self, filter_tags: Dict[str, Any], scopes: Optional[List[str]] = None) -> str:
         """
         Build Redis Search query string from filter_tags and scopes.
 
@@ -924,7 +922,6 @@ async def search_text(
                 return []
 
             import re
-            from datetime import datetime
 
             from redis.commands.search.query import Query
 
@@ -1057,8 +1054,6 @@ async def search_vector(
                 logger.debug("filter_tags contain operators unsupported by Redis, skipping Redis search_vector")
                 return []
 
-            from datetime import datetime
-
             import numpy as np
             from redis.commands.search.query import Query
 
@@ -1191,8 +1186,6 @@ async def search_recent(
                 logger.debug("filter_tags contain operators unsupported by Redis, skipping Redis search_recent")
                 return []
 
-            from datetime import datetime
-
             from redis.commands.search.query import Query
 
             # Build query parts
@@ -1294,7 +1287,6 @@ async def search_recent_by_org(
                 return []
 
             import re
-            from datetime import datetime
 
             from redis.commands.search.query import Query
 
@@ -1386,7 +1378,6 @@ async def search_vector_by_org(
                 return []
 
             import re
-            from datetime import datetime
 
             from redis.commands.search.query import Query
 
@@ -1480,7 +1471,6 @@ async def search_text_by_org(
                 return []
 
             import re
-            from datetime import datetime
 
             from redis.commands.search.query import Query
 
diff --git a/mirix/functions/function_sets/memory_tools.py b/mirix/functions/function_sets/memory_tools.py
index 8ce5a8f86..feb347edf 100644
--- a/mirix/functions/function_sets/memory_tools.py
+++ b/mirix/functions/function_sets/memory_tools.py
@@ -1,10 +1,9 @@
 import asyncio
-import os
 import re
 from copy import deepcopy
 from typing import TYPE_CHECKING, List, Optional
 
-from mirix.agent import Agent, AgentState
+from mirix.agent import Agent
 
 if TYPE_CHECKING:
     from mirix.schemas.memory import Memory
@@ -265,7 +264,9 @@ async def check_episodic_memory(self: "Agent", event_ids: List[str], timezone_st
         raise ValueError("User is required to check episodic memory")
 
     episodic_memory = [
-        await self.episodic_memory_manager.get_episodic_memory_by_id(event_id, user=self.user, timezone_str=timezone_str)
+        await self.episodic_memory_manager.get_episodic_memory_by_id(
+            event_id, user=self.user, timezone_str=timezone_str
+        )
         for event_id in event_ids
     ]
 
@@ -893,9 +894,7 @@ async def trigger_memory_update(self: "Agent", user_message: object, memory_type
     def _agent_type_key(at):
         return at.value if hasattr(at, "value") else str(at)
 
-    agent_type_to_state = {
-        _agent_type_key(agent_state.agent_type): agent_state for agent_state in child_agent_states
-    }
+    agent_type_to_state = {_agent_type_key(agent_state.agent_type): agent_state for agent_state in child_agent_states}
 
     if not child_agent_states:
         raise ValueError(
diff --git a/mirix/functions/helpers.py b/mirix/functions/helpers.py
index 1d07e1024..a8a31fc58 100755
--- a/mirix/functions/helpers.py
+++ b/mirix/functions/helpers.py
@@ -15,8 +15,6 @@
 from mirix.log import get_logger
 from mirix.schemas.enums import MessageRole
 from mirix.schemas.message import MessageCreate
-
-logger = get_logger(__name__)
 from mirix.schemas.mirix_message import (
     AssistantMessage,
     ReasoningMessage,
@@ -24,6 +22,8 @@
 )
 from mirix.schemas.mirix_response import MirixResponse
 
+logger = get_logger(__name__)
+
 if TYPE_CHECKING:
     try:
         from langchain_core.tools import BaseTool as LangChainBaseTool
diff --git a/mirix/functions/mcp_client/base_client.py b/mirix/functions/mcp_client/base_client.py
index fc4aee9a3..1c3aeeaac 100644
--- a/mirix/functions/mcp_client/base_client.py
+++ b/mirix/functions/mcp_client/base_client.py
@@ -12,7 +12,7 @@
 from mirix.observability.context import get_trace_context
 from mirix.observability.langfuse_client import get_langfuse_client
 
-from .exceptions import MCPConnectionError, MCPNotInitializedError, MCPTimeoutError
+from .exceptions import MCPConnectionError, MCPNotInitializedError
 from .types import BaseServerConfig, MCPTool
 
 logger = logging.getLogger(__name__)
diff --git a/mirix/functions/mcp_client/gmail_client.py b/mirix/functions/mcp_client/gmail_client.py
index 9bc22de6f..3c92e3df1 100644
--- a/mirix/functions/mcp_client/gmail_client.py
+++ b/mirix/functions/mcp_client/gmail_client.py
@@ -28,9 +28,7 @@
 ]
 
 
-def authenticate_gmail_local(
-    client_id: str, client_secret: str, token_file: str = None
-) -> dict:
+def authenticate_gmail_local(client_id: str, client_secret: str, token_file: str = None) -> dict:
     """
     Authenticate with Gmail using OAuth2 with a local server to catch the callback.
     This is an interactive browser-based flow; inherently sync.
@@ -66,9 +64,7 @@ def authenticate_gmail_local(
                         "client_secret": client_secret,
                         "auth_uri": "https://accounts.google.com/o/oauth2/auth",
                         "token_uri": "https://oauth2.googleapis.com/token",
-                        "auth_provider_x509_cert_url": (
-                            "https://www.googleapis.com/oauth2/v1/certs"
-                        ),
+                        "auth_provider_x509_cert_url": ("https://www.googleapis.com/oauth2/v1/certs"),
                         "redirect_uris": [
                             "http://localhost:8080/",
                             "http://localhost:8081/",
@@ -134,9 +130,7 @@ def _load_credentials(self) -> bool:
             with open(self._token_file) as f:
                 token_data = json.load(f)
             if not token_data.get("refresh_token"):
-                logger.warning(
-                    "Token file missing refresh_token, removing invalid token."
-                )
+                logger.warning("Token file missing refresh_token, removing invalid token.")
                 os.remove(self._token_file)
                 return False
 
@@ -145,9 +139,7 @@ def _load_credentials(self) -> bool:
                 refresh_token=token_data.get("refresh_token"),
                 expires_at=token_data.get("expiry"),
                 scopes=GMAIL_SCOPES,
-                token_uri=token_data.get(
-                    "token_uri", "https://oauth2.googleapis.com/token"
-                ),
+                token_uri=token_data.get("token_uri", "https://oauth2.googleapis.com/token"),
             )
             self._client_creds = ClientCreds(
                 client_id=self._client_id,
@@ -161,9 +153,7 @@ def _load_credentials(self) -> bool:
     async def _initialize_connection(self, server_config, timeout: float) -> bool:
         """Initialize Gmail connection using OAuth and discover the API."""
         try:
-            self._token_file = server_config.token_file or os.path.expanduser(
-                "~/.mirix/gmail_token.json"
-            )
+            self._token_file = server_config.token_file or os.path.expanduser("~/.mirix/gmail_token.json")
             self._client_id = server_config.client_id
             self._client_secret = server_config.client_secret
 
@@ -182,14 +172,10 @@ async def _initialize_connection(self, server_config, timeout: float) -> bool:
                     return False
 
                 if not self._load_credentials():
-                    logger.error(
-                        "Failed to load credentials after authentication"
-                    )
+                    logger.error("Failed to load credentials after authentication")
                     return False
 
-            async with Aiogoogle(
-                user_creds=self._user_creds, client_creds=self._client_creds
-            ) as aiogoogle:
+            async with Aiogoogle(user_creds=self._user_creds, client_creds=self._client_creds) as aiogoogle:
                 self._gmail_api = await aiogoogle.discover("gmail", "v1")
 
             logger.info("Gmail service initialized successfully")
@@ -201,9 +187,7 @@ async def _initialize_connection(self, server_config, timeout: float) -> bool:
 
     async def _execute_gmail_request(self, request):
         """Execute a single Gmail API request with auto token refresh."""
-        async with Aiogoogle(
-            user_creds=self._user_creds, client_creds=self._client_creds
-        ) as aiogoogle:
+        async with Aiogoogle(user_creds=self._user_creds, client_creds=self._client_creds) as aiogoogle:
             return await aiogoogle.as_user(request)
 
     async def list_tools(self) -> List[MCPTool]:
@@ -258,15 +242,11 @@ async def list_tools(self) -> List[MCPTool]:
                     "properties": {
                         "query": {
                             "type": "string",
-                            "description": (
-                                "Gmail search query (optional, e.g., 'is:unread')"
-                            ),
+                            "description": ("Gmail search query (optional, e.g., 'is:unread')"),
                         },
                         "max_results": {
                             "type": "integer",
-                            "description": (
-                                "Maximum number of emails to retrieve (default: 10)"
-                            ),
+                            "description": ("Maximum number of emails to retrieve (default: 10)"),
                             "default": 10,
                         },
                     },
@@ -288,16 +268,13 @@ async def list_tools(self) -> List[MCPTool]:
             ),
         ]
 
-    async def execute_tool(
-        self, tool_name: str, tool_args: Dict[str, Any]
-    ) -> Tuple[str, bool]:
+    async def execute_tool(self, tool_name: str, tool_args: Dict[str, Any]) -> Tuple[str, bool]:
         """Execute a Gmail tool."""
         self._check_initialized()
 
         if not await self._ensure_gmail_service():
             return (
-                "Gmail authentication required. "
-                "Please run the Gmail connection process.",
+                "Gmail authentication required. " "Please run the Gmail connection process.",
                 True,
             )
 
@@ -320,9 +297,7 @@ async def _ensure_gmail_service(self) -> bool:
             return True
 
         try:
-            success = await self._initialize_connection(
-                self.server_config, timeout=30.0
-            )
+            success = await self._initialize_connection(self.server_config, timeout=30.0)
             if success and self._gmail_api is not None:
                 logger.info("Gmail service established successfully")
                 return True
@@ -344,15 +319,9 @@ async def _send_email(self, args: Dict[str, Any]) -> Tuple[str, bool]:
             html_body = args.get("html_body")
             attachments = args.get("attachments", [])
 
-            message = self._create_message(
-                to, subject, body, cc, bcc, attachments, html_body
-            )
+            message = self._create_message(to, subject, body, cc, bcc, attachments, html_body)
 
-            result = await self._execute_gmail_request(
-                self._gmail_api.users.messages.send(
-                    userId="me", json=message
-                )
-            )
+            result = await self._execute_gmail_request(self._gmail_api.users.messages.send(userId="me", json=message))
 
             return f"Email sent successfully! Message ID: {result['id']}", False
 
@@ -370,9 +339,7 @@ async def _read_emails(self, args: Dict[str, Any]) -> Tuple[str, bool]:
                 client_creds=self._client_creds,
             ) as aiogoogle:
                 results = await aiogoogle.as_user(
-                    self._gmail_api.users.messages.list(
-                        userId="me", q=query, maxResults=max_results
-                    )
+                    self._gmail_api.users.messages.list(userId="me", q=query, maxResults=max_results)
                 )
 
                 messages = results.get("messages", [])
@@ -381,35 +348,19 @@ async def _read_emails(self, args: Dict[str, Any]) -> Tuple[str, bool]:
 
                 email_details = []
                 for msg_entry in messages:
-                    msg = await aiogoogle.as_user(
-                        self._gmail_api.users.messages.get(
-                            userId="me", id=msg_entry["id"]
-                        )
-                    )
+                    msg = await aiogoogle.as_user(self._gmail_api.users.messages.get(userId="me", id=msg_entry["id"]))
 
                     headers = msg["payload"].get("headers", [])
                     subject = next(
-                        (
-                            h["value"]
-                            for h in headers
-                            if h["name"] == "Subject"
-                        ),
+                        (h["value"] for h in headers if h["name"] == "Subject"),
                         "No Subject",
                     )
                     sender = next(
-                        (
-                            h["value"]
-                            for h in headers
-                            if h["name"] == "From"
-                        ),
+                        (h["value"] for h in headers if h["name"] == "From"),
                         "Unknown Sender",
                     )
                     date = next(
-                        (
-                            h["value"]
-                            for h in headers
-                            if h["name"] == "Date"
-                        ),
+                        (h["value"] for h in headers if h["name"] == "Date"),
                         "Unknown Date",
                     )
 
@@ -432,9 +383,7 @@ async def _get_email(self, args: Dict[str, Any]) -> Tuple[str, bool]:
         try:
             email_id = args["email_id"]
 
-            message = await self._execute_gmail_request(
-                self._gmail_api.users.messages.get(userId="me", id=email_id)
-            )
+            message = await self._execute_gmail_request(self._gmail_api.users.messages.get(userId="me", id=email_id))
 
             headers = message["payload"].get("headers", [])
             subject = next(
@@ -477,18 +426,12 @@ def _create_message(
     ) -> dict:
         """Create an email message dict for the Gmail API."""
         if html_body or attachments:
-            message = MIMEMultipart(
-                "alternative" if html_body else "mixed"
-            )
+            message = MIMEMultipart("alternative" if html_body else "mixed")
         else:
             message = MIMEText(body)
             message["to"] = to
             message["subject"] = subject
-            return {
-                "raw": base64.urlsafe_b64encode(
-                    message.as_bytes()
-                ).decode()
-            }
+            return {"raw": base64.urlsafe_b64encode(message.as_bytes()).decode()}
 
         message["to"] = to
         message["subject"] = subject
@@ -525,13 +468,9 @@ def _create_message(
                     )
                     message.attach(attachment)
                 else:
-                    logger.warning(
-                        "Attachment file '%s' not found", file_path
-                    )
+                    logger.warning("Attachment file '%s' not found", file_path)
 
-        return {
-            "raw": base64.urlsafe_b64encode(message.as_bytes()).decode()
-        }
+        return {"raw": base64.urlsafe_b64encode(message.as_bytes()).decode()}
 
     def _extract_message_body(self, payload):
         """Extract message body from Gmail API payload."""
@@ -544,8 +483,6 @@ def _extract_message_body(self, payload):
                     body = base64.urlsafe_b64decode(data).decode("utf-8")
                     break
         elif payload["body"].get("data"):
-            body = base64.urlsafe_b64decode(
-                payload["body"]["data"]
-            ).decode("utf-8")
+            body = base64.urlsafe_b64decode(payload["body"]["data"]).decode("utf-8")
 
         return body
diff --git a/mirix/functions/mcp_client/manager.py b/mirix/functions/mcp_client/manager.py
index 798cd34a4..646ff4e81 100644
--- a/mirix/functions/mcp_client/manager.py
+++ b/mirix/functions/mcp_client/manager.py
@@ -169,9 +169,7 @@ async def list_tools(self, server_name: Optional[str] = None) -> Dict[str, List[
                     all_tools[name] = []
             return all_tools
 
-    async def execute_tool(
-        self, server_name: str, tool_name: str, tool_args: Dict[str, Any]
-    ) -> Tuple[str, bool]:
+    async def execute_tool(self, server_name: str, tool_name: str, tool_args: Dict[str, Any]) -> Tuple[str, bool]:
         """Asynchronously execute a tool on a specific server"""
         await self._ensure_loaded()
 
@@ -194,9 +192,7 @@ async def find_tool(self, tool_name: str) -> Optional[Tuple[str, MCPTool]]:
                 logger.error(f"Failed to search tools in server {server_name}: {str(e)}")
         return None
 
-    async def execute_tool_by_name(
-        self, tool_name: str, tool_args: Dict[str, Any]
-    ) -> Tuple[str, bool]:
+    async def execute_tool_by_name(self, tool_name: str, tool_args: Dict[str, Any]) -> Tuple[str, bool]:
         """Execute a tool by name (searches all servers)"""
         result = await self.find_tool(tool_name)
         if result:
diff --git a/mirix/llm_api/anthropic_client.py b/mirix/llm_api/anthropic_client.py
index 612dc28de..26c9ac527 100644
--- a/mirix/llm_api/anthropic_client.py
+++ b/mirix/llm_api/anthropic_client.py
@@ -49,9 +49,7 @@
 class AnthropicClient(LLMClientBase):
     async def request(self, request_data: dict) -> dict:
         client = await self._get_anthropic_client(async_client=True)
-        response = await client.beta.messages.create(
-            **request_data, betas=["tools-2024-04-04"]
-        )
+        response = await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
         return response.model_dump()
 
     @trace_method
@@ -112,7 +110,9 @@ async def send_llm_batch_request_async(
             raise self.handle_llm_error(e)
 
     @trace_method
-    async def _get_anthropic_client(self, async_client: bool = False) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
+    async def _get_anthropic_client(
+        self, async_client: bool = False
+    ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
         override_key = await ProviderManager().get_anthropic_override_key()
         if async_client:
             return anthropic.AsyncAnthropic(api_key=override_key) if override_key else anthropic.AsyncAnthropic()
diff --git a/mirix/llm_api/azure_openai.py b/mirix/llm_api/azure_openai.py
index 5f455cc4f..4cda94d2e 100755
--- a/mirix/llm_api/azure_openai.py
+++ b/mirix/llm_api/azure_openai.py
@@ -29,9 +29,7 @@ def get_azure_deployment_list_endpoint(base_url: str):
     return f"{base_url}/openai/deployments?api-version=2023-03-15-preview"
 
 
-async def azure_openai_get_deployed_model_list(
-    base_url: str, api_key: str, api_version: str
-) -> List[dict]:
+async def azure_openai_get_deployed_model_list(base_url: str, api_key: str, api_version: str) -> List[dict]:
     """Returns list of deployed models using httpx."""
     headers = {"Content-Type": "application/json"}
     if api_key is not None:
@@ -65,9 +63,7 @@ async def azure_openai_get_deployed_model_list(
     return list(latest_models.values())
 
 
-async def azure_openai_get_chat_completion_model_list(
-    base_url: str, api_key: str, api_version: str
-) -> list:
+async def azure_openai_get_chat_completion_model_list(base_url: str, api_key: str, api_version: str) -> list:
     model_list = await azure_openai_get_deployed_model_list(base_url, api_key, api_version)
     # Extract models that support text generation
     model_options = [m for m in model_list if m.get("capabilities").get("chat_completion")]
diff --git a/mirix/llm_api/azure_openai_client.py b/mirix/llm_api/azure_openai_client.py
index c13d655d6..a11dfcc06 100644
--- a/mirix/llm_api/azure_openai_client.py
+++ b/mirix/llm_api/azure_openai_client.py
@@ -1,4 +1,3 @@
-
 import os
 from typing import List, Optional
 
@@ -104,9 +103,7 @@ async def request(self, request_data: dict) -> dict:
         Performs asynchronous request to Azure OpenAI API.
         """
         client = AsyncAzureOpenAI(**await self._prepare_client_kwargs())
-        response: ChatCompletion = await client.chat.completions.create(
-            **request_data
-        )
+        response: ChatCompletion = await client.chat.completions.create(**request_data)
         return response.model_dump()
 
     async def stream(self, request_data: dict) -> AsyncStream[ChatCompletionChunk]:
diff --git a/mirix/llm_api/google_ai.py b/mirix/llm_api/google_ai.py
index ee8a69bbc..6c17ea0fb 100644
--- a/mirix/llm_api/google_ai.py
+++ b/mirix/llm_api/google_ai.py
@@ -57,7 +57,9 @@ def get_gemini_endpoint_and_headers(
     return url, headers
 
 
-async def google_ai_get_model_details(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> List[dict]:
+async def google_ai_get_model_details(
+    base_url: str, api_key: str, model: str, key_in_header: bool = True
+) -> List[dict]:
     from mirix.utils import printd
 
     url, headers = get_gemini_endpoint_and_headers(base_url, model, api_key, key_in_header)
@@ -93,7 +95,9 @@ async def google_ai_get_model_details(base_url: str, api_key: str, model: str, k
         raise e
 
 
-async def google_ai_get_model_context_window(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> int:
+async def google_ai_get_model_context_window(
+    base_url: str, api_key: str, model: str, key_in_header: bool = True
+) -> int:
     model_details = await google_ai_get_model_details(
         base_url=base_url, api_key=api_key, model=model, key_in_header=key_in_header
     )
diff --git a/mirix/llm_api/google_ai_client.py b/mirix/llm_api/google_ai_client.py
index de40d53d8..4aa48f0a3 100644
--- a/mirix/llm_api/google_ai_client.py
+++ b/mirix/llm_api/google_ai_client.py
@@ -138,7 +138,9 @@ def combine_tool_responses(self, contents: List[dict]) -> List[dict]:
             idx += 1
         return new_contents
 
-    async def fill_image_content_in_messages(self, google_ai_message_list, existing_file_uris: Optional[List[str]] = None):
+    async def fill_image_content_in_messages(
+        self, google_ai_message_list, existing_file_uris: Optional[List[str]] = None
+    ):
         """
         Converts image URIs in the message to base64 format.
         """
@@ -549,7 +551,9 @@ async def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: b
         raise e
 
 
-async def google_ai_get_model_details(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> List[dict]:
+async def google_ai_get_model_details(
+    base_url: str, api_key: str, model: str, key_in_header: bool = True
+) -> List[dict]:
     from mirix.utils import printd
 
     url, headers = get_gemini_endpoint_and_headers(base_url, model, api_key, key_in_header)
@@ -579,7 +583,9 @@ async def google_ai_get_model_details(base_url: str, api_key: str, model: str, k
         raise e
 
 
-async def google_ai_get_model_context_window(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> int:
+async def google_ai_get_model_context_window(
+    base_url: str, api_key: str, model: str, key_in_header: bool = True
+) -> int:
     model_details = await google_ai_get_model_details(
         base_url=base_url, api_key=api_key, model=model, key_in_header=key_in_header
     )
diff --git a/mirix/llm_api/helpers.py b/mirix/llm_api/helpers.py
index 153e86b79..aa0414f2a 100755
--- a/mirix/llm_api/helpers.py
+++ b/mirix/llm_api/helpers.py
@@ -1,8 +1,4 @@
-import copy
-import json
 import logging
-import warnings
-from collections import OrderedDict
 from typing import Any, List, Union
 
 import httpx
@@ -10,9 +6,8 @@
 from mirix.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
 from mirix.schemas.enums import MessageRole
 from mirix.schemas.message import Message
-from mirix.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice
 from mirix.settings import summarizer_settings
-from mirix.utils import count_tokens, json_dumps, printd
+from mirix.utils import count_tokens, printd
 
 logger = logging.getLogger(__name__)
 
diff --git a/mirix/llm_api/llm_api_tools.py b/mirix/llm_api/llm_api_tools.py
index ac4558042..4f086237a 100755
--- a/mirix/llm_api/llm_api_tools.py
+++ b/mirix/llm_api/llm_api_tools.py
@@ -6,15 +6,6 @@
 import httpx
 
 from mirix.constants import CLI_WARNING_PREFIX
-from mirix.log import get_logger
-from mirix.observability.context import get_trace_context, mark_observation_as_child
-from mirix.observability.langfuse_client import get_langfuse_client
-
-logger = get_logger(__name__)
-
-if TYPE_CHECKING:
-    from mirix.interface import AgentChunkStreamingInterface
-
 from mirix.errors import MirixConfigurationError, RateLimitExceededError
 from mirix.llm_api.anthropic import (
     anthropic_bedrock_chat_completions_request,
@@ -30,6 +21,9 @@
     build_openai_chat_completions_request,
     openai_chat_completions_request,
 )
+from mirix.log import get_logger
+from mirix.observability.context import get_trace_context, mark_observation_as_child
+from mirix.observability.langfuse_client import get_langfuse_client
 from mirix.schemas.llm_config import LLMConfig
 from mirix.schemas.message import Message
 from mirix.schemas.openai.chat_completion_request import (
@@ -41,6 +35,11 @@
 from mirix.settings import ModelSettings
 from mirix.utils import num_tokens_from_functions, num_tokens_from_messages
 
+logger = get_logger(__name__)
+
+if TYPE_CHECKING:
+    from mirix.interface import AgentChunkStreamingInterface
+
 LLM_API_PROVIDER_OPTIONS = [
     "openai",
     "azure",
@@ -203,9 +202,7 @@ async def create(
         try:
             messages_oai_format = [m.to_openai_dict() for m in messages]
             prompt_tokens = num_tokens_from_messages(messages=messages_oai_format, model=llm_config.model)
-            function_tokens = (
-                num_tokens_from_functions(functions=functions, model=llm_config.model) if functions else 0
-            )
+            function_tokens = num_tokens_from_functions(functions=functions, model=llm_config.model) if functions else 0
             if prompt_tokens + function_tokens > llm_config.context_window:
                 raise Exception(
                     f"Request exceeds maximum context length ({prompt_tokens + function_tokens} > {llm_config.context_window} tokens)"
@@ -272,13 +269,9 @@ async def create(
                                 if hasattr(msg, "tool_calls") and msg.tool_calls:
                                     output_message["tool_calls"] = [
                                         {
-                                            "name": (
-                                                tc.function.name if hasattr(tc, "function") else str(tc)
-                                            ),
+                                            "name": (tc.function.name if hasattr(tc, "function") else str(tc)),
                                             "arguments": (
-                                                str(tc.function.arguments)[:200]
-                                                if hasattr(tc, "function")
-                                                else ""
+                                                str(tc.function.arguments)[:200] if hasattr(tc, "function") else ""
                                             ),
                                         }
                                         for tc in msg.tool_calls[:5]
@@ -301,9 +294,7 @@ async def create(
             # azure
             elif llm_config.model_endpoint_type == "azure":
                 if stream:
-                    raise NotImplementedError(
-                        f"Streaming not yet implemented for {llm_config.model_endpoint_type}"
-                    )
+                    raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
 
                 if model_settings.azure_api_key is None:
                     raise MirixConfigurationError(
@@ -344,9 +335,7 @@ async def create(
 
             elif llm_config.model_endpoint_type == "google_ai":
                 if stream:
-                    raise NotImplementedError(
-                        f"Streaming not yet implemented for {llm_config.model_endpoint_type}"
-                    )
+                    raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
                 if not use_tool_naming:
                     raise NotImplementedError("Only tool calling supported on Google AI API requests")
 
@@ -442,9 +431,7 @@ async def create(
 
             elif llm_config.model_endpoint_type == "anthropic":
                 if stream:
-                    raise NotImplementedError(
-                        f"Streaming not yet implemented for {llm_config.model_endpoint_type}"
-                    )
+                    raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
                 if not use_tool_naming:
                     raise NotImplementedError("Only tool calling supported on Anthropic API requests")
 
@@ -457,9 +444,7 @@ async def create(
                     data=ChatCompletionRequest(
                         model=llm_config.model,
                         messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
-                        tools=(
-                            [{"type": "function", "function": f} for f in functions] if functions else None
-                        ),
+                        tools=([{"type": "function", "function": f} for f in functions] if functions else None),
                         tool_choice=tool_call,
                         max_tokens=4096,  # TODO make dynamic
                         image_uris=image_uris["image_uris"],
@@ -481,17 +466,11 @@ async def create(
                         missing_fields=["groq_api_key"],
                     )
 
-                tools = (
-                    [{"type": "function", "function": f} for f in functions]
-                    if functions is not None
-                    else None
-                )
+                tools = [{"type": "function", "function": f} for f in functions] if functions is not None else None
                 data = ChatCompletionRequest(
                     model=llm_config.model,
                     messages=[
-                        m.to_openai_dict(
-                            put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs
-                        )
+                        m.to_openai_dict(put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)
                         for m in messages
                     ],
                     tools=tools,
@@ -540,9 +519,7 @@ async def create(
                     data=ChatCompletionRequest(
                         model=llm_config.model,
                         messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
-                        tools=(
-                            [{"type": "function", "function": f} for f in functions] if functions else None
-                        ),
+                        tools=([{"type": "function", "function": f} for f in functions] if functions else None),
                         tool_choice=tool_call,
                         max_tokens=1024,  # TODO make dynamic
                     ),
diff --git a/mirix/llm_api/llm_client_base.py b/mirix/llm_api/llm_client_base.py
index bfce28254..09370ee9e 100644
--- a/mirix/llm_api/llm_client_base.py
+++ b/mirix/llm_api/llm_client_base.py
@@ -63,16 +63,12 @@ async def send_llm_request(
         trace_id = trace_context.get("trace_id") if trace_context else None
         parent_span_id = trace_context.get("observation_id") if trace_context else None
         if langfuse and trace_id:
-            return await self._execute_with_langfuse(
-                langfuse, request_data, messages, tools, trace_id, parent_span_id
-            )
+            return await self._execute_with_langfuse(langfuse, request_data, messages, tools, trace_id, parent_span_id)
         reason = "LangFuse client not available" if not langfuse else "No active trace_id in context"
         self.logger.debug(f"Sending LLM request without LangFuse tracing ({reason})")
         return await self._execute_without_langfuse(request_data, messages)
 
-    async def _execute_without_langfuse(
-        self, request_data: dict, messages: List[Message]
-    ) -> ChatCompletionResponse:
+    async def _execute_without_langfuse(self, request_data: dict, messages: List[Message]) -> ChatCompletionResponse:
         """Execute LLM request without LangFuse tracing."""
         try:
             t1 = time.time()
diff --git a/mirix/llm_api/openai_client.py b/mirix/llm_api/openai_client.py
index e0912103b..5696ff2da 100644
--- a/mirix/llm_api/openai_client.py
+++ b/mirix/llm_api/openai_client.py
@@ -1,4 +1,3 @@
-
 import base64
 import os
 from typing import List, Optional
@@ -293,9 +292,7 @@ async def request(self, request_data: dict) -> dict:
         Performs underlying asynchronous request to OpenAI API and returns raw response dict.
         """
         client_kwargs = await self._prepare_client_kwargs()
-        logger.debug(
-            "OpenAI Request - Making request to %s", client_kwargs.get("base_url")
-        )
+        logger.debug("OpenAI Request - Making request to %s", client_kwargs.get("base_url"))
         logger.debug(
             "OpenAI Request - Model: %s, Max tokens: %s, Temperature: %s",
             request_data.get("model"),
diff --git a/mirix/local_client/local_client.py b/mirix/local_client/local_client.py
index 995ab43ed..823b96a1d 100644
--- a/mirix/local_client/local_client.py
+++ b/mirix/local_client/local_client.py
@@ -14,7 +14,7 @@
 import os
 import shutil
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
 from urllib.parse import urlparse
 
 import httpx
@@ -175,9 +175,7 @@ async def _ensure_client(self) -> None:
         if self.organization is None:
             self.organization = await self.server.get_organization_or_default(self.org_id)
         if self.client is None:
-            self.client = await self.server.client_manager.get_client_or_default(
-                self.client_id, self.org_id
-            )
+            self.client = await self.server.client_manager.get_client_or_default(self.client_id, self.org_id)
 
     @classmethod
     async def create(
@@ -579,9 +577,7 @@ async def _resolve_tool_ids():
 
         memory = memory or Memory()
         for block in memory.get_blocks():
-            await self.server.block_manager.create_or_update_block(
-                block, actor=self.client, user=self.user
-            )
+            await self.server.block_manager.create_or_update_block(block, actor=self.client, user=self.user)
 
         block_ids = block_ids or []
         create_params = {
@@ -606,14 +602,10 @@ async def _resolve_tool_ids():
             CreateAgent(**create_params),
             actor=self.client,
         )
-        return await self.server.agent_manager.get_agent_by_id(
-            agent_state.id, actor=self.client
-        )
+        return await self.server.agent_manager.get_agent_by_id(agent_state.id, actor=self.client)
 
     async def create_user(self, user_id: str, user_name: str) -> PydanticUser:
-        return await self.server.user_manager.create_user(
-            UserCreate(id=user_id, name=user_name)
-        )
+        return await self.server.user_manager.create_user(UserCreate(id=user_id, name=user_name))
 
     async def create_meta_agent(
         self,
@@ -649,9 +641,7 @@ async def get_tools_from_agent(self, agent_id: str) -> List[Tool]:
         """Get tools from an existing agent."""
         self.interface.clear()
         await self._ensure_client()
-        agent = await self.server.agent_manager.get_agent_by_id(
-            agent_id=agent_id, actor=self.client
-        )
+        agent = await self.server.agent_manager.get_agent_by_id(agent_id=agent_id, actor=self.client)
         return agent.tools
 
     async def add_tool_to_agent(self, agent_id: str, tool_id: str) -> AgentState:
@@ -667,9 +657,7 @@ async def add_tool_to_agent(self, agent_id: str, tool_id: str) -> AgentState:
         """
         self.interface.clear()
         await self._ensure_client()
-        return await self.server.agent_manager.attach_tool(
-            agent_id=agent_id, tool_id=tool_id, actor=self.client
-        )
+        return await self.server.agent_manager.attach_tool(agent_id=agent_id, tool_id=tool_id, actor=self.client)
 
     async def remove_tool_from_agent(self, agent_id: str, tool_id: str) -> AgentState:
         """
@@ -684,9 +672,7 @@ async def remove_tool_from_agent(self, agent_id: str, tool_id: str) -> AgentStat
         """
         self.interface.clear()
         await self._ensure_client()
-        return await self.server.agent_manager.detach_tool(
-            agent_id=agent_id, tool_id=tool_id, actor=self.client
-        )
+        return await self.server.agent_manager.detach_tool(agent_id=agent_id, tool_id=tool_id, actor=self.client)
 
     async def update_agent(
         self,
@@ -757,17 +743,13 @@ async def get_agent_by_name(self, agent_name: str) -> AgentState:
         """Get an agent by its name."""
         self.interface.clear()
         await self._ensure_client()
-        return await self.server.agent_manager.get_agent_by_name(
-            agent_name=agent_name, actor=self.client
-        )
+        return await self.server.agent_manager.get_agent_by_name(agent_name=agent_name, actor=self.client)
 
     async def get_agent(self, agent_id: str) -> AgentState:
         """Get an agent's state by its ID."""
         self.interface.clear()
         await self._ensure_client()
-        return await self.server.agent_manager.get_agent_by_id(
-            agent_id=agent_id, actor=self.client
-        )
+        return await self.server.agent_manager.get_agent_by_id(agent_id=agent_id, actor=self.client)
 
     async def get_agent_id(self, agent_name: str) -> Optional[str]:
         """Get the ID of an agent by name (names are unique per user)."""
@@ -782,9 +764,7 @@ async def get_agent_id(self, agent_name: str) -> Optional[str]:
     async def get_archival_memory_summary(self, agent_id: str) -> ArchivalMemorySummary:
         """Get a summary of the archival memory of an agent."""
         await self._ensure_client()
-        return await self.server.get_archival_memory_summary(
-            agent_id=agent_id, actor=self.client
-        )
+        return await self.server.get_archival_memory_summary(agent_id=agent_id, actor=self.client)
 
     async def get_recall_memory_summary(self, agent_id: str) -> RecallMemorySummary:
         """Get a summary of the recall memory of an agent."""
@@ -832,9 +812,7 @@ async def construct_system_message(self, agent_id: str, message: str, user_id: s
             actor=self.client,
         )
 
-    async def extract_memory_for_system_prompt(
-        self, agent_id: str, message: str, user_id: Optional[str] = None
-    ) -> str:
+    async def extract_memory_for_system_prompt(self, agent_id: str, message: str, user_id: Optional[str] = None) -> str:
         """Extract memory for system prompt from a message."""
         await self._ensure_client()
         return await self.server.extract_memory_for_system_prompt(
@@ -1077,9 +1055,7 @@ async def convert_message(m):
     async def user_message(self, agent_id: str, message: str, user_id: Optional[str] = None) -> MirixResponse:
         """Send a message to an agent as a user."""
         self.interface.clear()
-        return await self.send_message(
-            role="user", agent_id=agent_id, message=message, user_id=user_id
-        )
+        return await self.send_message(role="user", agent_id=agent_id, message=message, user_id=user_id)
 
     async def run_command(self, agent_id: str, command: str) -> MirixResponse:
         """
@@ -1499,9 +1475,7 @@ async def list_tools(self, cursor: Optional[str] = None, limit: Optional[int] =
             List of tools.
         """
         await self._ensure_client()
-        return await self.server.tool_manager.list_tools(
-            cursor=cursor, limit=limit, actor=self.client
-        )
+        return await self.server.tool_manager.list_tools(cursor=cursor, limit=limit, actor=self.client)
 
     async def get_tool(self, id: str) -> Tool:
         """
@@ -1542,13 +1516,13 @@ async def get_tool_id(self, name: str) -> Optional[str]:
     async def get_tool_by_name(self, name: str) -> Optional[Tool]:
         """Get tool by name."""
         await self._ensure_client()
-        return await self.server.tool_manager.get_tool_by_name(
-            tool_name=name, actor=self.client
-        )
+        return await self.server.tool_manager.get_tool_by_name(tool_name=name, actor=self.client)
 
     # recall memory
 
-    async def get_messages(self, agent_id: str, cursor: Optional[str] = None, limit: Optional[int] = 1000) -> List[Message]:
+    async def get_messages(
+        self, agent_id: str, cursor: Optional[str] = None, limit: Optional[int] = 1000
+    ) -> List[Message]:
         """
         Get messages from an agent with pagination.
 
@@ -1778,9 +1752,7 @@ async def delete_file(self, file_id: str) -> None:
 
     async def search_files(self, name_pattern: str) -> List[FileMetadata]:
         """Search files by name pattern."""
-        return await self.file_manager.search_files_by_name(
-            file_name=name_pattern, organization_id=self.org_id
-        )
+        return await self.file_manager.search_files_by_name(file_name=name_pattern, organization_id=self.org_id)
 
     async def get_file_stats(self) -> dict:
         """
diff --git a/mirix/orm/agent.py b/mirix/orm/agent.py
index 485cbecad..18bfb8557 100755
--- a/mirix/orm/agent.py
+++ b/mirix/orm/agent.py
@@ -87,6 +87,20 @@ class Agent(SqlalchemyBase, OrganizationMixin):
 
     def to_pydantic(self) -> PydanticAgentState:
         """converts to the basic pydantic model counterpart"""
+        from sqlalchemy import inspect
+
+        # Check if we're in a session and tools are loaded
+        # This prevents MissingGreenlet when accessing relationships outside session
+        insp = inspect(self)
+
+        # For tools: if already loaded, use them; otherwise use empty list
+        # tools has lazy="selectin" so should be loaded, but this handles edge cases
+        if "tools" in insp.dict:
+            tools = self.tools
+        else:
+            # Tools not loaded (detached instance or session closed)
+            tools = []
+
         state = {
             "id": self.id,
             "organization_id": self.organization_id,
@@ -95,7 +109,7 @@ def to_pydantic(self) -> PydanticAgentState:
             "parent_id": self.parent_id,
             "children": None,  # Children are populated separately when needed
             "message_ids": self.message_ids,
-            "tools": self.tools,
+            "tools": tools,
             "tool_rules": self.tool_rules,
             "system": self.system,
             "agent_type": self.agent_type,
diff --git a/mirix/orm/base.py b/mirix/orm/base.py
index 3644640b1..5ab182cb0 100755
--- a/mirix/orm/base.py
+++ b/mirix/orm/base.py
@@ -1,5 +1,4 @@
-import datetime as dt
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Optional
 
 from sqlalchemy import Boolean, DateTime, String, func, text
@@ -34,7 +33,7 @@ def set_updated_at(self, timestamp: Optional[datetime] = None) -> None:
             timestamp (Optional[datetime]): The timestamp to set.
                                             If None, uses the current UTC time.
         """
-        self.updated_at = timestamp or datetime.now(dt.UTC)
+        self.updated_at = timestamp or datetime.now(timezone.utc)
 
     def _set_created_and_updated_by_fields(self, actor_id: str) -> None:
         """Populate created_by_id and last_updated_by_id based on actor."""
diff --git a/mirix/orm/block.py b/mirix/orm/block.py
index 399f5bd71..91fd21ac1 100755
--- a/mirix/orm/block.py
+++ b/mirix/orm/block.py
@@ -1,7 +1,6 @@
 from typing import TYPE_CHECKING, List, Optional, Type
 
-from sqlalchemy import JSON, BigInteger, Index, Integer, String, UniqueConstraint, cast, event, or_, select, text
-from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy import JSON, BigInteger, Index, Integer, UniqueConstraint, event, or_, select, text
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import (
     Mapped,
diff --git a/mirix/orm/custom_columns.py b/mirix/orm/custom_columns.py
index f51621662..3f66003f0 100755
--- a/mirix/orm/custom_columns.py
+++ b/mirix/orm/custom_columns.py
@@ -1,4 +1,4 @@
-from datetime import datetime, timezone
+from datetime import timezone
 
 from sqlalchemy import JSON
 from sqlalchemy.types import BINARY, DateTime, TypeDecorator
diff --git a/mirix/orm/episodic_memory.py b/mirix/orm/episodic_memory.py
index 341fcb1da..2928fd82f 100755
--- a/mirix/orm/episodic_memory.py
+++ b/mirix/orm/episodic_memory.py
@@ -204,3 +204,31 @@ def user(cls) -> Mapped["User"]:
         Relationship to the User that owns this episodic event.
         """
         return relationship("User", lazy="selectin")
+
+    def to_pydantic(self) -> PydanticEpisodicEvent:
+        """
+        Convert to Pydantic model, safely handling relationship loading.
+        This prevents MissingGreenlet errors when converting detached instances.
+        """
+        # Build dict with only scalar fields (no relationships)
+        # Pydantic model expects scalars only; relationships are foreign keys
+        state = {
+            "id": self.id,
+            "agent_id": self.agent_id,
+            "client_id": self.client_id,
+            "user_id": self.user_id,
+            "organization_id": self.organization_id,
+            "occurred_at": self.occurred_at,
+            "last_modify": self.last_modify,
+            "actor": self.actor,
+            "event_type": self.event_type,
+            "summary": self.summary,
+            "details": self.details,
+            "filter_tags": self.filter_tags,
+            "embedding_config": self.embedding_config,
+            "details_embedding": self.details_embedding,
+            "summary_embedding": self.summary_embedding,
+            "created_at": self.created_at,
+            "updated_at": self.updated_at,
+        }
+        return self.__pydantic_model__(**state)
diff --git a/mirix/orm/knowledge_vault.py b/mirix/orm/knowledge_vault.py
index d9e7279cf..8ebe69374 100755
--- a/mirix/orm/knowledge_vault.py
+++ b/mirix/orm/knowledge_vault.py
@@ -176,3 +176,28 @@ def user(cls) -> Mapped["User"]:
         Relationship to the User that owns this knowledge vault item.
         """
         return relationship("User", lazy="selectin")
+
+    def to_pydantic(self) -> "PydanticKnowledgeVaultItem":
+        """
+        Convert to Pydantic model, safely handling relationship loading.
+        This prevents MissingGreenlet errors when converting detached instances.
+        """
+        state = {
+            "id": self.id,
+            "agent_id": self.agent_id,
+            "client_id": self.client_id,
+            "user_id": self.user_id,
+            "organization_id": self.organization_id,
+            "entry_type": self.entry_type,
+            "source": self.source,
+            "sensitivity": self.sensitivity,
+            "secret_value": self.secret_value,
+            "caption": self.caption,
+            "filter_tags": self.filter_tags,
+            "last_modify": self.last_modify,
+            "embedding_config": self.embedding_config,
+            "caption_embedding": self.caption_embedding,
+            "created_at": self.created_at,
+            "updated_at": self.updated_at,
+        }
+        return self.__pydantic_model__(**state)
diff --git a/mirix/orm/procedural_memory.py b/mirix/orm/procedural_memory.py
index efbbe7a01..db45ad4d2 100755
--- a/mirix/orm/procedural_memory.py
+++ b/mirix/orm/procedural_memory.py
@@ -163,3 +163,27 @@ def user(cls) -> Mapped["User"]:
         Relationship to the User that owns this procedural memory item.
         """
         return relationship("User", lazy="selectin")
+
+    def to_pydantic(self) -> "PydanticProceduralMemoryItem":
+        """
+        Convert to Pydantic model, safely handling relationship loading.
+        This prevents MissingGreenlet errors when converting detached instances.
+        """
+        state = {
+            "id": self.id,
+            "agent_id": self.agent_id,
+            "client_id": self.client_id,
+            "user_id": self.user_id,
+            "organization_id": self.organization_id,
+            "entry_type": self.entry_type,
+            "summary": self.summary,
+            "steps": self.steps,
+            "filter_tags": self.filter_tags,
+            "last_modify": self.last_modify,
+            "embedding_config": self.embedding_config,
+            "summary_embedding": self.summary_embedding,
+            "steps_embedding": self.steps_embedding,
+            "created_at": self.created_at,
+            "updated_at": self.updated_at,
+        }
+        return self.__pydantic_model__(**state)
diff --git a/mirix/orm/resource_memory.py b/mirix/orm/resource_memory.py
index 175e915d7..6b51397e2 100755
--- a/mirix/orm/resource_memory.py
+++ b/mirix/orm/resource_memory.py
@@ -163,3 +163,27 @@ def user(cls) -> Mapped["User"]:
         Relationship to the User that owns this resource memory item.
         """
         return relationship("User", lazy="selectin")
+
+    def to_pydantic(self) -> "PydanticResourceMemoryItem":
+        """
+        Convert to Pydantic model, safely handling relationship loading.
+        This prevents MissingGreenlet errors when converting detached instances.
+        """
+        state = {
+            "id": self.id,
+            "agent_id": self.agent_id,
+            "client_id": self.client_id,
+            "user_id": self.user_id,
+            "organization_id": self.organization_id,
+            "title": self.title,
+            "summary": self.summary,
+            "content": self.content,
+            "resource_type": self.resource_type,
+            "filter_tags": self.filter_tags,
+            "last_modify": self.last_modify,
+            "embedding_config": self.embedding_config,
+            "summary_embedding": self.summary_embedding,
+            "created_at": self.created_at,
+            "updated_at": self.updated_at,
+        }
+        return self.__pydantic_model__(**state)
diff --git a/mirix/orm/semantic_memory.py b/mirix/orm/semantic_memory.py
index 1ee83d578..336cf8730 100755
--- a/mirix/orm/semantic_memory.py
+++ b/mirix/orm/semantic_memory.py
@@ -181,3 +181,29 @@ def user(cls) -> Mapped["User"]:
         Relationship to the User that owns this semantic memory item.
         """
         return relationship("User", lazy="selectin")
+
+    def to_pydantic(self) -> PydanticSemanticMemoryItem:
+        """
+        Convert to Pydantic model, safely handling relationship loading.
+        This prevents MissingGreenlet errors when converting detached instances.
+        """
+        state = {
+            "id": self.id,
+            "agent_id": self.agent_id,
+            "client_id": self.client_id,
+            "user_id": self.user_id,
+            "organization_id": self.organization_id,
+            "name": self.name,
+            "summary": self.summary,
+            "details": self.details,
+            "source": self.source,
+            "filter_tags": self.filter_tags,
+            "last_modify": self.last_modify,
+            "embedding_config": self.embedding_config,
+            "name_embedding": self.name_embedding,
+            "summary_embedding": self.summary_embedding,
+            "details_embedding": self.details_embedding,
+            "created_at": self.created_at,
+            "updated_at": self.updated_at,
+        }
+        return self.__pydantic_model__(**state)
diff --git a/mirix/orm/sqlalchemy_base.py b/mirix/orm/sqlalchemy_base.py
index 8811943e7..bee2b47e5 100755
--- a/mirix/orm/sqlalchemy_base.py
+++ b/mirix/orm/sqlalchemy_base.py
@@ -29,6 +29,11 @@
 
 logger = get_logger(__name__)
 
+# Diagnostic flag for MissingGreenlet debugging - set via env var
+import os
+
+_TRACE_MISSING_GREENLET = os.getenv("MIRIX_TRACE_MISSING_GREENLET", "false").lower() == "true"
+
 
 def handle_db_timeout(func):
     """Decorator to handle SQLAlchemy TimeoutError (async-aware)."""
@@ -39,9 +44,7 @@ async def wrapper(*args, **kwargs):
             return await func(*args, **kwargs)
         except TimeoutError as e:
             logger.error("Timeout while executing %s: %s", func.__name__, e)
-            raise DatabaseTimeoutError(
-                message=f"Timeout occurred in {func.__name__}.", original_exception=e
-            ) from e
+            raise DatabaseTimeoutError(message=f"Timeout occurred in {func.__name__}.", original_exception=e) from e
 
     return wrapper
 
@@ -615,6 +618,20 @@ def __pydantic_model__(self) -> "BaseModel":
 
     def to_pydantic(self) -> "BaseModel":
         """converts to the basic pydantic model counterpart"""
+        if _TRACE_MISSING_GREENLET:
+            try:
+                return self.__pydantic_model__.model_validate(self)
+            except Exception as e:
+                if "MissingGreenlet" in str(type(e).__name__) or "greenlet" in str(e).lower():
+                    import traceback
+
+                    logger.error(
+                        "MissingGreenlet detected in to_pydantic for %s (id=%s)\n" "Full traceback:\n%s",
+                        self.__class__.__name__,
+                        getattr(self, "id", "no-id"),
+                        traceback.format_exc(),
+                    )
+                raise
         return self.__pydantic_model__.model_validate(self)
 
     def to_record(self) -> "BaseModel":
diff --git a/mirix/queue/kafka_queue.py b/mirix/queue/kafka_queue.py
index 5b1fad374..5865c11c3 100644
--- a/mirix/queue/kafka_queue.py
+++ b/mirix/queue/kafka_queue.py
@@ -53,10 +53,7 @@ def __init__(
         try:
             from aiokafka import AIOKafkaConsumer, AIOKafkaProducer
         except ImportError:
-            raise ImportError(
-                "aiokafka is required for Kafka support. "
-                "Install it with: pip install aiokafka"
-            )
+            raise ImportError("aiokafka is required for Kafka support. " "Install it with: pip install aiokafka")
 
         logger.info(
             "Initializing Kafka queue: servers=%s, topic=%s, group=%s, format=%s, security=%s",
@@ -165,9 +162,7 @@ async def get(self, timeout: Optional[float] = None) -> QueueMessage:
         logger.debug("Polling Kafka topic %s for messages (timeout=%.1fs)", self.topic, effective_timeout)
 
         try:
-            record = await asyncio.wait_for(
-                self.consumer.getone(), timeout=effective_timeout
-            )
+            record = await asyncio.wait_for(self.consumer.getone(), timeout=effective_timeout)
         except asyncio.TimeoutError:
             logger.debug("No message available from Kafka within timeout")
             raise
diff --git a/mirix/queue/manager.py b/mirix/queue/manager.py
index 53e6614a9..5beac6f49 100644
--- a/mirix/queue/manager.py
+++ b/mirix/queue/manager.py
@@ -7,7 +7,6 @@
 user_id-based partitioning for parallel processing.
 """
 
-import logging
 from typing import Any, List, Optional
 
 from mirix.log import get_logger
diff --git a/mirix/queue/memory_queue.py b/mirix/queue/memory_queue.py
index cd333b49d..4d71efbdf 100644
--- a/mirix/queue/memory_queue.py
+++ b/mirix/queue/memory_queue.py
@@ -7,7 +7,7 @@
 
 import asyncio
 import logging
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
 from mirix.queue.message_pb2 import QueueMessage
 from mirix.queue.queue_interface import QueueInterface
@@ -66,9 +66,7 @@ class PartitionedMemoryQueue(QueueInterface):
     def __init__(self, num_partitions: int = 1, round_robin: bool = False):
         self._num_partitions = max(1, num_partitions)
         self._round_robin = round_robin
-        self._partitions: List[asyncio.Queue[QueueMessage]] = [
-            asyncio.Queue() for _ in range(self._num_partitions)
-        ]
+        self._partitions: List[asyncio.Queue[QueueMessage]] = [asyncio.Queue() for _ in range(self._num_partitions)]
 
         self._user_partition_map: Dict[str, int] = {}
         self._next_partition: int = 0
@@ -89,7 +87,7 @@ def num_partitions(self) -> int:
     def round_robin(self) -> bool:
         return self._round_robin
 
-    async def get_partition_stats(self) -> Dict[str, any]:
+    async def get_partition_stats(self) -> Dict[str, Any]:
         """Get statistics about partition distribution."""
         async with self._partition_lock:
             partition_counts = [0] * self._num_partitions
@@ -145,9 +143,7 @@ async def get(self, timeout: Optional[float] = None) -> QueueMessage:
         """Retrieve from partition 0 (for backward compatibility)."""
         return await self.get_from_partition(0, timeout)
 
-    async def get_from_partition(
-        self, partition_id: int, timeout: Optional[float] = None
-    ) -> QueueMessage:
+    async def get_from_partition(self, partition_id: int, timeout: Optional[float] = None) -> QueueMessage:
         """
         Retrieve a message from a specific partition.
 
@@ -160,15 +156,10 @@ async def get_from_partition(
             ValueError: If partition_id is out of range
         """
         if partition_id < 0 or partition_id >= self._num_partitions:
-            raise ValueError(
-                f"Invalid partition_id {partition_id}, "
-                f"must be 0 to {self._num_partitions - 1}"
-            )
+            raise ValueError(f"Invalid partition_id {partition_id}, " f"must be 0 to {self._num_partitions - 1}")
 
         if timeout is not None:
-            message = await asyncio.wait_for(
-                self._partitions[partition_id].get(), timeout=timeout
-            )
+            message = await asyncio.wait_for(self._partitions[partition_id].get(), timeout=timeout)
         else:
             message = await self._partitions[partition_id].get()
 
diff --git a/mirix/queue/message_pb2.py b/mirix/queue/message_pb2.py
index 32cdfd9c3..5c721bc0b 100644
--- a/mirix/queue/message_pb2.py
+++ b/mirix/queue/message_pb2.py
@@ -9,48 +9,46 @@
 from google.protobuf import runtime_version as _runtime_version
 from google.protobuf import symbol_database as _symbol_database
 from google.protobuf.internal import builder as _builder
+
 _runtime_version.ValidateProtobufRuntimeVersion(
-    _runtime_version.Domain.PUBLIC,
-    5,
-    27,
-    2,
-    '',
-    'mirix/queue/message.proto'
+    _runtime_version.Domain.PUBLIC, 5, 27, 2, "", "mirix/queue/message.proto"
 )
 # @@protoc_insertion_point(imports)
 
 _sym_db = _symbol_database.Default()
 
+# Well-known protos must be in the default pool before AddSerializedFile
+# (protobuf 5+); see google/protobuf/timestamp.proto deps in message.proto.
+from google.protobuf import struct_pb2 as _struct_pb2  # noqa: F401
+from google.protobuf import timestamp_pb2 as _timestamp_pb2  # noqa: F401
 
-from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2
-from google.protobuf import struct_pb2 as google_dot_protobuf_dot_struct__pb2
-
-
-DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x19mirix/queue/message.proto\x12\x05mirix\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1cgoogle/protobuf/struct.proto\"\xaf\x05\n\x0cQueueMessage\x12\x11\n\tclient_id\x18\x01 \x01(\t\x12\x10\n\x08\x61gent_id\x18\x02 \x01(\t\x12,\n\x0einput_messages\x18\x03 \x03(\x0b\x32\x14.mirix.MessageCreate\x12\x15\n\x08\x63haining\x18\x04 \x01(\x08H\x00\x88\x01\x01\x12\x14\n\x07user_id\x18\x05 \x01(\tH\x01\x88\x01\x01\x12\x14\n\x07verbose\x18\x06 \x01(\x08H\x02\x88\x01\x01\x12,\n\x0b\x66ilter_tags\x18\x07 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x16\n\tuse_cache\x18\x08 \x01(\x08H\x03\x88\x01\x01\x12\x18\n\x0boccurred_at\x18\t \x01(\tH\x04\x88\x01\x01\x12\x1e\n\x11langfuse_trace_id\x18\n \x01(\tH\x05\x88\x01\x01\x12$\n\x17langfuse_observation_id\x18\x0b \x01(\tH\x06\x88\x01\x01\x12 \n\x13langfuse_session_id\x18\x0c \x01(\tH\x07\x88\x01\x01\x12\x1d\n\x10langfuse_user_id\x18\r \x01(\tH\x08\x88\x01\x01\x12\x32\n\x11\x62lock_filter_tags\x18\x0e \x01(\x0b\x32\x17.google.protobuf.Struct\x12*\n\x1d\x62lock_filter_tags_update_mode\x18\x0f \x01(\tH\t\x88\x01\x01\x42\x0b\n\t_chainingB\n\n\x08_user_idB\n\n\x08_verboseB\x0c\n\n_use_cacheB\x0e\n\x0c_occurred_atB\x14\n\x12_langfuse_trace_idB\x1a\n\x18_langfuse_observation_idB\x16\n\x14_langfuse_session_idB\x13\n\x11_langfuse_user_idB \n\x1e_block_filter_tags_update_mode\"\xcf\x01\n\x04User\x12\n\n\x02id\x18\x01 \x01(\t\x12\x17\n\x0forganization_id\x18\x02 \x01(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x0e\n\x06status\x18\x04 \x01(\t\x12\x10\n\x08timezone\x18\x05 \x01(\t\x12.\n\ncreated_at\x18\x06 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12.\n\nupdated_at\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x12\n\nis_deleted\x18\x08 \x01(\x08\"\xd9\x02\n\rMessageCreate\x12\'\n\x04role\x18\x01 \x01(\x0e\x32\x19.mirix.MessageCreate.Role\x12\x16\n\x0ctext_content\x18\x02 \x01(\tH\x00\x12\x37\n\x12structured_content\x18\x03 \x01(\x0b\x32\x19.mirix.MessageContentListH\x00\x12\x11\n\x04name\x18\x04 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04otid\x18\x05 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tsender_id\x18\x06 \x01(\tH\x03\x88\x01\x01\x12\x15\n\x08group_id\x18\x07 \x01(\tH\x04\x88\x01\x01\"<\n\x04Role\x12\x14\n\x10ROLE_UNSPECIFIED\x10\x00\x12\r\n\tROLE_USER\x10\x01\x12\x0f\n\x0bROLE_SYSTEM\x10\x02\x42\x0e\n\x0c\x63ontent_typeB\x07\n\x05_nameB\x07\n\x05_otidB\x0c\n\n_sender_idB\x0b\n\t_group_id\">\n\x12MessageContentList\x12(\n\x05parts\x18\x01 \x03(\x0b\x32\x19.mirix.MessageContentPart\"\xbc\x01\n\x12MessageContentPart\x12\"\n\x04text\x18\x01 \x01(\x0b\x32\x12.mirix.TextContentH\x00\x12$\n\x05image\x18\x02 \x01(\x0b\x32\x13.mirix.ImageContentH\x00\x12\"\n\x04\x66ile\x18\x03 \x01(\x0b\x32\x12.mirix.FileContentH\x00\x12-\n\ncloud_file\x18\x04 \x01(\x0b\x32\x17.mirix.CloudFileContentH\x00\x42\t\n\x07\x63ontent\"\x1b\n\x0bTextContent\x12\x0c\n\x04text\x18\x01 \x01(\t\"@\n\x0cImageContent\x12\x10\n\x08image_id\x18\x01 \x01(\t\x12\x13\n\x06\x64\x65tail\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\t\n\x07_detail\"\x1e\n\x0b\x46ileContent\x12\x0f\n\x07\x66ile_id\x18\x01 \x01(\t\"*\n\x10\x43loudFileContent\x12\x16\n\x0e\x63loud_file_uri\x18\x01 \x01(\tb\x06proto3')
+DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
+    b'\n\x19mirix/queue/message.proto\x12\x05mirix\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1cgoogle/protobuf/struct.proto"\xaf\x05\n\x0cQueueMessage\x12\x11\n\tclient_id\x18\x01 \x01(\t\x12\x10\n\x08\x61gent_id\x18\x02 \x01(\t\x12,\n\x0einput_messages\x18\x03 \x03(\x0b\x32\x14.mirix.MessageCreate\x12\x15\n\x08\x63haining\x18\x04 \x01(\x08H\x00\x88\x01\x01\x12\x14\n\x07user_id\x18\x05 \x01(\tH\x01\x88\x01\x01\x12\x14\n\x07verbose\x18\x06 \x01(\x08H\x02\x88\x01\x01\x12,\n\x0b\x66ilter_tags\x18\x07 \x01(\x0b\x32\x17.google.protobuf.Struct\x12\x16\n\tuse_cache\x18\x08 \x01(\x08H\x03\x88\x01\x01\x12\x18\n\x0boccurred_at\x18\t \x01(\tH\x04\x88\x01\x01\x12\x1e\n\x11langfuse_trace_id\x18\n \x01(\tH\x05\x88\x01\x01\x12$\n\x17langfuse_observation_id\x18\x0b \x01(\tH\x06\x88\x01\x01\x12 \n\x13langfuse_session_id\x18\x0c \x01(\tH\x07\x88\x01\x01\x12\x1d\n\x10langfuse_user_id\x18\r \x01(\tH\x08\x88\x01\x01\x12\x32\n\x11\x62lock_filter_tags\x18\x0e \x01(\x0b\x32\x17.google.protobuf.Struct\x12*\n\x1d\x62lock_filter_tags_update_mode\x18\x0f \x01(\tH\t\x88\x01\x01\x42\x0b\n\t_chainingB\n\n\x08_user_idB\n\n\x08_verboseB\x0c\n\n_use_cacheB\x0e\n\x0c_occurred_atB\x14\n\x12_langfuse_trace_idB\x1a\n\x18_langfuse_observation_idB\x16\n\x14_langfuse_session_idB\x13\n\x11_langfuse_user_idB \n\x1e_block_filter_tags_update_mode"\xcf\x01\n\x04User\x12\n\n\x02id\x18\x01 \x01(\t\x12\x17\n\x0forganization_id\x18\x02 \x01(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x0e\n\x06status\x18\x04 \x01(\t\x12\x10\n\x08timezone\x18\x05 \x01(\t\x12.\n\ncreated_at\x18\x06 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12.\n\nupdated_at\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12\x12\n\nis_deleted\x18\x08 \x01(\x08"\xd9\x02\n\rMessageCreate\x12\'\n\x04role\x18\x01 \x01(\x0e\x32\x19.mirix.MessageCreate.Role\x12\x16\n\x0ctext_content\x18\x02 \x01(\tH\x00\x12\x37\n\x12structured_content\x18\x03 \x01(\x0b\x32\x19.mirix.MessageContentListH\x00\x12\x11\n\x04name\x18\x04 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04otid\x18\x05 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tsender_id\x18\x06 \x01(\tH\x03\x88\x01\x01\x12\x15\n\x08group_id\x18\x07 \x01(\tH\x04\x88\x01\x01"<\n\x04Role\x12\x14\n\x10ROLE_UNSPECIFIED\x10\x00\x12\r\n\tROLE_USER\x10\x01\x12\x0f\n\x0bROLE_SYSTEM\x10\x02\x42\x0e\n\x0c\x63ontent_typeB\x07\n\x05_nameB\x07\n\x05_otidB\x0c\n\n_sender_idB\x0b\n\t_group_id">\n\x12MessageContentList\x12(\n\x05parts\x18\x01 \x03(\x0b\x32\x19.mirix.MessageContentPart"\xbc\x01\n\x12MessageContentPart\x12"\n\x04text\x18\x01 \x01(\x0b\x32\x12.mirix.TextContentH\x00\x12$\n\x05image\x18\x02 \x01(\x0b\x32\x13.mirix.ImageContentH\x00\x12"\n\x04\x66ile\x18\x03 \x01(\x0b\x32\x12.mirix.FileContentH\x00\x12-\n\ncloud_file\x18\x04 \x01(\x0b\x32\x17.mirix.CloudFileContentH\x00\x42\t\n\x07\x63ontent"\x1b\n\x0bTextContent\x12\x0c\n\x04text\x18\x01 \x01(\t"@\n\x0cImageContent\x12\x10\n\x08image_id\x18\x01 \x01(\t\x12\x13\n\x06\x64\x65tail\x18\x02 \x01(\tH\x00\x88\x01\x01\x42\t\n\x07_detail"\x1e\n\x0b\x46ileContent\x12\x0f\n\x07\x66ile_id\x18\x01 \x01(\t"*\n\x10\x43loudFileContent\x12\x16\n\x0e\x63loud_file_uri\x18\x01 \x01(\tb\x06proto3'
+)
 
 _globals = globals()
 _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
-_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'mirix.queue.message_pb2', _globals)
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "mirix.queue.message_pb2", _globals)
 if not _descriptor._USE_C_DESCRIPTORS:
-  DESCRIPTOR._loaded_options = None
-  _globals['_QUEUEMESSAGE']._serialized_start=100
-  _globals['_QUEUEMESSAGE']._serialized_end=787
-  _globals['_USER']._serialized_start=790
-  _globals['_USER']._serialized_end=997
-  _globals['_MESSAGECREATE']._serialized_start=1000
-  _globals['_MESSAGECREATE']._serialized_end=1345
-  _globals['_MESSAGECREATE_ROLE']._serialized_start=1224
-  _globals['_MESSAGECREATE_ROLE']._serialized_end=1284
-  _globals['_MESSAGECONTENTLIST']._serialized_start=1347
-  _globals['_MESSAGECONTENTLIST']._serialized_end=1409
-  _globals['_MESSAGECONTENTPART']._serialized_start=1412
-  _globals['_MESSAGECONTENTPART']._serialized_end=1600
-  _globals['_TEXTCONTENT']._serialized_start=1602
-  _globals['_TEXTCONTENT']._serialized_end=1629
-  _globals['_IMAGECONTENT']._serialized_start=1631
-  _globals['_IMAGECONTENT']._serialized_end=1695
-  _globals['_FILECONTENT']._serialized_start=1697
-  _globals['_FILECONTENT']._serialized_end=1727
-  _globals['_CLOUDFILECONTENT']._serialized_start=1729
-  _globals['_CLOUDFILECONTENT']._serialized_end=1771
+    DESCRIPTOR._loaded_options = None
+    _globals["_QUEUEMESSAGE"]._serialized_start = 100
+    _globals["_QUEUEMESSAGE"]._serialized_end = 787
+    _globals["_USER"]._serialized_start = 790
+    _globals["_USER"]._serialized_end = 997
+    _globals["_MESSAGECREATE"]._serialized_start = 1000
+    _globals["_MESSAGECREATE"]._serialized_end = 1345
+    _globals["_MESSAGECREATE_ROLE"]._serialized_start = 1224
+    _globals["_MESSAGECREATE_ROLE"]._serialized_end = 1284
+    _globals["_MESSAGECONTENTLIST"]._serialized_start = 1347
+    _globals["_MESSAGECONTENTLIST"]._serialized_end = 1409
+    _globals["_MESSAGECONTENTPART"]._serialized_start = 1412
+    _globals["_MESSAGECONTENTPART"]._serialized_end = 1600
+    _globals["_TEXTCONTENT"]._serialized_start = 1602
+    _globals["_TEXTCONTENT"]._serialized_end = 1629
+    _globals["_IMAGECONTENT"]._serialized_start = 1631
+    _globals["_IMAGECONTENT"]._serialized_end = 1695
+    _globals["_FILECONTENT"]._serialized_start = 1697
+    _globals["_FILECONTENT"]._serialized_end = 1727
+    _globals["_CLOUDFILECONTENT"]._serialized_start = 1729
+    _globals["_CLOUDFILECONTENT"]._serialized_end = 1771
 # @@protoc_insertion_point(module_scope)
diff --git a/mirix/queue/message_pb2.pyi b/mirix/queue/message_pb2.pyi
index 4822f24f9..854e25c2d 100644
--- a/mirix/queue/message_pb2.pyi
+++ b/mirix/queue/message_pb2.pyi
@@ -1,15 +1,36 @@
-from google.protobuf import timestamp_pb2 as _timestamp_pb2
+from typing import ClassVar as _ClassVar
+from typing import Iterable as _Iterable
+from typing import Mapping as _Mapping
+from typing import Optional as _Optional
+from typing import Union as _Union
+
+from google.protobuf import descriptor as _descriptor
+from google.protobuf import message as _message
 from google.protobuf import struct_pb2 as _struct_pb2
+from google.protobuf import timestamp_pb2 as _timestamp_pb2
 from google.protobuf.internal import containers as _containers
 from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union
 
 DESCRIPTOR: _descriptor.FileDescriptor
 
 class QueueMessage(_message.Message):
-    __slots__ = ("client_id", "agent_id", "input_messages", "chaining", "user_id", "verbose", "filter_tags", "use_cache", "occurred_at", "langfuse_trace_id", "langfuse_observation_id", "langfuse_session_id", "langfuse_user_id", "block_filter_tags", "block_filter_tags_update_mode")
+    __slots__ = (
+        "client_id",
+        "agent_id",
+        "input_messages",
+        "chaining",
+        "user_id",
+        "verbose",
+        "filter_tags",
+        "use_cache",
+        "occurred_at",
+        "langfuse_trace_id",
+        "langfuse_observation_id",
+        "langfuse_session_id",
+        "langfuse_user_id",
+        "block_filter_tags",
+        "block_filter_tags_update_mode",
+    )
     CLIENT_ID_FIELD_NUMBER: _ClassVar[int]
     AGENT_ID_FIELD_NUMBER: _ClassVar[int]
     INPUT_MESSAGES_FIELD_NUMBER: _ClassVar[int]
@@ -40,7 +61,24 @@ class QueueMessage(_message.Message):
     langfuse_user_id: str
     block_filter_tags: _struct_pb2.Struct
     block_filter_tags_update_mode: str
-    def __init__(self, client_id: _Optional[str] = ..., agent_id: _Optional[str] = ..., input_messages: _Optional[_Iterable[_Union[MessageCreate, _Mapping]]] = ..., chaining: bool = ..., user_id: _Optional[str] = ..., verbose: bool = ..., filter_tags: _Optional[_Union[_struct_pb2.Struct, _Mapping]] = ..., use_cache: bool = ..., occurred_at: _Optional[str] = ..., langfuse_trace_id: _Optional[str] = ..., langfuse_observation_id: _Optional[str] = ..., langfuse_session_id: _Optional[str] = ..., langfuse_user_id: _Optional[str] = ..., block_filter_tags: _Optional[_Union[_struct_pb2.Struct, _Mapping]] = ..., block_filter_tags_update_mode: _Optional[str] = ...) -> None: ...
+    def __init__(
+        self,
+        client_id: _Optional[str] = ...,
+        agent_id: _Optional[str] = ...,
+        input_messages: _Optional[_Iterable[_Union[MessageCreate, _Mapping]]] = ...,
+        chaining: bool = ...,
+        user_id: _Optional[str] = ...,
+        verbose: bool = ...,
+        filter_tags: _Optional[_Union[_struct_pb2.Struct, _Mapping]] = ...,
+        use_cache: bool = ...,
+        occurred_at: _Optional[str] = ...,
+        langfuse_trace_id: _Optional[str] = ...,
+        langfuse_observation_id: _Optional[str] = ...,
+        langfuse_session_id: _Optional[str] = ...,
+        langfuse_user_id: _Optional[str] = ...,
+        block_filter_tags: _Optional[_Union[_struct_pb2.Struct, _Mapping]] = ...,
+        block_filter_tags_update_mode: _Optional[str] = ...,
+    ) -> None: ...
 
 class User(_message.Message):
     __slots__ = ("id", "organization_id", "name", "status", "timezone", "created_at", "updated_at", "is_deleted")
@@ -60,15 +98,27 @@ class User(_message.Message):
     created_at: _timestamp_pb2.Timestamp
     updated_at: _timestamp_pb2.Timestamp
     is_deleted: bool
-    def __init__(self, id: _Optional[str] = ..., organization_id: _Optional[str] = ..., name: _Optional[str] = ..., status: _Optional[str] = ..., timezone: _Optional[str] = ..., created_at: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ..., updated_at: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ..., is_deleted: bool = ...) -> None: ...
+    def __init__(
+        self,
+        id: _Optional[str] = ...,
+        organization_id: _Optional[str] = ...,
+        name: _Optional[str] = ...,
+        status: _Optional[str] = ...,
+        timezone: _Optional[str] = ...,
+        created_at: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ...,
+        updated_at: _Optional[_Union[_timestamp_pb2.Timestamp, _Mapping]] = ...,
+        is_deleted: bool = ...,
+    ) -> None: ...
 
 class MessageCreate(_message.Message):
     __slots__ = ("role", "text_content", "structured_content", "name", "otid", "sender_id", "group_id")
+
     class Role(int, metaclass=_enum_type_wrapper.EnumTypeWrapper):
         __slots__ = ()
         ROLE_UNSPECIFIED: _ClassVar[MessageCreate.Role]
         ROLE_USER: _ClassVar[MessageCreate.Role]
         ROLE_SYSTEM: _ClassVar[MessageCreate.Role]
+
     ROLE_UNSPECIFIED: MessageCreate.Role
     ROLE_USER: MessageCreate.Role
     ROLE_SYSTEM: MessageCreate.Role
@@ -86,7 +136,16 @@ class MessageCreate(_message.Message):
     otid: str
     sender_id: str
     group_id: str
-    def __init__(self, role: _Optional[_Union[MessageCreate.Role, str]] = ..., text_content: _Optional[str] = ..., structured_content: _Optional[_Union[MessageContentList, _Mapping]] = ..., name: _Optional[str] = ..., otid: _Optional[str] = ..., sender_id: _Optional[str] = ..., group_id: _Optional[str] = ...) -> None: ...
+    def __init__(
+        self,
+        role: _Optional[_Union[MessageCreate.Role, str]] = ...,
+        text_content: _Optional[str] = ...,
+        structured_content: _Optional[_Union[MessageContentList, _Mapping]] = ...,
+        name: _Optional[str] = ...,
+        otid: _Optional[str] = ...,
+        sender_id: _Optional[str] = ...,
+        group_id: _Optional[str] = ...,
+    ) -> None: ...
 
 class MessageContentList(_message.Message):
     __slots__ = ("parts",)
@@ -104,7 +163,13 @@ class MessageContentPart(_message.Message):
     image: ImageContent
     file: FileContent
     cloud_file: CloudFileContent
-    def __init__(self, text: _Optional[_Union[TextContent, _Mapping]] = ..., image: _Optional[_Union[ImageContent, _Mapping]] = ..., file: _Optional[_Union[FileContent, _Mapping]] = ..., cloud_file: _Optional[_Union[CloudFileContent, _Mapping]] = ...) -> None: ...
+    def __init__(
+        self,
+        text: _Optional[_Union[TextContent, _Mapping]] = ...,
+        image: _Optional[_Union[ImageContent, _Mapping]] = ...,
+        file: _Optional[_Union[FileContent, _Mapping]] = ...,
+        cloud_file: _Optional[_Union[CloudFileContent, _Mapping]] = ...,
+    ) -> None: ...
 
 class TextContent(_message.Message):
     __slots__ = ("text",)
diff --git a/mirix/queue/message_pb2_grpc.py b/mirix/queue/message_pb2_grpc.py
index e4a393dc2..ae29b37a4 100644
--- a/mirix/queue/message_pb2_grpc.py
+++ b/mirix/queue/message_pb2_grpc.py
@@ -1,24 +1,24 @@
 # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
 """Client and server classes corresponding to protobuf-defined services."""
-import grpc
-import warnings
 
+import grpc
 
-GRPC_GENERATED_VERSION = '1.66.2'
+GRPC_GENERATED_VERSION = "1.66.2"
 GRPC_VERSION = grpc.__version__
 _version_not_supported = False
 
 try:
     from grpc._utilities import first_version_is_lower
+
     _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
 except ImportError:
     _version_not_supported = True
 
 if _version_not_supported:
     raise RuntimeError(
-        f'The grpc package installed is at version {GRPC_VERSION},'
-        + f' but the generated code in mirix/queue/message_pb2_grpc.py depends on'
-        + f' grpcio>={GRPC_GENERATED_VERSION}.'
-        + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
-        + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
+        f"The grpc package installed is at version {GRPC_VERSION},"
+        + " but the generated code in mirix/queue/message_pb2_grpc.py depends on"
+        + f" grpcio>={GRPC_GENERATED_VERSION}."
+        + f" Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}"
+        + f" or downgrade your generated code using grpcio-tools<={GRPC_VERSION}."
     )
diff --git a/mirix/queue/queue_util.py b/mirix/queue/queue_util.py
index deea94891..892a5a97a 100644
--- a/mirix/queue/queue_util.py
+++ b/mirix/queue/queue_util.py
@@ -8,11 +8,9 @@
 from mirix.observability import add_trace_to_queue_message
 from mirix.queue.message_pb2 import MessageCreate as ProtoMessageCreate
 from mirix.queue.message_pb2 import QueueMessage
-from mirix.queue.message_pb2 import User as ProtoUser
 from mirix.schemas.client import Client
 from mirix.schemas.enums import MessageRole
 from mirix.schemas.message import MessageCreate
-from mirix.schemas.message import MessageCreate as PydanticMessageCreate
 from mirix.schemas.mirix_message_content import TextContent
 
 logger = logging.getLogger(__name__)
diff --git a/mirix/queue/worker.py b/mirix/queue/worker.py
index 2cde1df6a..59fe0c850 100644
--- a/mirix/queue/worker.py
+++ b/mirix/queue/worker.py
@@ -18,7 +18,6 @@
 if TYPE_CHECKING:
     from mirix.schemas.client import Client
     from mirix.schemas.message import MessageCreate
-    from mirix.schemas.user import User
 
     from .queue_interface import QueueInterface
 
@@ -154,9 +153,7 @@ async def _process_message_async(self, message: QueueMessage) -> None:
             async def _resolve_actor_and_user():
                 actor = await server.client_manager.get_client_by_id(client_id)
                 if not actor:
-                    raise ValueError(
-                        f"Client with id={client_id} not found in database"
-                    )
+                    raise ValueError(f"Client with id={client_id} not found in database")
 
                 user_manager = UserManager()
                 if user_id:
@@ -189,8 +186,7 @@ async def _resolve_actor_and_user():
                             )
                         except Exception as create_error:
                             logger.error(
-                                "Failed to auto-create user with id=%s: %s. "
-                                "Falling back to admin user.",
+                                "Failed to auto-create user with id=%s: %s. " "Falling back to admin user.",
                                 user_id,
                                 create_error,
                             )
@@ -218,9 +214,7 @@ async def _resolve_actor_and_user():
                     raise ValueError("block_filter_tags was provided but could not be parsed as a dict") from e
 
             block_filter_tags_update_mode = (
-                message.block_filter_tags_update_mode
-                if message.HasField("block_filter_tags_update_mode")
-                else "merge"
+                message.block_filter_tags_update_mode if message.HasField("block_filter_tags_update_mode") else "merge"
             )
 
             # Log the processing
@@ -306,9 +300,7 @@ async def _consume_loop(self) -> None:
         while self._running:
             try:
                 if self._partition_id is not None and hasattr(self.queue, "get_from_partition"):
-                    message = await self.queue.get_from_partition(
-                        self._partition_id, timeout=1.0
-                    )
+                    message = await self.queue.get_from_partition(self._partition_id, timeout=1.0)
                 else:
                     message = await self.queue.get(timeout=1.0)
 
diff --git a/mirix/schemas/memory.py b/mirix/schemas/memory.py
index e00f4b99b..4f1258624 100755
--- a/mirix/schemas/memory.py
+++ b/mirix/schemas/memory.py
@@ -5,7 +5,7 @@
 
 # Forward referencing to avoid circular import with Agent -> Memory -> Agent
 if TYPE_CHECKING:
-    from mirix.schemas.agent import AgentState
+    pass
 
 from mirix.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT
 from mirix.schemas.block import Block
diff --git a/mirix/schemas/message.py b/mirix/schemas/message.py
index d3ece01cc..515941280 100644
--- a/mirix/schemas/message.py
+++ b/mirix/schemas/message.py
@@ -1,10 +1,8 @@
 from __future__ import annotations
 
-import copy
 import json
 import uuid
 import warnings
-from collections import OrderedDict
 from datetime import datetime, timezone
 from typing import Any, Dict, List, Literal, Optional, Union
 
@@ -609,8 +607,8 @@ def dict_to_message(
             if openai_message_dict["role"] == "assistant":
                 if not content and tool_calls is None:
                     raise ValueError(
-                        f"Invalid assistant message: must have content or tool_calls. "
-                        f"Got empty content and no tool_calls."
+                        "Invalid assistant message: must have content or tool_calls. "
+                        "Got empty content and no tool_calls."
                     )
 
             # If we're going from tool-call style
diff --git a/mirix/schemas/mirix_base.py b/mirix/schemas/mirix_base.py
index 4890489f9..1a1f9f06e 100755
--- a/mirix/schemas/mirix_base.py
+++ b/mirix/schemas/mirix_base.py
@@ -5,6 +5,7 @@
 from uuid import UUID
 
 from pydantic import BaseModel, ConfigDict, Field, field_validator
+from pydantic.fields import FieldInfo
 
 # from: https://gist.github.com/norton120/22242eadb80bf2cf1dd54a961b151c61
 
@@ -30,7 +31,7 @@ class MirixBase(BaseModel):
     #    raise NotImplementedError("All schemas must have an __id_prefix__ attribute!")
 
     @classmethod
-    def generate_id_field(cls, prefix: Optional[str] = None) -> "Field":
+    def generate_id_field(cls, prefix: Optional[str] = None) -> FieldInfo:
         prefix = prefix or cls.__id_prefix__
 
         return Field(
diff --git a/mirix/schemas/openai/chat_completion_request.py b/mirix/schemas/openai/chat_completion_request.py
index 1e5c26d7a..69b2fba69 100755
--- a/mirix/schemas/openai/chat_completion_request.py
+++ b/mirix/schemas/openai/chat_completion_request.py
@@ -70,7 +70,6 @@ class FunctionCall(BaseModel):
 class ToolFunctionChoice(BaseModel):
     # The type of the tool. Currently, only function is supported
     type: Literal["function"] = "function"
-    # type: str = Field(default="function", const=True)
     function: FunctionCall
 
 
@@ -87,7 +86,6 @@ class FunctionSchema(BaseModel):
 class Tool(BaseModel):
     # The type of the tool. Currently, only function is supported
     type: Literal["function"] = "function"
-    # type: str = Field(default="function", const=True)
     function: FunctionSchema
 
 
diff --git a/mirix/schemas/openai/chat_completions.py b/mirix/schemas/openai/chat_completions.py
index da1957771..43c03e359 100755
--- a/mirix/schemas/openai/chat_completions.py
+++ b/mirix/schemas/openai/chat_completions.py
@@ -70,7 +70,6 @@ class FunctionCall(BaseModel):
 class ToolFunctionChoice(BaseModel):
     # The type of the tool. Currently, only function is supported
     type: Literal["function"] = "function"
-    # type: str = Field(default="function", const=True)
     function: FunctionCall
 
 
@@ -87,7 +86,6 @@ class FunctionSchema(BaseModel):
 class Tool(BaseModel):
     # The type of the tool. Currently, only function is supported
     type: Literal["function"] = "function"
-    # type: str = Field(default="function", const=True)
     function: FunctionSchema
 
 
diff --git a/mirix/schemas/providers.py b/mirix/schemas/providers.py
index a3e5c7b4e..011ceff2f 100755
--- a/mirix/schemas/providers.py
+++ b/mirix/schemas/providers.py
@@ -5,7 +5,6 @@
 from pydantic import Field, model_validator
 
 from mirix.constants import LLM_MAX_TOKENS, MIN_CONTEXT_WINDOW
-from mirix.utils import smart_urljoin
 from mirix.llm_api.azure_openai import (
     get_azure_chat_completions_endpoint,
     get_azure_embeddings_endpoint,
@@ -15,6 +14,7 @@
 from mirix.schemas.embedding_config import EmbeddingConfig
 from mirix.schemas.llm_config import LLMConfig
 from mirix.schemas.mirix_base import MirixBase
+from mirix.utils import smart_urljoin
 
 logger = get_logger(__name__)
 
diff --git a/mirix/sdk.py b/mirix/sdk.py
index 8632c4ae4..f6fea7720 100644
--- a/mirix/sdk.py
+++ b/mirix/sdk.py
@@ -4,7 +4,6 @@
 All I/O methods are async. Use Mirix.create() to construct an instance.
 """
 
-import asyncio
 import logging
 import os
 from pathlib import Path
@@ -133,6 +132,7 @@ async def create(
             os.environ[f"{model_provider.upper()}_API_KEY"] = api_key
         import mirix.settings
         from mirix.settings import ModelSettings
+
         new_settings = ModelSettings()
         for field_name in ModelSettings.model_fields:
             setattr(
@@ -148,6 +148,7 @@ async def create(
             config_path = Path(config_path)
             if config_path.exists():
                 import yaml
+
                 with open(config_path, "r") as f:
                     config_data = yaml.safe_load(f)
                     system_prompts_folder = config_data.get("system_prompts_folder")
@@ -211,9 +212,7 @@ async def add(self, content: str, **kwargs) -> Dict[str, Any]:
             await memory_agent.add("John likes pizza")
         """
         self._require_meta_agent()
-        response = await self._client.send_message(
-            agent_id=self._meta_agent.id, role="user", message=content, **kwargs
-        )
+        response = await self._client.send_message(agent_id=self._meta_agent.id, role="user", message=content, **kwargs)
         if hasattr(response, "messages") and response.messages:
             for msg in reversed(response.messages):
                 if msg.role == "assistant":
@@ -521,7 +520,9 @@ async def insert_tool(
             )
 
             # Use the tool manager's create_or_update_tool method
-            created_tool = await tool_manager.create_or_update_tool(pydantic_tool=pydantic_tool, actor=self._client.client)
+            created_tool = await tool_manager.create_or_update_tool(
+                pydantic_tool=pydantic_tool, actor=self._client.client
+            )
 
         # Apply tool to all existing agents if requested
         if apply_to_agents:
diff --git a/mirix/server/rest_api.py b/mirix/server/rest_api.py
index 27f306802..8f6baa9fe 100644
--- a/mirix/server/rest_api.py
+++ b/mirix/server/rest_api.py
@@ -4,17 +4,15 @@
 allowing MirixClient instances to communicate with a cloud-hosted server.
 """
 
-import asyncio
-import copy
 import functools
 import json
 import traceback
 from contextlib import asynccontextmanager
 from datetime import datetime
-from typing import Any, Dict, List, Literal, Optional, Union
+from typing import Any, Dict, List, Optional
 
 import httpx
-from fastapi import APIRouter, Body, FastAPI, Header, HTTPException, Query, Request
+from fastapi import APIRouter, FastAPI, Header, HTTPException, Query, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel, Field
@@ -24,43 +22,29 @@
 from mirix.log import get_logger
 from mirix.orm.errors import NoResultFound
 from mirix.schemas.agent import AgentState, AgentType, CreateAgent
-from mirix.schemas.block import Block, BlockUpdate, CreateBlock, Human, Persona
-from mirix.schemas.client import Client, ClientCreate, ClientUpdate
+from mirix.schemas.block import Block
+from mirix.schemas.client import Client, ClientUpdate
 from mirix.schemas.embedding_config import EmbeddingConfig
 from mirix.schemas.enums import MessageRole
-from mirix.schemas.environment_variables import (
-    SandboxEnvironmentVariable,
-    SandboxEnvironmentVariableCreate,
-    SandboxEnvironmentVariableUpdate,
-)
-from mirix.schemas.file import FileMetadata
 from mirix.schemas.llm_config import LLMConfig
-from mirix.schemas.memory import ArchivalMemorySummary, Memory, RecallMemorySummary
+from mirix.schemas.memory import Memory
 from mirix.schemas.message import Message, MessageCreate
 from mirix.schemas.mirix_response import MirixResponse
 from mirix.schemas.organization import Organization
 from mirix.schemas.procedural_memory import ProceduralMemoryItemUpdate
 from mirix.schemas.raw_memory import (
-    RawMemoryItem,
     RawMemoryItemCreateRequest,
     RawMemoryItemUpdate,
     SearchRawMemoryRequest,
     SearchRawMemoryResponse,
 )
 from mirix.schemas.resource_memory import ResourceMemoryItemUpdate
-from mirix.schemas.sandbox_config import (
-    E2BSandboxConfig,
-    LocalSandboxConfig,
-    SandboxConfig,
-    SandboxConfigCreate,
-    SandboxConfigUpdate,
-)
 from mirix.schemas.semantic_memory import SemanticMemoryItemUpdate
-from mirix.schemas.tool import Tool, ToolCreate, ToolUpdate
+from mirix.schemas.tool import Tool
 from mirix.schemas.tool_rule import BaseToolRule
 from mirix.schemas.user import User
 from mirix.server.server import AsyncServer, ensure_tables_created
-from mirix.settings import model_settings, settings
+from mirix.settings import model_settings
 from mirix.utils import convert_message_to_mirix_message
 
 logger = get_logger(__name__)
@@ -521,9 +505,7 @@ async def extract_topics_and_temporal_info(
     return None, None
 
 
-async def extract_topics_from_messages(
-    messages: List[Dict[str, Any]], llm_config: LLMConfig
-) -> Optional[str]:
+async def extract_topics_from_messages(messages: List[Dict[str, Any]], llm_config: LLMConfig) -> Optional[str]:
     """
     Extract topics from a list of messages using LLM.
 
@@ -570,9 +552,7 @@ def _flatten_messages_to_plain_text(messages: List[Dict[str, Any]]) -> str:
     return "\n".join(transcript_parts)
 
 
-async def extract_topics_with_local_model(
-    messages: List[Dict[str, Any]], model_name: str
-) -> Optional[str]:
+async def extract_topics_with_local_model(messages: List[Dict[str, Any]], model_name: str) -> Optional[str]:
     """
     Extract topics using a locally hosted Ollama model via the /api/chat endpoint.
 
@@ -774,13 +754,9 @@ async def create_agent(
     if request.name:
         create_params["name"] = request.name
 
-    agent_state = await server.create_agent(
-        CreateAgent(**create_params), client
-    )
+    agent_state = await server.create_agent(CreateAgent(**create_params), client)
 
-    return await server.agent_manager.get_agent_by_id(
-        agent_state.id, client
-    )
+    return await server.agent_manager.get_agent_by_id(agent_state.id, client)
 
 
 @router.get("/agents/{agent_id}", response_model=AgentState)
@@ -797,10 +773,8 @@ async def get_agent(
     client = await server.client_manager.get_client_by_id(client_id)
 
     try:
-        return await server.agent_manager.get_agent_by_id(
-            agent_id, client
-        )
-    except NoResultFound as e:
+        return await server.agent_manager.get_agent_by_id(agent_id, client)
+    except NoResultFound:
         raise HTTPException(status_code=404, detail=f"Agent {agent_id} not found or not accessible")
 
 
@@ -896,9 +870,7 @@ async def update_agent_system_prompt_by_name(
     client = await server.client_manager.get_client_by_id(client_id)
 
     # List all top-level agents for this client
-    top_level_agents = await server.agent_manager.list_agents(
-        actor=client, limit=1000
-    )
+    top_level_agents = await server.agent_manager.list_agents(actor=client, limit=1000)
     # Also get sub-agents (children of meta agent)
     all_agents = list(top_level_agents)
     for agent in top_level_agents:
@@ -1119,9 +1091,7 @@ async def list_tools(
     server = get_server()
     client_id, org_id = await get_client_and_org(x_client_id, x_org_id)
     client = await server.client_manager.get_client_by_id(client_id)
-    return await server.tool_manager.list_tools(
-        cursor=cursor, limit=limit, actor=client
-    )
+    return await server.tool_manager.list_tools(cursor=cursor, limit=limit, actor=client)
 
 
 @router.get("/tools/{tool_id}", response_model=Tool)
@@ -1270,9 +1240,7 @@ async def list_organizations(
 ):
     """List organizations."""
     server = get_server()
-    return await server.organization_manager.list_organizations(
-        cursor=cursor, limit=limit
-    )
+    return await server.organization_manager.list_organizations(cursor=cursor, limit=limit)
 
 
 @router.post("/organizations", response_model=Organization)
@@ -1283,9 +1251,7 @@ async def create_organization(
 ):
     """Create an organization."""
     server = get_server()
-    return await server.organization_manager.create_organization(
-        pydantic_org=Organization(name=name)
-    )
+    return await server.organization_manager.create_organization(pydantic_org=Organization(name=name))
 
 
 @router.get("/organizations/{org_id}", response_model=Organization)
@@ -1342,9 +1308,7 @@ async def create_or_get_organization(
 
     # Create new organization if it doesn't exist
     org_create = OrganizationCreate(id=org_id, name=request.name or org_id)
-    org = await server.organization_manager.create_organization(
-        pydantic_org=Organization(**org_create.model_dump())
-    )
+    org = await server.organization_manager.create_organization(pydantic_org=Organization(**org_create.model_dump()))
     logger.debug("Created new organization: %s", org_id)
     return org
 
@@ -1890,7 +1854,7 @@ async def delete_client_api_key(
             "message": f"API key {api_key_id} deleted successfully",
             "id": api_key_id,
         }
-    except Exception as e:
+    except Exception:
         raise HTTPException(status_code=404, detail=f"API key {api_key_id} not found")
 
 
@@ -1947,9 +1911,7 @@ async def initialize_meta_agent(
 
     # Check if meta agent already exists for this client
     # list_agents now automatically filters by client (organization_id + _created_by_id)
-    existing_meta_agents = await server.agent_manager.list_agents(
-        actor=client, limit=1000
-    )
+    existing_meta_agents = await server.agent_manager.list_agents(actor=client, limit=1000)
 
     assert len(existing_meta_agents) <= 1, "Only one meta agent can be created per client"
 
@@ -2438,9 +2400,7 @@ async def retrieve_memory_with_conversation(
     filter_tags = dict(request.filter_tags) if request.filter_tags is not None else {}
 
     # Get all agents for this client (automatically filtered by client via apply_access_predicate)
-    all_agents = await server.agent_manager.list_agents(
-        actor=client, limit=1000
-    )
+    all_agents = await server.agent_manager.list_agents(actor=client, limit=1000)
 
     if not all_agents:
         return {
@@ -2484,9 +2444,7 @@ async def retrieve_memory_with_conversation(
 
         if topics is None:
             # NEW: Extract both topics and temporal expression
-            topics, temporal_expr = await extract_topics_and_temporal_info(
-                request.messages, llm_config
-            )
+            topics, temporal_expr = await extract_topics_and_temporal_info(request.messages, llm_config)
 
         logger.debug("Extracted topics: %s, temporal: %s", topics, temporal_expr)
         key_words = topics if topics else ""
@@ -2620,9 +2578,7 @@ async def retrieve_memory_with_topic(
         parsed_filter_tags = {}
 
     # Get all agents for this client (automatically filtered by client via apply_access_predicate)
-    all_agents = await server.agent_manager.list_agents(
-        actor=client, limit=1000
-    )
+    all_agents = await server.agent_manager.list_agents(actor=client, limit=1000)
 
     if not all_agents:
         return {
@@ -2763,9 +2719,7 @@ async def search_memory(
         logger.debug("No user_id provided, using admin user: %s", user_id)
 
     # Get all agents for this client (automatically filtered by client via apply_access_predicate)
-    all_agents = await server.agent_manager.list_agents(
-        actor=client, limit=1000
-    )
+    all_agents = await server.agent_manager.list_agents(actor=client, limit=1000)
 
     if not all_agents:
         return {
@@ -3376,9 +3330,7 @@ async def search_memory_all_users(
             logger.warning("Invalid end_date format: %s", e)
 
     # Get agents for this client
-    all_agents = await server.agent_manager.list_agents(
-        actor=client, limit=1000
-    )
+    all_agents = await server.agent_manager.list_agents(actor=client, limit=1000)
     if not all_agents:
         return {
             "success": False,
@@ -3884,9 +3836,7 @@ async def list_memory_components(
     limit = max(1, min(limit, 200))  # guardrails
 
     # Need an agent state for memory manager configuration
-    agents = await server.agent_manager.list_agents(
-        actor=client, limit=1
-    )
+    agents = await server.agent_manager.list_agents(actor=client, limit=1)
     if not agents:
         raise HTTPException(status_code=404, detail="No agents found for this client")
     agent_state = agents[0]
@@ -4446,9 +4396,7 @@ async def create_raw_memory(
         raise HTTPException(status_code=401, detail="Client or client_id required")
 
     # Get agent_state for embedding generation (required)
-    agents = await server.agent_manager.list_agents(
-        actor=client, limit=1
-    )
+    agents = await server.agent_manager.list_agents(actor=client, limit=1)
     agent_state = agents[0] if agents else None
 
     if not agent_state:
@@ -4623,9 +4571,7 @@ async def update_raw_memory(
             raise HTTPException(status_code=404, detail=f"User {user_id} not found")
 
     # Get agent_state for embedding generation (required)
-    agents = await server.agent_manager.list_agents(
-        actor=client, limit=1
-    )
+    agents = await server.agent_manager.list_agents(actor=client, limit=1)
     agent_state = agents[0] if agents else None
 
     if not agent_state:
@@ -5242,9 +5188,7 @@ async def dashboard_login(request: DashboardLoginRequest):
 
     auth_manager = ClientAuthManager()
 
-    client, access_token, auth_status = await auth_manager.authenticate(
-        request.email, request.password
-    )
+    client, access_token, auth_status = await auth_manager.authenticate(request.email, request.password)
 
     if auth_status == "not_found":
         raise HTTPException(status_code=404, detail="Account does not exist. Please create an account.")
diff --git a/mirix/server/server.py b/mirix/server/server.py
index 3a200188c..d8326ae8f 100644
--- a/mirix/server/server.py
+++ b/mirix/server/server.py
@@ -1,7 +1,6 @@
 # inspecting tools
 import asyncio
 import os
-import sys
 import traceback
 import warnings
 from abc import abstractmethod
@@ -16,9 +15,7 @@
 from rich.console import Console
 from rich.panel import Panel
 from rich.text import Text
-from sqlalchemy import create_engine
 from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
-from sqlalchemy.orm import sessionmaker
 
 import mirix.constants as constants
 import mirix.server.utils as server_utils
@@ -45,7 +42,7 @@
 from mirix.log import get_logger
 from mirix.orm import Base
 from mirix.orm.errors import NoResultFound
-from mirix.schemas.agent import AgentState, AgentType, CreateAgent, CreateMetaAgent
+from mirix.schemas.agent import AgentState, AgentType, CreateAgent
 from mirix.schemas.client import Client
 from mirix.schemas.embedding_config import EmbeddingConfig
 
@@ -311,7 +308,7 @@ async def __call__(self):
 
     # asyncpg does not accept 'sslmode' as a keyword argument — strip it from
     # the URI and pass an ssl.SSLContext via connect_args instead.
-    from urllib.parse import urlparse, parse_qs, urlencode, urlunparse
+    from urllib.parse import parse_qs, urlencode, urlparse, urlunparse
 
     _parsed = urlparse(_pg_uri)
     _params = parse_qs(_parsed.query, keep_blank_values=True)
@@ -432,6 +429,7 @@ async def get_db():
     def db_context():
         """Async context manager for service managers (PGlite)."""
         return pglite_session_factory()
+
 else:
 
     async def get_db():
@@ -645,9 +643,7 @@ async def load_agent(
         """Updated method to load agents from persisted storage."""
         agent_lock = self.per_agent_lock_manager.get_lock(agent_id)
         async with agent_lock:
-            agent_state = await self.agent_manager.get_agent_by_id(
-                agent_id=agent_id, actor=actor
-            )
+            agent_state = await self.agent_manager.get_agent_by_id(agent_id=agent_id, actor=actor)
 
             common_kwargs = dict(
                 interface=interface or self.default_interface_factory(),
diff --git a/mirix/services/admin_user_manager.py b/mirix/services/admin_user_manager.py
index 1b4a706d9..5872b8628 100644
--- a/mirix/services/admin_user_manager.py
+++ b/mirix/services/admin_user_manager.py
@@ -278,9 +278,7 @@ async def register_client_for_dashboard(
             return client.to_pydantic()
 
     @enforce_types
-    async def authenticate(
-        self, email: str, password: str
-    ) -> Tuple[Optional[PydanticClient], Optional[str], str]:
+    async def authenticate(self, email: str, password: str) -> Tuple[Optional[PydanticClient], Optional[str], str]:
         """
         Authenticate a client for dashboard access and return client + JWT token.
 
@@ -310,27 +308,19 @@ async def authenticate(
             client = result.scalar_one_or_none()
 
             if not client:
-                logger.warning(
-                    "Login attempt for non-existent email: %s", email
-                )
+                logger.warning("Login attempt for non-existent email: %s", email)
                 return None, None, "not_found"
 
             if client.status != "active":
-                logger.warning(
-                    "Login attempt for inactive client: %s", email
-                )
+                logger.warning("Login attempt for inactive client: %s", email)
                 return None, None, "inactive"
 
             if not client.password_hash:
-                logger.warning(
-                    "Login attempt for client without password: %s", email
-                )
+                logger.warning("Login attempt for client without password: %s", email)
                 return None, None, "no_password"
 
             if not self.verify_password(password, client.password_hash):
-                logger.warning(
-                    "Failed login attempt for client: %s", email
-                )
+                logger.warning("Failed login attempt for client: %s", email)
                 return None, None, "wrong_password"
 
             client.last_login = datetime.now(timezone.utc)
@@ -347,9 +337,7 @@ async def get_client_by_id(self, client_id: str) -> Optional[PydanticClient]:
         """Get a client by ID."""
         async with self.session_maker() as session:
             try:
-                client = await ClientModel.read(
-                    db_session=session, identifier=client_id
-                )
+                client = await ClientModel.read(db_session=session, identifier=client_id)
                 if client.is_deleted:
                     return None
                 return client.to_pydantic()
@@ -375,9 +363,7 @@ async def get_client_by_email(self, email: str) -> Optional[PydanticClient]:
             return None
 
     @enforce_types
-    async def list_dashboard_clients(
-        self, cursor: Optional[str] = None, limit: int = 50
-    ) -> List[PydanticClient]:
+    async def list_dashboard_clients(self, cursor: Optional[str] = None, limit: int = 50) -> List[PydanticClient]:
         """List all clients that have dashboard access (email set)."""
         async with self.session_maker() as session:
             stmt = (
@@ -397,9 +383,7 @@ async def list_dashboard_clients(
             return [client.to_pydantic() for client in clients]
 
     @enforce_types
-    async def set_client_password(
-        self, client_id: str, email: str, password: str
-    ) -> PydanticClient:
+    async def set_client_password(self, client_id: str, email: str, password: str) -> PydanticClient:
         """
         Set dashboard credentials for an existing client.
 
@@ -412,9 +396,7 @@ async def set_client_password(
             Updated client
         """
         async with self.session_maker() as session:
-            client = await ClientModel.read(
-                db_session=session, identifier=client_id
-            )
+            client = await ClientModel.read(db_session=session, identifier=client_id)
 
             if client.is_deleted:
                 raise ValueError("Cannot update deleted client")
@@ -430,9 +412,7 @@ async def set_client_password(
             )
             result = await session.execute(stmt)
             if result.scalar_one_or_none():
-                raise ValueError(
-                    f"Email '{email}' already exists on another client"
-                )
+                raise ValueError(f"Email '{email}' already exists on another client")
 
             client.email = email.lower()
             client.password_hash = self.hash_password(password)
@@ -442,9 +422,7 @@ async def set_client_password(
             return client.to_pydantic()
 
     @enforce_types
-    async def change_password(
-        self, client_id: str, current_password: str, new_password: str
-    ) -> bool:
+    async def change_password(self, client_id: str, current_password: str, new_password: str) -> bool:
         """
         Change a client's dashboard password.
 
@@ -457,9 +435,7 @@ async def change_password(
             True if successful, False otherwise
         """
         async with self.session_maker() as session:
-            client = await ClientModel.read(
-                db_session=session, identifier=client_id
-            )
+            client = await ClientModel.read(db_session=session, identifier=client_id)
 
             if not client.password_hash:
                 logger.warning(
@@ -468,9 +444,7 @@ async def change_password(
                 )
                 return False
 
-            if not self.verify_password(
-                current_password, client.password_hash
-            ):
+            if not self.verify_password(current_password, client.password_hash):
                 logger.warning(
                     "Password change failed: incorrect current password for %s",
                     client_id,
@@ -489,9 +463,13 @@ async def count_dashboard_clients(self) -> int:
         from sqlalchemy import func
 
         async with self.session_maker() as session:
-            stmt = select(func.count()).select_from(ClientModel).where(
-                ClientModel.is_deleted == False,
-                ClientModel.email.isnot(None),
+            stmt = (
+                select(func.count())
+                .select_from(ClientModel)
+                .where(
+                    ClientModel.is_deleted == False,
+                    ClientModel.email.isnot(None),
+                )
             )
             result = await session.execute(stmt)
             return result.scalar() or 0
diff --git a/mirix/services/agent_manager.py b/mirix/services/agent_manager.py
index 3250146e6..97a934e81 100644
--- a/mirix/services/agent_manager.py
+++ b/mirix/services/agent_manager.py
@@ -22,13 +22,16 @@
 )
 from mirix.log import get_logger
 from mirix.orm import Agent as AgentModel
-from mirix.orm import Block as BlockModel
 from mirix.orm import Tool as ToolModel
 from mirix.orm.errors import NoResultFound
+
+logger = get_logger(__name__)
+
+# Diagnostic flag for MissingGreenlet debugging
+
+_TRACE_MISSING_GREENLET = os.getenv("MIRIX_TRACE_MISSING_GREENLET", "false").lower() == "true"
 from mirix.schemas.agent import AgentState as PydanticAgentState
 from mirix.schemas.agent import AgentType, CreateAgent, CreateMetaAgent, UpdateAgent, UpdateMetaAgent
-from mirix.schemas.block import Block
-from mirix.schemas.block import Block as PydanticBlock
 from mirix.schemas.client import Client as PydanticClient
 from mirix.schemas.embedding_config import EmbeddingConfig
 from mirix.schemas.enums import ToolType
@@ -681,7 +684,9 @@ async def append_initial_message_sequence_to_in_context_messages(
         init_messages = self._generate_initial_message_sequence(
             actor, agent_state, initial_message_sequence, user_id=user_id
         )
-        return await self.append_to_in_context_messages(init_messages, agent_id=agent_state.id, actor=actor, user_id=user_id)
+        return await self.append_to_in_context_messages(
+            init_messages, agent_id=agent_state.id, actor=actor, user_id=user_id
+        )
 
     @enforce_types
     async def _create_agent(
@@ -748,7 +753,9 @@ async def update_agent(self, agent_id: str, agent_update: UpdateAgent, actor: Py
         return agent_state
 
     @enforce_types
-    async def update_llm_config(self, agent_id: str, llm_config: LLMConfig, actor: PydanticClient) -> PydanticAgentState:
+    async def update_llm_config(
+        self, agent_id: str, llm_config: LLMConfig, actor: PydanticClient
+    ) -> PydanticAgentState:
         return await self.update_agent(
             agent_id=agent_id,
             agent_update=UpdateAgent(llm_config=llm_config),
@@ -756,7 +763,9 @@ async def update_llm_config(self, agent_id: str, llm_config: LLMConfig, actor: P
         )
 
     @enforce_types
-    async def update_system_prompt(self, agent_id: str, system_prompt: str, actor: PydanticClient) -> PydanticAgentState:
+    async def update_system_prompt(
+        self, agent_id: str, system_prompt: str, actor: PydanticClient
+    ) -> PydanticAgentState:
         agent_state = await self.update_agent(
             agent_id=agent_id,
             agent_update=UpdateAgent(system=system_prompt),
@@ -812,7 +821,9 @@ async def add_mcp_tool(
         return agent_state
 
     @enforce_types
-    async def _update_agent(self, agent_id: str, agent_update: UpdateAgent, actor: PydanticClient) -> PydanticAgentState:
+    async def _update_agent(
+        self, agent_id: str, agent_update: UpdateAgent, actor: PydanticClient
+    ) -> PydanticAgentState:
         """
         Update an existing agent.
 
@@ -1108,7 +1119,9 @@ async def _get_children_from_db(self, parent_ids: List[str], session: Session, a
         )
         return children_by_parent
 
-    async def _get_children_from_redis(self, parent_id: str, actor: PydanticClient) -> Optional[List[PydanticAgentState]]:
+    async def _get_children_from_redis(
+        self, parent_id: str, actor: PydanticClient
+    ) -> Optional[List[PydanticAgentState]]:
         """
         Fetch children from Redis cache using parent's children_ids.
 
@@ -1261,7 +1274,25 @@ async def list_agents(
             )
 
             # Convert to Pydantic
-            agent_states = [agent.to_pydantic() for agent in agents]
+            if _TRACE_MISSING_GREENLET:
+                logger.info("Converting %d agents to Pydantic in list_agents", len(agents))
+                agent_states = []
+                for i, agent in enumerate(agents):
+                    try:
+                        agent_states.append(agent.to_pydantic())
+                    except Exception as e:
+                        if "MissingGreenlet" in str(type(e).__name__) or "greenlet" in str(e).lower():
+                            import traceback
+
+                            logger.error(
+                                "MissingGreenlet in list_agents at index %d, agent_id=%s\n" "Full traceback:\n%s",
+                                i,
+                                agent.id,
+                                traceback.format_exc(),
+                            )
+                        raise
+            else:
+                agent_states = [agent.to_pydantic() for agent in agents]
 
             # If there are no agents, return early
             if not agent_states:
@@ -1388,7 +1419,23 @@ async def get_agent_by_id(self, agent_id: str, actor: PydanticClient) -> Pydanti
                 identifier=agent_id,
                 actor=actor,  # Triggers client-level filtering via apply_access_predicate
             )
-            pydantic_agent = agent.to_pydantic()
+
+            if _TRACE_MISSING_GREENLET:
+                try:
+                    logger.info("Converting agent %s to Pydantic in get_agent_by_id", agent_id)
+                    pydantic_agent = agent.to_pydantic()
+                except Exception as e:
+                    if "MissingGreenlet" in str(type(e).__name__) or "greenlet" in str(e).lower():
+                        import traceback
+
+                        logger.error(
+                            "MissingGreenlet in get_agent_by_id for agent_id=%s\n" "Full traceback:\n%s",
+                            agent_id,
+                            traceback.format_exc(),
+                        )
+                    raise
+            else:
+                pydantic_agent = agent.to_pydantic()
 
             # Populate cache for next time
             try:
@@ -1723,7 +1770,9 @@ async def reset_messages(
             agent_state = agent.to_pydantic()
 
         if add_default_initial_messages:
-            return await self.append_initial_message_sequence_to_in_context_messages(actor, agent_state, user_id=user_id)
+            return await self.append_initial_message_sequence_to_in_context_messages(
+                actor, agent_state, user_id=user_id
+            )
         else:
             # We still want to always have a system message
             init_messages = initialize_message_sequence(
diff --git a/mirix/services/block_manager.py b/mirix/services/block_manager.py
index 1fc4ea51e..fe238ebb1 100755
--- a/mirix/services/block_manager.py
+++ b/mirix/services/block_manager.py
@@ -566,9 +566,10 @@ async def delete_by_user_id(self, user_id: str) -> int:
         Returns:
             Number of records deleted
         """
-        from mirix.database.redis_client import get_redis_client
         from sqlalchemy import delete
 
+        from mirix.database.redis_client import get_redis_client
+
         async with self.session_maker() as session:
             stmt = select(BlockModel.id).where(BlockModel.user_id == user_id)
             result = await session.execute(stmt)
diff --git a/mirix/services/client_manager.py b/mirix/services/client_manager.py
index ffcfecd47..a40cdcacf 100644
--- a/mirix/services/client_manager.py
+++ b/mirix/services/client_manager.py
@@ -9,7 +9,6 @@
 from mirix.schemas.client import Client as PydanticClient
 from mirix.schemas.client import ClientUpdate
 from mirix.schemas.client_api_key import ClientApiKey as PydanticClientApiKey
-from mirix.schemas.client_api_key import ClientApiKeyCreate
 from mirix.security.api_keys import hash_api_key
 from mirix.services.organization_manager import OrganizationManager
 from mirix.utils import enforce_types
@@ -28,17 +27,13 @@ def __init__(self):
         self.session_maker = db_context
 
     @enforce_types
-    async def create_default_client(
-        self, org_id: str = OrganizationManager.DEFAULT_ORG_ID
-    ) -> PydanticClient:
+    async def create_default_client(self, org_id: str = OrganizationManager.DEFAULT_ORG_ID) -> PydanticClient:
         """Create the default client (async)."""
         async with self.session_maker() as session:
             try:
                 await OrganizationModel.read(db_session=session, identifier=org_id)
             except NoResultFound:
-                raise ValueError(
-                    f"No organization with {org_id} exists in the organization table."
-                ) from None
+                raise ValueError(f"No organization with {org_id} exists in the organization table.") from None
 
             try:
                 client = await ClientModel.read(db_session=session, identifier=self.DEFAULT_CLIENT_ID)
@@ -67,9 +62,7 @@ async def create_client(self, pydantic_client: PydanticClient) -> PydanticClient
     async def update_client(self, client_update: ClientUpdate) -> PydanticClient:
         """Update client details (with cache invalidation)."""
         async with self.session_maker() as session:
-            existing_client = await ClientModel.read(
-                db_session=session, identifier=client_update.id
-            )
+            existing_client = await ClientModel.read(db_session=session, identifier=client_update.id)
             update_data = client_update.model_dump(exclude_unset=True, exclude_none=True)
             for key, value in update_data.items():
                 setattr(existing_client, key, value)
@@ -133,9 +126,7 @@ async def get_client_by_api_key(self, api_key: str) -> Optional[PydanticClient]:
             if not api_key_record:
                 return None
 
-            client = await ClientModel.read(
-                db_session=session, identifier=api_key_record.client_id
-            )
+            client = await ClientModel.read(db_session=session, identifier=api_key_record.client_id)
             if client.is_deleted or client.status != "active":
                 return None
 
@@ -157,9 +148,7 @@ async def list_client_api_keys(self, client_id: str) -> List[PydanticClientApiKe
     async def revoke_client_api_key(self, api_key_id: str) -> PydanticClientApiKey:
         """Revoke an API key (set status to 'revoked')."""
         async with self.session_maker() as session:
-            api_key = await ClientApiKeyModel.read(
-                db_session=session, identifier=api_key_id
-            )
+            api_key = await ClientApiKeyModel.read(db_session=session, identifier=api_key_id)
             api_key.status = "revoked"
             await api_key.update(session, actor=None)
             return api_key.to_pydantic()
@@ -168,9 +157,7 @@ async def revoke_client_api_key(self, api_key_id: str) -> PydanticClientApiKey:
     async def delete_client_api_key(self, api_key_id: str) -> None:
         """Permanently delete an API key from the database."""
         async with self.session_maker() as session:
-            api_key = await ClientApiKeyModel.read(
-                db_session=session, identifier=api_key_id
-            )
+            api_key = await ClientApiKeyModel.read(db_session=session, identifier=api_key_id)
             session.delete(api_key)
             await session.commit()
 
@@ -178,9 +165,7 @@ async def delete_client_api_key(self, api_key_id: str) -> None:
     async def update_client_status(self, client_id: str, status: str) -> PydanticClient:
         """Update the status of a client (with cache invalidation)."""
         async with self.session_maker() as session:
-            existing_client = await ClientModel.read(
-                db_session=session, identifier=client_id
-            )
+            existing_client = await ClientModel.read(db_session=session, identifier=client_id)
             existing_client.status = status
             await existing_client.update_with_redis(session, actor=None)
             return existing_client.to_pydantic()
@@ -357,9 +342,7 @@ async def delete_client_by_id(self, client_id: str):
                     await redis_client.client.hset(client_key, "is_deleted", "true")
                     logger.debug("Updated client %s in cache (is_deleted=true)", client_id)
                 except Exception as e:
-                    logger.warning(
-                        "Failed to update client in Redis, removing instead: %s", e
-                    )
+                    logger.warning("Failed to update client in Redis, removing instead: %s", e)
                     await redis_client.delete(client_key)
 
                 for agent_id in agent_ids:
@@ -368,9 +351,7 @@ async def delete_client_by_id(self, client_id: str):
                         await redis_client.client.hset(agent_key, "is_deleted", "true")
                     except Exception:
                         await redis_client.delete(agent_key)
-                logger.debug(
-                    "Updated %d agents in Redis cache (is_deleted=true)", len(agent_ids)
-                )
+                logger.debug("Updated %d agents in Redis cache (is_deleted=true)", len(agent_ids))
 
                 logger.info(
                     "Client %s and all associated records soft deleted: "
@@ -385,9 +366,7 @@ async def delete_client_by_id(self, client_id: str):
                     message_count,
                 )
         except Exception as e:
-            logger.warning(
-                "Failed to update Redis cache for client %s: %s", client_id, e
-            )
+            logger.warning("Failed to update Redis cache for client %s: %s", client_id, e)
 
     async def delete_memories_by_client_id(self, client_id: str):
         """
@@ -471,9 +450,7 @@ async def delete_memories_by_client_id(self, client_id: str):
             async with self.session_maker() as session:
                 from mirix.orm.block import Block as BlockModel
 
-                stmt_ids = select(BlockModel.id).where(
-                    BlockModel._created_by_id == client_id
-                )
+                stmt_ids = select(BlockModel.id).where(BlockModel._created_by_id == client_id)
                 result_ids = await session.execute(stmt_ids)
                 block_ids = [row[0] for row in result_ids.all()]
 
@@ -485,9 +462,7 @@ async def delete_memories_by_client_id(self, client_id: str):
                     for block_id in block_ids:
                         await block_manager._invalidate_block_cache(block_id)
 
-                    stmt_del = delete(BlockModel).where(
-                        BlockModel._created_by_id == client_id
-                    )
+                    stmt_del = delete(BlockModel).where(BlockModel._created_by_id == client_id)
                     await session.execute(stmt_del)
                     await session.commit()
 
@@ -496,10 +471,7 @@ async def delete_memories_by_client_id(self, client_id: str):
 
                 redis_client = get_redis_client()
                 if redis_client:
-                    redis_keys = [
-                        f"{redis_client.BLOCK_PREFIX}{block_id}"
-                        for block_id in block_ids
-                    ]
+                    redis_keys = [f"{redis_client.BLOCK_PREFIX}{block_id}" for block_id in block_ids]
                     BATCH_SIZE = 1000
                     for i in range(0, len(redis_keys), BATCH_SIZE):
                         batch = redis_keys[i : i + BATCH_SIZE]
@@ -512,9 +484,7 @@ async def delete_memories_by_client_id(self, client_id: str):
             async with self.session_maker() as session:
                 from mirix.orm.agent import Agent as AgentModel
 
-                stmt_agents = select(AgentModel).where(
-                    AgentModel._created_by_id == client_id
-                )
+                stmt_agents = select(AgentModel).where(AgentModel._created_by_id == client_id)
                 result_agents = await session.execute(stmt_agents)
                 agents = result_agents.scalars().all()
                 agent_ids = [agent.id for agent in agents]
@@ -524,8 +494,7 @@ async def delete_memories_by_client_id(self, client_id: str):
 
                 await session.commit()
                 logger.debug(
-                    "Cleared conversation message_ids from %d agents in PostgreSQL "
-                    "(kept system messages)",
+                    "Cleared conversation message_ids from %d agents in PostgreSQL " "(kept system messages)",
                     len(agent_ids),
                 )
 
@@ -591,14 +560,10 @@ async def get_client_by_id(self, client_id: str) -> PydanticClient:
 
                     cache_key = f"{cache_provider.CLIENT_PREFIX}{client_id}"
                     data = pydantic_client.model_dump(mode="json")
-                    await cache_provider.set_hash(
-                        cache_key, data, ttl=settings.redis_ttl_clients
-                    )
+                    await cache_provider.set_hash(cache_key, data, ttl=settings.redis_ttl_clients)
                     logger.debug("Populated cache for client %s", client_id)
             except Exception as e:
-                logger.warning(
-                    "Failed to populate cache for client %s: %s", client_id, e
-                )
+                logger.warning("Failed to populate cache for client %s: %s", client_id, e)
 
             return pydantic_client
 
@@ -612,9 +577,7 @@ async def get_default_client(self) -> PydanticClient:
 
             org_mgr = OrganizationManager()
             await org_mgr.get_default_organization()
-            return await self.create_default_client(
-                org_id=OrganizationManager.DEFAULT_ORG_ID
-            )
+            return await self.create_default_client(org_id=OrganizationManager.DEFAULT_ORG_ID)
 
     @enforce_types
     async def get_client_or_default(
@@ -659,7 +622,5 @@ async def list_clients(
     ) -> List[PydanticClient]:
         """List clients with pagination using cursor (id) and limit."""
         async with self.session_maker() as session:
-            results = await ClientModel.list(
-                db_session=session, cursor=cursor, limit=limit
-            )
+            results = await ClientModel.list(db_session=session, cursor=cursor, limit=limit)
             return [client.to_pydantic() for client in results]
diff --git a/mirix/services/cloud_file_mapping_manager.py b/mirix/services/cloud_file_mapping_manager.py
index 4b1f36550..d8af9298d 100644
--- a/mirix/services/cloud_file_mapping_manager.py
+++ b/mirix/services/cloud_file_mapping_manager.py
@@ -14,38 +14,28 @@ def __init__(self):
 
         self.session_maker = db_context
 
-    async def add_mapping(
-        self, cloud_file_id, local_file_id, timestamp, force_add=False
-    ):
+    async def add_mapping(self, cloud_file_id, local_file_id, timestamp, force_add=False):
         """Add a mapping from a cloud file to a local file."""
         async with self.session_maker() as session:
             try:
-                existing = await CloudFileMapping.read(
-                    db_session=session, cloud_file_id=cloud_file_id
-                )
+                existing = await CloudFileMapping.read(db_session=session, cloud_file_id=cloud_file_id)
             except Exception:
                 existing = None
             if existing:
                 if force_add:
                     await existing.hard_delete(session)
                 else:
-                    raise ValueError(
-                        f"Mapping already exists for cloud file {cloud_file_id}"
-                    )
+                    raise ValueError(f"Mapping already exists for cloud file {cloud_file_id}")
 
             try:
-                existing = await CloudFileMapping.read(
-                    db_session=session, local_file_id=local_file_id
-                )
+                existing = await CloudFileMapping.read(db_session=session, local_file_id=local_file_id)
             except Exception:
                 existing = None
             if existing:
                 if force_add:
                     await existing.hard_delete(session)
                 else:
-                    raise ValueError(
-                        f"Mapping already exists for local file {local_file_id}"
-                    )
+                    raise ValueError(f"Mapping already exists for local file {local_file_id}")
 
             pydantic_mapping_dict = {
                 "cloud_file_id": cloud_file_id,
@@ -56,9 +46,7 @@ async def add_mapping(
             }
             from mirix.services.organization_manager import OrganizationManager
 
-            pydantic_mapping_dict["organization_id"] = (
-                OrganizationManager.DEFAULT_ORG_ID
-            )
+            pydantic_mapping_dict["organization_id"] = OrganizationManager.DEFAULT_ORG_ID
 
             mapping = CloudFileMapping(**pydantic_mapping_dict)
             await mapping.create(session)
@@ -68,9 +56,7 @@ async def get_local_file(self, cloud_file_id):
         """Get the local file ID for a cloud file."""
         async with self.session_maker() as session:
             try:
-                mapping = await CloudFileMapping.read(
-                    db_session=session, cloud_file_id=cloud_file_id
-                )
+                mapping = await CloudFileMapping.read(db_session=session, cloud_file_id=cloud_file_id)
                 return mapping.local_file_id if mapping else None
             except Exception:
                 return None
@@ -79,76 +65,56 @@ async def get_cloud_file(self, local_file_id):
         """Get the cloud file ID for a local file."""
         async with self.session_maker() as session:
             try:
-                mapping = await CloudFileMapping.read(
-                    db_session=session, local_file_id=local_file_id
-                )
+                mapping = await CloudFileMapping.read(db_session=session, local_file_id=local_file_id)
                 return mapping.cloud_file_id if mapping else None
             except Exception:
                 return None
 
-    async def delete_mapping(
-        self, cloud_file_id=None, local_file_id=None
-    ) -> None:
+    async def delete_mapping(self, cloud_file_id=None, local_file_id=None) -> None:
         """Delete a mapping."""
         async with self.session_maker() as session:
             if cloud_file_id is not None:
                 try:
-                    mapping = await CloudFileMapping.read(
-                        db_session=session, cloud_file_id=cloud_file_id
-                    )
+                    mapping = await CloudFileMapping.read(db_session=session, cloud_file_id=cloud_file_id)
                     await mapping.hard_delete(session)
                 except Exception:
                     pass
             if local_file_id is not None:
                 try:
-                    mapping = await CloudFileMapping.read(
-                        db_session=session, local_file_id=local_file_id
-                    )
+                    mapping = await CloudFileMapping.read(db_session=session, local_file_id=local_file_id)
                     await mapping.hard_delete(session)
                 except Exception:
                     pass
 
-    async def check_if_existing(
-        self, cloud_file_id=None, local_file_id=None
-    ) -> bool:
+    async def check_if_existing(self, cloud_file_id=None, local_file_id=None) -> bool:
         """Check if the file_ids exist in the database."""
         async with self.session_maker() as session:
             if cloud_file_id is not None:
                 try:
-                    await CloudFileMapping.read(
-                        db_session=session, cloud_file_id=cloud_file_id
-                    )
+                    await CloudFileMapping.read(db_session=session, cloud_file_id=cloud_file_id)
                     return True
                 except Exception:
                     pass
             elif local_file_id is not None:
                 try:
-                    await CloudFileMapping.read(
-                        db_session=session, local_file_id=local_file_id
-                    )
+                    await CloudFileMapping.read(db_session=session, local_file_id=local_file_id)
                     return True
                 except Exception:
                     pass
         return False
 
-    async def set_processed(
-        self, cloud_file_id=None, local_file_id=None
-    ) -> PydanticCloudFileMapping:
+    async def set_processed(self, cloud_file_id=None, local_file_id=None) -> PydanticCloudFileMapping:
         """Set status to processed."""
         async with self.session_maker() as session:
             mapping = None
             if cloud_file_id is not None:
                 try:
-                    mapping = await CloudFileMapping.read(
-                        db_session=session, cloud_file_id=cloud_file_id
-                    )
+                    mapping = await CloudFileMapping.read(db_session=session, cloud_file_id=cloud_file_id)
                 except Exception:
                     pass
             elif local_file_id is not None:
                 try:
-                    mapping = await CloudFileMapping.read(
-                        db_session=session, local_file_id=local_file_id
-                    )
+                    mapping = await CloudFileMapping.read(db_session=session, local_file_id=local_file_id)
                 except Exception:
                     pass
             if mapping is None:
diff --git a/mirix/services/episodic_memory_manager.py b/mirix/services/episodic_memory_manager.py
index 68ef38cb2..ca5da4be9 100755
--- a/mirix/services/episodic_memory_manager.py
+++ b/mirix/services/episodic_memory_manager.py
@@ -24,6 +24,11 @@
 
 logger = get_logger(__name__)
 
+# Diagnostic flag for MissingGreenlet debugging
+import os
+
+_TRACE_MISSING_GREENLET = os.getenv("MIRIX_TRACE_MISSING_GREENLET", "false").lower() == "true"
+
 
 class EpisodicMemoryManager:
     """Manager class to handle business logic related to Episodic episodic_memory items."""
@@ -260,9 +265,7 @@ async def create_episodic_memory(
         if not episodic_memory.id:
             from mirix.utils import generate_unique_short_id_async
 
-            episodic_memory.id = await generate_unique_short_id_async(
-                self.session_maker, EpisodicEvent, "ep"
-            )
+            episodic_memory.id = await generate_unique_short_id_async(self.session_maker, EpisodicEvent, "ep")
 
         # Convert the Pydantic model into a dict
         episodic_memory_dict = episodic_memory.model_dump()
@@ -349,9 +352,7 @@ async def delete_by_client_id(self, actor: PydanticClient) -> int:
 
         async with self.session_maker() as session:
             # Get IDs for Redis cleanup (only fetch IDs, not full objects)
-            result = await session.execute(
-                select(EpisodicEvent.id).where(EpisodicEvent.client_id == actor.id)
-            )
+            result = await session.execute(select(EpisodicEvent.id).where(EpisodicEvent.client_id == actor.id))
             item_ids = [row[0] for row in result.all()]
 
             count = len(item_ids)
@@ -359,9 +360,7 @@ async def delete_by_client_id(self, actor: PydanticClient) -> int:
                 return 0
 
             # Bulk delete in single query
-            await session.execute(
-                delete(EpisodicEvent).where(EpisodicEvent.client_id == actor.id)
-            )
+            await session.execute(delete(EpisodicEvent).where(EpisodicEvent.client_id == actor.id))
 
             await session.commit()
 
@@ -498,18 +497,14 @@ async def delete_by_user_id(self, user_id: str) -> int:
         from mirix.database.redis_client import get_redis_client
 
         async with self.session_maker() as session:
-            result = await session.execute(
-                select(EpisodicEvent.id).where(EpisodicEvent.user_id == user_id)
-            )
+            result = await session.execute(select(EpisodicEvent.id).where(EpisodicEvent.user_id == user_id))
             item_ids = [row[0] for row in result.all()]
 
             count = len(item_ids)
             if count == 0:
                 return 0
 
-            await session.execute(
-                delete(EpisodicEvent).where(EpisodicEvent.user_id == user_id)
-            )
+            await session.execute(delete(EpisodicEvent).where(EpisodicEvent.user_id == user_id))
 
             await session.commit()
 
@@ -741,7 +736,9 @@ async def list_episodic_memory(
                         from mirix.constants import MAX_EMBEDDING_DIM
                         from mirix.embeddings import embedding_model
 
-                        embedded_text = await (await embedding_model(agent_state.embedding_config)).get_text_embedding(query)
+                        embedded_text = await (await embedding_model(agent_state.embedding_config)).get_text_embedding(
+                            query
+                        )
                         embedded_text = np.array(embedded_text)
                         embedded_text = np.pad(
                             embedded_text,
@@ -1458,7 +1455,9 @@ async def list_episodic_memory_by_org(
                         from mirix.constants import MAX_EMBEDDING_DIM
                         from mirix.embeddings import embedding_model
 
-                        embedded_text = await (await embedding_model(agent_state.embedding_config)).get_text_embedding(query)
+                        embedded_text = await (await embedding_model(agent_state.embedding_config)).get_text_embedding(
+                            query
+                        )
                         embedded_text = np.array(embedded_text)
                         embedded_text = np.pad(
                             embedded_text,
@@ -1543,7 +1542,30 @@ async def list_episodic_memory_by_org(
                     base_query = base_query.limit(limit)
                 result = await session.execute(base_query)
                 episodic_memory = result.scalars().all()
-                return [event.to_pydantic() for event in episodic_memory]
+
+                if _TRACE_MISSING_GREENLET:
+                    logger.info(
+                        "Converting %d episodic events to Pydantic in list_episodic_memory_by_org", len(episodic_memory)
+                    )
+                    pydantic_events = []
+                    for i, event in enumerate(episodic_memory):
+                        try:
+                            pydantic_events.append(event.to_pydantic())
+                        except Exception as e:
+                            if "MissingGreenlet" in str(type(e).__name__) or "greenlet" in str(e).lower():
+                                import traceback
+
+                                logger.error(
+                                    "MissingGreenlet in list_episodic_memory_by_org at index %d, event_id=%s\n"
+                                    "Full traceback:\n%s",
+                                    i,
+                                    event.id,
+                                    traceback.format_exc(),
+                                )
+                            raise
+                    return pydantic_events
+                else:
+                    return [event.to_pydantic() for event in episodic_memory]
 
             if search_method == "embedding":
                 embed_query = True
@@ -1583,7 +1605,7 @@ async def list_episodic_memory_by_org(
                 base_query = base_query.order_by(embedding_query_field)
             elif search_method == "bm25":
                 # Use PostgreSQL native full-text search if available
-                from sqlalchemy import func, text
+                from sqlalchemy import func
 
                 # Determine search field
                 if not search_field or search_field == "details":
diff --git a/mirix/services/file_manager.py b/mirix/services/file_manager.py
index c63ce1b29..8157fa2a8 100644
--- a/mirix/services/file_manager.py
+++ b/mirix/services/file_manager.py
@@ -19,9 +19,7 @@ def __init__(self):
         self.session_maker = db_context
 
     @enforce_types
-    async def create_file_metadata(
-        self, pydantic_file: PydanticFileMetadata
-    ) -> PydanticFileMetadata:
+    async def create_file_metadata(self, pydantic_file: PydanticFileMetadata) -> PydanticFileMetadata:
         """Create new file metadata."""
         async with self.session_maker() as session:
             file_metadata = FileMetadataModel(**pydantic_file.model_dump())
@@ -32,9 +30,7 @@ async def create_file_metadata(
     async def get_file_metadata_by_id(self, file_id: str) -> PydanticFileMetadata:
         """Get file metadata by ID."""
         async with self.session_maker() as session:
-            file_metadata = await FileMetadataModel.read(
-                db_session=session, identifier=file_id
-            )
+            file_metadata = await FileMetadataModel.read(db_session=session, identifier=file_id)
             return file_metadata.to_pydantic()
 
     @enforce_types
@@ -55,14 +51,10 @@ async def get_files_by_organization_id(
             return [f.to_pydantic() for f in results]
 
     @enforce_types
-    async def update_file_metadata(
-        self, file_id: str, **kwargs
-    ) -> PydanticFileMetadata:
+    async def update_file_metadata(self, file_id: str, **kwargs) -> PydanticFileMetadata:
         """Update file metadata."""
         async with self.session_maker() as session:
-            file_metadata = await FileMetadataModel.read(
-                db_session=session, identifier=file_id
-            )
+            file_metadata = await FileMetadataModel.read(db_session=session, identifier=file_id)
             for key, value in kwargs.items():
                 if hasattr(file_metadata, key) and value is not None:
                     setattr(file_metadata, key, value)
@@ -74,20 +66,14 @@ async def update_file_metadata(
     async def delete_file_metadata(self, file_id: str) -> None:
         """Delete file metadata by ID."""
         async with self.session_maker() as session:
-            file_metadata = await FileMetadataModel.read(
-                db_session=session, identifier=file_id
-            )
+            file_metadata = await FileMetadataModel.read(db_session=session, identifier=file_id)
             await file_metadata.hard_delete(session)
 
     @enforce_types
-    async def list_files(
-        self, cursor: Optional[str] = None, limit: Optional[int] = 50
-    ) -> List[PydanticFileMetadata]:
+    async def list_files(self, cursor: Optional[str] = None, limit: Optional[int] = 50) -> List[PydanticFileMetadata]:
         """List all files with pagination."""
         async with self.session_maker() as session:
-            results = await FileMetadataModel.list(
-                db_session=session, cursor=cursor, limit=limit
-            )
+            results = await FileMetadataModel.list(db_session=session, cursor=cursor, limit=limit)
             return [f.to_pydantic() for f in results]
 
     @enforce_types
@@ -140,14 +126,10 @@ async def search_files_by_name(
         """Search files by name pattern."""
         async with self.session_maker() as session:
             stmt = select(FileMetadataModel).where(
-                func.lower(FileMetadataModel.file_name).contains(
-                    func.lower(file_name)
-                )
+                func.lower(FileMetadataModel.file_name).contains(func.lower(file_name))
             )
             if organization_id:
-                stmt = stmt.where(
-                    FileMetadataModel.organization_id == organization_id
-                )
+                stmt = stmt.where(FileMetadataModel.organization_id == organization_id)
             result = await session.execute(stmt)
             rows = result.scalars().all()
             return [f.to_pydantic() for f in rows]
@@ -158,53 +140,37 @@ async def get_files_by_type(
     ) -> List[PydanticFileMetadata]:
         """Get files by file type."""
         async with self.session_maker() as session:
-            stmt = select(FileMetadataModel).where(
-                FileMetadataModel.file_type == file_type
-            )
+            stmt = select(FileMetadataModel).where(FileMetadataModel.file_type == file_type)
             if organization_id:
-                stmt = stmt.where(
-                    FileMetadataModel.organization_id == organization_id
-                )
+                stmt = stmt.where(FileMetadataModel.organization_id == organization_id)
             result = await session.execute(stmt)
             rows = result.scalars().all()
             return [f.to_pydantic() for f in rows]
 
     @enforce_types
-    async def check_file_exists(
-        self, file_path: str, organization_id: Optional[str] = None
-    ) -> bool:
+    async def check_file_exists(self, file_path: str, organization_id: Optional[str] = None) -> bool:
         """Check if a file with the given path exists in the database."""
         async with self.session_maker() as session:
             try:
-                stmt = select(FileMetadataModel).where(
-                    FileMetadataModel.file_path == file_path
-                )
+                stmt = select(FileMetadataModel).where(FileMetadataModel.file_path == file_path)
                 if organization_id:
-                    stmt = stmt.where(
-                        FileMetadataModel.organization_id == organization_id
-                    )
+                    stmt = stmt.where(FileMetadataModel.organization_id == organization_id)
                 result = await session.execute(stmt)
                 return result.scalar_one_or_none() is not None
             except Exception:
                 return False
 
     @enforce_types
-    async def get_file_stats(
-        self, organization_id: Optional[str] = None
-    ) -> dict:
+    async def get_file_stats(self, organization_id: Optional[str] = None) -> dict:
         """Get file statistics for an organization or globally."""
         async with self.session_maker() as session:
             stmt = select(
                 func.count(FileMetadataModel.id).label("total_files"),
                 func.sum(FileMetadataModel.file_size).label("total_size"),
-                func.count(func.distinct(FileMetadataModel.file_type)).label(
-                    "unique_types"
-                ),
+                func.count(func.distinct(FileMetadataModel.file_type)).label("unique_types"),
             )
             if organization_id:
-                stmt = stmt.where(
-                    FileMetadataModel.organization_id == organization_id
-                )
+                stmt = stmt.where(FileMetadataModel.organization_id == organization_id)
             result = await session.execute(stmt)
             row = result.one()
             return {
diff --git a/mirix/services/helpers/agent_manager_helper.py b/mirix/services/helpers/agent_manager_helper.py
index 8bb05d2f7..815423f7f 100755
--- a/mirix/services/helpers/agent_manager_helper.py
+++ b/mirix/services/helpers/agent_manager_helper.py
@@ -15,7 +15,6 @@
 from mirix.schemas.memory import Memory
 from mirix.schemas.message import Message, MessageCreate
 from mirix.schemas.tool_rule import ToolRule
-from mirix.schemas.user import User
 from mirix.utils import get_local_time
 
 
@@ -49,9 +48,7 @@ async def _process_relationship(
             setattr(agent, relationship_name, [])
         return
 
-    result = await session.execute(
-        select(model_class).where(model_class.id.in_(item_ids))
-    )
+    result = await session.execute(select(model_class).where(model_class.id.in_(item_ids)))
     found_items = result.scalars().all()
 
     if not allow_partial and len(found_items) != len(item_ids):
diff --git a/mirix/services/knowledge_vault_manager.py b/mirix/services/knowledge_vault_manager.py
index 7fac83181..12f29634b 100755
--- a/mirix/services/knowledge_vault_manager.py
+++ b/mirix/services/knowledge_vault_manager.py
@@ -440,7 +440,9 @@ async def get_item_by_id(
                     id="system-default-client", organization_id=user.organization_id, name="system-client"
                 )
 
-                item = await KnowledgeVaultItem.read(db_session=session, identifier=knowledge_vault_item_id, actor=actor)
+                item = await KnowledgeVaultItem.read(
+                    db_session=session, identifier=knowledge_vault_item_id, actor=actor
+                )
                 pydantic_item = item.to_pydantic()
 
                 try:
@@ -506,9 +508,7 @@ async def create_item(
         if not knowledge_vault_item.id:
             from mirix.utils import generate_unique_short_id_async
 
-            knowledge_vault_item.id = await generate_unique_short_id_async(
-                self.session_maker, KnowledgeVaultItem, "kv"
-            )
+            knowledge_vault_item.id = await generate_unique_short_id_async(self.session_maker, KnowledgeVaultItem, "kv")
 
         item_data = knowledge_vault_item.model_dump()
 
@@ -950,7 +950,9 @@ async def delete_knowledge_by_id(self, knowledge_vault_item_id: str, actor: Pyda
         """Delete a knowledge vault item by ID (removes from cache)."""
         async with self.session_maker() as session:
             try:
-                item = await KnowledgeVaultItem.read(db_session=session, identifier=knowledge_vault_item_id, actor=actor)
+                item = await KnowledgeVaultItem.read(
+                    db_session=session, identifier=knowledge_vault_item_id, actor=actor
+                )
                 # Remove from cache
                 from mirix.database.cache_provider import get_cache_provider
 
@@ -1114,9 +1116,7 @@ async def delete_by_user_id(self, user_id: str) -> int:
 
         async with self.session_maker() as session:
             # Get IDs for Redis cleanup (only fetch IDs, not full objects)
-            result = await session.execute(
-                select(KnowledgeVaultItem.id).where(KnowledgeVaultItem.user_id == user_id)
-            )
+            result = await session.execute(select(KnowledgeVaultItem.id).where(KnowledgeVaultItem.user_id == user_id))
             item_ids = [row[0] for row in result.all()]
 
             count = len(item_ids)
@@ -1184,7 +1184,9 @@ async def list_knowledge_by_org(
                         from mirix.constants import MAX_EMBEDDING_DIM
                         from mirix.embeddings import embedding_model
 
-                        embedded_text = await (await embedding_model(agent_state.embedding_config)).get_text_embedding(query)
+                        embedded_text = await (await embedding_model(agent_state.embedding_config)).get_text_embedding(
+                            query
+                        )
                         embedded_text = np.array(embedded_text)
                         embedded_text = np.pad(
                             embedded_text,
diff --git a/mirix/services/mcp_tool_registry.py b/mirix/services/mcp_tool_registry.py
index 33dc94354..3850047ca 100644
--- a/mirix/services/mcp_tool_registry.py
+++ b/mirix/services/mcp_tool_registry.py
@@ -202,9 +202,7 @@ def _json_type_to_python_type(self, json_type: str) -> str:
         }
         return type_map.get(json_type, "str")
 
-    async def unregister_mcp_tools(
-        self, actor: PydanticClient, server_name: Optional[str] = None
-    ) -> int:
+    async def unregister_mcp_tools(self, actor: PydanticClient, server_name: Optional[str] = None) -> int:
         """
         Unregister MCP tools from database.
 
@@ -245,9 +243,7 @@ async def sync_mcp_tools(self, actor: PydanticClient) -> Dict[str, int]:
             current_tool_names.update(t["full_name"] for t in tools)
 
         existing_tools = await self.tool_manager.list_tools(actor)
-        existing_mcp_tools = [
-            t for t in existing_tools if t.tool_type == ToolType.MIRIX_MCP
-        ]
+        existing_mcp_tools = [t for t in existing_tools if t.tool_type == ToolType.MIRIX_MCP]
         existing_tool_names = {t.name for t in existing_mcp_tools}
 
         new_tools = current_tool_names - existing_tool_names
@@ -255,9 +251,7 @@ async def sync_mcp_tools(self, actor: PydanticClient) -> Dict[str, int]:
         if new_tools:
             filtered_discovered = {}
             for sname, tools in discovered_tools.items():
-                filtered_tools = [
-                    t for t in tools if t["full_name"] in new_tools
-                ]
+                filtered_tools = [t for t in tools if t["full_name"] in new_tools]
                 if filtered_tools:
                     filtered_discovered[sname] = filtered_tools
             old_cache = self._mcp_tool_cache
diff --git a/mirix/services/message_manager.py b/mirix/services/message_manager.py
index 1ccffdb3d..00a091f1d 100755
--- a/mirix/services/message_manager.py
+++ b/mirix/services/message_manager.py
@@ -135,10 +135,7 @@ async def create_many_messages(
         user_id: Optional[str] = None,
     ) -> List[PydanticMessage]:
         """Create multiple messages."""
-        return [
-            await self.create_message(m, actor=actor, client_id=client_id, user_id=user_id)
-            for m in pydantic_msgs
-        ]
+        return [await self.create_message(m, actor=actor, client_id=client_id, user_id=user_id) for m in pydantic_msgs]
 
     @enforce_types
     async def update_message_by_id(
@@ -358,9 +355,10 @@ async def delete_by_user_id(self, user_id: str) -> int:
         Returns:
             Number of records deleted
         """
+        from sqlalchemy import delete
+
         from mirix.database.redis_client import get_redis_client
         from mirix.schemas.message import MessageRole
-        from sqlalchemy import delete
 
         async with self.session_maker() as session:
             # Get IDs for non-system messages only (preserve system messages)
@@ -585,9 +583,7 @@ async def cleanup_all_detached_messages(self, actor: PydanticClient) -> Dict[str
 
         async with self.session_maker() as session:
             # Get all agents for this organization
-            agents = await AgentModel.list(
-                db_session=session, organization_id=actor.organization_id, limit=None
-            )
+            agents = await AgentModel.list(db_session=session, organization_id=actor.organization_id, limit=None)
 
             cleanup_results = {}
             total_deleted = 0
diff --git a/mirix/services/organization_manager.py b/mirix/services/organization_manager.py
index 24dddc318..3ecb69527 100755
--- a/mirix/services/organization_manager.py
+++ b/mirix/services/organization_manager.py
@@ -86,14 +86,10 @@ async def _create_organization(self, pydantic_org: PydanticOrganization) -> Pyda
     @enforce_types
     async def create_default_organization(self) -> PydanticOrganization:
         """Create the default organization."""
-        return await self.create_organization(
-            PydanticOrganization(name=self.DEFAULT_ORG_NAME, id=self.DEFAULT_ORG_ID)
-        )
+        return await self.create_organization(PydanticOrganization(name=self.DEFAULT_ORG_NAME, id=self.DEFAULT_ORG_ID))
 
     @enforce_types
-    async def update_organization_name_using_id(
-        self, org_id: str, name: Optional[str] = None
-    ) -> PydanticOrganization:
+    async def update_organization_name_using_id(self, org_id: str, name: Optional[str] = None) -> PydanticOrganization:
         """Update an organization (with cache invalidation)."""
         async with self.session_maker() as session:
             org = await OrganizationModel.read(db_session=session, identifier=org_id)
diff --git a/mirix/services/procedural_memory_manager.py b/mirix/services/procedural_memory_manager.py
index cda827658..d2c589706 100755
--- a/mirix/services/procedural_memory_manager.py
+++ b/mirix/services/procedural_memory_manager.py
@@ -495,9 +495,7 @@ async def create_item(
         if not item_data.id:
             from mirix.utils import generate_unique_short_id_async
 
-            item_data.id = await generate_unique_short_id_async(
-                self.session_maker, ProceduralMemoryItem, "proc"
-            )
+            item_data.id = await generate_unique_short_id_async(self.session_maker, ProceduralMemoryItem, "proc")
 
         data_dict = item_data.model_dump()
 
@@ -708,9 +706,7 @@ async def list_procedures(
 
                 from mirix.database.filter_tags_query import apply_filter_tags_sqlalchemy
 
-                query_stmt = apply_filter_tags_sqlalchemy(
-                    query_stmt, ProceduralMemoryItem, filter_tags, scopes=scopes
-                )
+                query_stmt = apply_filter_tags_sqlalchemy(query_stmt, ProceduralMemoryItem, filter_tags, scopes=scopes)
 
                 if limit:
                     query_stmt = query_stmt.limit(limit)
@@ -740,9 +736,7 @@ async def list_procedures(
 
                 from mirix.database.filter_tags_query import apply_filter_tags_sqlalchemy
 
-                base_query = apply_filter_tags_sqlalchemy(
-                    base_query, ProceduralMemoryItem, filter_tags, scopes=scopes
-                )
+                base_query = apply_filter_tags_sqlalchemy(base_query, ProceduralMemoryItem, filter_tags, scopes=scopes)
 
                 if search_method == "embedding":
                     main_query = await build_query(
@@ -1199,7 +1193,9 @@ async def list_procedures_by_org(
                         from mirix.constants import MAX_EMBEDDING_DIM
                         from mirix.embeddings import embedding_model
 
-                        embedded_text = await (await embedding_model(agent_state.embedding_config)).get_text_embedding(query)
+                        embedded_text = await (await embedding_model(agent_state.embedding_config)).get_text_embedding(
+                            query
+                        )
                         embedded_text = np.array(embedded_text)
                         embedded_text = np.pad(
                             embedded_text,
@@ -1245,9 +1241,7 @@ async def list_procedures_by_org(
 
             from mirix.database.filter_tags_query import apply_filter_tags_sqlalchemy
 
-            base_query = apply_filter_tags_sqlalchemy(
-                base_query, ProceduralMemoryItem, filter_tags, scopes=scopes
-            )
+            base_query = apply_filter_tags_sqlalchemy(base_query, ProceduralMemoryItem, filter_tags, scopes=scopes)
 
             # Handle empty query - fall back to recent sort
             if not query or query == "":
@@ -1306,4 +1300,4 @@ async def list_procedures_by_org(
 
             result = await session.execute(base_query)
             items = result.scalars().all()
-            return [item.to_pydantic() for item in items]
\ No newline at end of file
+            return [item.to_pydantic() for item in items]
diff --git a/mirix/services/provider_manager.py b/mirix/services/provider_manager.py
index 8c0b3b6bf..d155a7f34 100644
--- a/mirix/services/provider_manager.py
+++ b/mirix/services/provider_manager.py
@@ -48,9 +48,7 @@ async def upsert_provider(
         )
 
     @enforce_types
-    async def create_provider(
-        self, provider: PydanticProvider, actor: PydanticClient
-    ) -> PydanticProvider:
+    async def create_provider(self, provider: PydanticProvider, actor: PydanticClient) -> PydanticProvider:
         """Create a new provider if it doesn't already exist."""
         async with self.session_maker() as session:
             provider.organization_id = actor.organization_id
@@ -65,9 +63,7 @@ async def update_provider(
     ) -> PydanticProvider:
         """Update provider details."""
         async with self.session_maker() as session:
-            existing_provider = await ProviderModel.read(
-                db_session=session, identifier=provider_id, actor=actor
-            )
+            existing_provider = await ProviderModel.read(db_session=session, identifier=provider_id, actor=actor)
             update_data = provider_update.model_dump(exclude_unset=True, exclude_none=True)
             for key, value in update_data.items():
                 setattr(existing_provider, key, value)
@@ -78,9 +74,7 @@ async def update_provider(
     async def delete_provider_by_id(self, provider_id: str, actor: PydanticClient) -> None:
         """Delete a provider."""
         async with self.session_maker() as session:
-            existing_provider = await ProviderModel.read(
-                db_session=session, identifier=provider_id, actor=actor
-            )
+            existing_provider = await ProviderModel.read(db_session=session, identifier=provider_id, actor=actor)
             existing_provider.api_key = None
             await existing_provider.update(session, actor=actor)
             await existing_provider.delete(session, actor=actor)
diff --git a/mirix/services/raw_memory_manager.py b/mirix/services/raw_memory_manager.py
index b4ca6d596..9a1172bd8 100644
--- a/mirix/services/raw_memory_manager.py
+++ b/mirix/services/raw_memory_manager.py
@@ -11,7 +11,7 @@
 from datetime import datetime, timezone
 from typing import Any, Dict, List, Optional, Tuple
 
-from sqlalchemy import and_, desc, func, or_, select
+from sqlalchemy import and_, desc, or_, select
 
 from mirix.constants import BUILD_EMBEDDINGS_FOR_MEMORY
 from mirix.log import get_logger
@@ -116,9 +116,7 @@ async def create_raw_memory(
 
         # Ensure ID is set before model_dump
         if not raw_memory.id:
-            raw_memory.id = await generate_unique_short_id_async(
-                self.session_maker, RawMemory, "raw_mem"
-            )
+            raw_memory.id = await generate_unique_short_id_async(self.session_maker, RawMemory, "raw_mem")
 
         # Auto-inject scope from actor's write_scope
         if actor.write_scope is None:
@@ -185,9 +183,7 @@ async def create_raw_memory(
         # Create the raw memory item (with conditional Redis caching)
         async with self.session_maker() as session:
             raw_memory_item = RawMemory(**raw_memory_dict)
-            await raw_memory_item.create_with_redis(
-                session, actor=actor, use_cache=use_cache
-            )
+            await raw_memory_item.create_with_redis(session, actor=actor, use_cache=use_cache)
 
             logger.info("Raw memory created: id=%s", raw_memory_item.id)
             return raw_memory_item.to_pydantic()
@@ -251,9 +247,7 @@ async def get_raw_memory_by_id(
         # Cache MISS or cache unavailable - fetch from PostgreSQL
         async with self.session_maker() as session:
             try:
-                raw_memory_item = await RawMemory.read(
-                    db_session=session, identifier=memory_id, actor=actor
-                )
+                raw_memory_item = await RawMemory.read(db_session=session, identifier=memory_id, actor=actor)
                 pydantic_memory = raw_memory_item.to_pydantic()
 
                 # Validate scope - memory must be in actor's read_scopes
@@ -270,9 +264,7 @@ async def get_raw_memory_by_id(
                     if cache_provider:
                         cache_key = f"{cache_provider.RAW_MEMORY_PREFIX}{memory_id}"
                         data = pydantic_memory.model_dump(mode="json")
-                        await cache_provider.set_json(
-                            cache_key, data, ttl=settings.redis_ttl_default
-                        )
+                        await cache_provider.set_json(cache_key, data, ttl=settings.redis_ttl_default)
                         logger.debug(
                             "Populated cache for raw memory %s",
                             memory_id,
@@ -459,9 +451,7 @@ async def delete_raw_memory(
 
         async with self.session_maker() as session:
             try:
-                raw_memory = await RawMemory.read(
-                    db_session=session, identifier=memory_id, actor=actor
-                )
+                raw_memory = await RawMemory.read(db_session=session, identifier=memory_id, actor=actor)
 
                 # Perform scope access control check - must match actor's write_scope to delete
                 memory_scope = (raw_memory.filter_tags or {}).get("scope")
diff --git a/mirix/services/resource_memory_manager.py b/mirix/services/resource_memory_manager.py
index 95d5aa9d4..130cca170 100755
--- a/mirix/services/resource_memory_manager.py
+++ b/mirix/services/resource_memory_manager.py
@@ -447,9 +447,7 @@ async def create_item(
         if not item_data.id:
             from mirix.utils import generate_unique_short_id_async
 
-            item_data.id = await generate_unique_short_id_async(
-                self.session_maker, ResourceMemoryItem, "res"
-            )
+            item_data.id = await generate_unique_short_id_async(self.session_maker, ResourceMemoryItem, "res")
 
         data_dict = item_data.model_dump()
 
@@ -664,9 +662,7 @@ async def list_resources(
 
                 from mirix.database.filter_tags_query import apply_filter_tags_sqlalchemy
 
-                query_stmt = apply_filter_tags_sqlalchemy(
-                    query_stmt, ResourceMemoryItem, filter_tags, scopes=scopes
-                )
+                query_stmt = apply_filter_tags_sqlalchemy(query_stmt, ResourceMemoryItem, filter_tags, scopes=scopes)
 
                 if limit:
                     query_stmt = query_stmt.limit(limit)
@@ -695,9 +691,7 @@ async def list_resources(
 
             from mirix.database.filter_tags_query import apply_filter_tags_sqlalchemy
 
-            base_query = apply_filter_tags_sqlalchemy(
-                base_query, ResourceMemoryItem, filter_tags, scopes=scopes
-            )
+            base_query = apply_filter_tags_sqlalchemy(base_query, ResourceMemoryItem, filter_tags, scopes=scopes)
 
             if search_method == "string_match":
                 main_query = base_query.where(
@@ -736,7 +730,9 @@ async def list_resources(
                 else:
                     # Fallback to in-memory BM25 for SQLite (legacy method)
                     # Load all candidate items (memory-intensive, kept for compatibility)
-                    result = await session.execute(select(ResourceMemoryItem).where(ResourceMemoryItem.user_id == user.id))
+                    result = await session.execute(
+                        select(ResourceMemoryItem).where(ResourceMemoryItem.user_id == user.id)
+                    )
                     all_items = result.scalars().all()
 
                     if not all_items:
@@ -938,8 +934,9 @@ async def soft_delete_by_client_id(self, actor: PydanticClient) -> int:
         async with self.session_maker() as session:
             # Query all non-deleted records for this client (use actor.id)
             result = await session.execute(
-                select(ResourceMemoryItem)
-                .where(ResourceMemoryItem.client_id == actor.id, ResourceMemoryItem.is_deleted == False)
+                select(ResourceMemoryItem).where(
+                    ResourceMemoryItem.client_id == actor.id, ResourceMemoryItem.is_deleted == False
+                )
             )
             items = result.scalars().all()
 
@@ -984,8 +981,9 @@ async def soft_delete_by_user_id(self, user_id: str) -> int:
         async with self.session_maker() as session:
             # Query all non-deleted records for this user
             result = await session.execute(
-                select(ResourceMemoryItem)
-                .where(ResourceMemoryItem.user_id == user_id, ResourceMemoryItem.is_deleted == False)
+                select(ResourceMemoryItem).where(
+                    ResourceMemoryItem.user_id == user_id, ResourceMemoryItem.is_deleted == False
+                )
             )
             items = result.scalars().all()
 
@@ -1030,9 +1028,7 @@ async def delete_by_user_id(self, user_id: str) -> int:
 
         async with self.session_maker() as session:
             # Get IDs for Redis cleanup (only fetch IDs, not full objects)
-            result = await session.execute(
-                select(ResourceMemoryItem.id).where(ResourceMemoryItem.user_id == user_id)
-            )
+            result = await session.execute(select(ResourceMemoryItem.id).where(ResourceMemoryItem.user_id == user_id))
             item_ids = [row[0] for row in result.all()]
 
             count = len(item_ids)
@@ -1123,7 +1119,9 @@ async def list_resources_by_org(
                         from mirix.constants import MAX_EMBEDDING_DIM
                         from mirix.embeddings import embedding_model
 
-                        embedded_text = await (await embedding_model(agent_state.embedding_config)).get_text_embedding(query)
+                        embedded_text = await (await embedding_model(agent_state.embedding_config)).get_text_embedding(
+                            query
+                        )
                         embedded_text = np.array(embedded_text)
                         embedded_text = np.pad(
                             embedded_text,
@@ -1176,9 +1174,7 @@ async def list_resources_by_org(
 
             from mirix.database.filter_tags_query import apply_filter_tags_sqlalchemy
 
-            base_query = apply_filter_tags_sqlalchemy(
-                base_query, ResourceMemoryItem, filter_tags, scopes=scopes
-            )
+            base_query = apply_filter_tags_sqlalchemy(base_query, ResourceMemoryItem, filter_tags, scopes=scopes)
 
             # Handle empty query - fall back to recent sort
             if not query or query == "":
@@ -1227,4 +1223,4 @@ async def list_resources_by_org(
 
             result = await session.execute(base_query)
             resource_memory = result.scalars().all()
-            return [item.to_pydantic() for item in resource_memory]
\ No newline at end of file
+            return [item.to_pydantic() for item in resource_memory]
diff --git a/mirix/services/semantic_memory_manager.py b/mirix/services/semantic_memory_manager.py
index c43c98b17..a7f4969f0 100755
--- a/mirix/services/semantic_memory_manager.py
+++ b/mirix/services/semantic_memory_manager.py
@@ -518,9 +518,7 @@ async def create_item(
 
         # Ensure ID is set before model_dump
         if not item_data.id:
-            item_data.id = await generate_unique_short_id_async(
-                self.session_maker, SemanticMemoryItem, "sem"
-            )
+            item_data.id = await generate_unique_short_id_async(self.session_maker, SemanticMemoryItem, "sem")
 
         data_dict = item_data.model_dump()
 
@@ -771,9 +769,7 @@ async def list_semantic_items(
 
                 from mirix.database.filter_tags_query import apply_filter_tags_sqlalchemy
 
-                query_stmt = apply_filter_tags_sqlalchemy(
-                    query_stmt, SemanticMemoryItem, filter_tags, scopes=scopes
-                )
+                query_stmt = apply_filter_tags_sqlalchemy(query_stmt, SemanticMemoryItem, filter_tags, scopes=scopes)
 
                 if limit:
                     query_stmt = query_stmt.limit(limit)
@@ -810,9 +806,7 @@ async def list_semantic_items(
 
                 from mirix.database.filter_tags_query import apply_filter_tags_sqlalchemy
 
-                base_query = apply_filter_tags_sqlalchemy(
-                    base_query, SemanticMemoryItem, filter_tags, scopes=scopes
-                )
+                base_query = apply_filter_tags_sqlalchemy(base_query, SemanticMemoryItem, filter_tags, scopes=scopes)
 
                 if search_method == "embedding":
                     embed_query = True
@@ -908,7 +902,9 @@ async def list_semantic_items(
 
                 elif search_method == "fuzzy_match":
                     # Fuzzy matching: load all candidate items into memory and compute a fuzzy match score.
-                    result = await session.execute(select(SemanticMemoryItem).where(SemanticMemoryItem.user_id == user.id))
+                    result = await session.execute(
+                        select(SemanticMemoryItem).where(SemanticMemoryItem.user_id == user.id)
+                    )
                     all_items = result.scalars().all()
                     scored_items = []
                     for item in all_items:
@@ -1181,9 +1177,7 @@ async def delete_by_user_id(self, user_id: str) -> int:
 
         async with self.session_maker() as session:
             # Get IDs for Redis cleanup (only fetch IDs, not full objects)
-            result = await session.execute(
-                select(SemanticMemoryItem.id).where(SemanticMemoryItem.user_id == user_id)
-            )
+            result = await session.execute(select(SemanticMemoryItem.id).where(SemanticMemoryItem.user_id == user_id))
             item_ids = [row[0] for row in result.all()]
 
             count = len(item_ids)
@@ -1251,7 +1245,9 @@ async def list_semantic_items_by_org(
                         from mirix.constants import MAX_EMBEDDING_DIM
                         from mirix.embeddings import embedding_model
 
-                        embedded_text = await (await embedding_model(agent_state.embedding_config)).get_text_embedding(query)
+                        embedded_text = await (await embedding_model(agent_state.embedding_config)).get_text_embedding(
+                            query
+                        )
                         embedded_text = np.array(embedded_text)
                         embedded_text = np.pad(
                             embedded_text,
@@ -1299,9 +1295,7 @@ async def list_semantic_items_by_org(
 
             from mirix.database.filter_tags_query import apply_filter_tags_sqlalchemy
 
-            base_query = apply_filter_tags_sqlalchemy(
-                base_query, SemanticMemoryItem, filter_tags, scopes=scopes
-            )
+            base_query = apply_filter_tags_sqlalchemy(base_query, SemanticMemoryItem, filter_tags, scopes=scopes)
 
             # Handle empty query - fall back to recent sort
             if not query or query == "":
@@ -1378,4 +1372,4 @@ async def list_semantic_items_by_org(
 
             result = await session.execute(base_query)
             items = result.scalars().all()
-            return [item.to_pydantic() for item in items]
\ No newline at end of file
+            return [item.to_pydantic() for item in items]
diff --git a/mirix/services/tool_execution_sandbox.py b/mirix/services/tool_execution_sandbox.py
index 88900d47b..d08cffd9e 100755
--- a/mirix/services/tool_execution_sandbox.py
+++ b/mirix/services/tool_execution_sandbox.py
@@ -1,11 +1,10 @@
-import asyncio
 import ast
+import asyncio
 import base64
 import os
 import pickle
 import sys
 import tempfile
-import traceback
 import uuid
 from typing import TYPE_CHECKING, Any, Dict, Optional
 
@@ -94,7 +93,9 @@ async def _execute_tool() -> SandboxRunResult:
                 return await self.run_e2b_sandbox(agent_state=agent_state, additional_env_vars=additional_env_vars)
             else:
                 logger.debug("Using local sandbox to execute %s", self.tool_name)
-                return await self.run_local_dir_sandbox(agent_state=agent_state, additional_env_vars=additional_env_vars)
+                return await self.run_local_dir_sandbox(
+                    agent_state=agent_state, additional_env_vars=additional_env_vars
+                )
 
         if langfuse and trace_id:
             from typing import cast
@@ -175,9 +176,7 @@ async def run_local_dir_sandbox(
         with tempfile.NamedTemporaryFile(
             mode="w", dir=local_configs.sandbox_dir, suffix=".py", delete=False
         ) as temp_file:
-            code = self.generate_execution_script(
-                agent_state=agent_state, wrap_print_with_markers=True
-            )
+            code = self.generate_execution_script(agent_state=agent_state, wrap_print_with_markers=True)
             temp_file.write(code)
             temp_file.flush()
             temp_file_path = temp_file.name
@@ -223,9 +222,7 @@ async def run_local_dir_sandbox_venv(
                 stdout=asyncio.subprocess.PIPE,
                 stderr=asyncio.subprocess.PIPE,
             )
-            stdout_bytes, stderr_bytes = await asyncio.wait_for(
-                process.communicate(), timeout=60
-            )
+            stdout_bytes, stderr_bytes = await asyncio.wait_for(process.communicate(), timeout=60)
             stdout_text = stdout_bytes.decode() if stdout_bytes else ""
             stderr_text = stderr_bytes.decode() if stderr_bytes else ""
 
@@ -278,24 +275,20 @@ async def run_local_dir_sandbox_runpy(
                 stdout=asyncio.subprocess.PIPE,
                 stderr=asyncio.subprocess.PIPE,
             )
-            stdout_bytes, stderr_bytes = await asyncio.wait_for(
-                process.communicate(), timeout=60
-            )
+            stdout_bytes, stderr_bytes = await asyncio.wait_for(process.communicate(), timeout=60)
             stdout_text = stdout_bytes.decode() if stdout_bytes else ""
             stderr_text = stderr_bytes.decode() if stderr_bytes else ""
 
             if process.returncode != 0:
                 logger.error(
                     "Executing tool %s failed with return code %d",
-                    self.tool_name, process.returncode,
+                    self.tool_name,
+                    process.returncode,
                 )
                 func_return = get_friendly_error_msg(
                     function_name=self.tool_name,
                     exception_name="SubprocessError",
-                    exception_message=(
-                        f"Process exited with code {process.returncode}: "
-                        f"{stderr_text}"
-                    ),
+                    exception_message=(f"Process exited with code {process.returncode}: " f"{stderr_text}"),
                 )
                 return SandboxRunResult(
                     func_return=func_return,
@@ -306,9 +299,7 @@ async def run_local_dir_sandbox_runpy(
                     sandbox_config_fingerprint=sbx_config.fingerprint(),
                 )
 
-            func_result, stdout_parsed = (
-                self.parse_out_function_results_markers(stdout_text)
-            )
+            func_result, stdout_parsed = self.parse_out_function_results_markers(stdout_text)
             func_return, agent_state = self.parse_best_effort(func_result)
             return SandboxRunResult(
                 func_return=func_return,
@@ -320,15 +311,10 @@ async def run_local_dir_sandbox_runpy(
             )
 
         except asyncio.TimeoutError:
-            raise TimeoutError(
-                f"Executing tool {self.tool_name} has timed out."
-            )
+            raise TimeoutError(f"Executing tool {self.tool_name} has timed out.")
 
         except Exception as e:
-            logger.error(
-                f"Executing tool {self.tool_name} has an unexpected "
-                f"error: {e}"
-            )
+            logger.error(f"Executing tool {self.tool_name} has an unexpected " f"error: {e}")
             raise e
 
     def parse_out_function_results_markers(self, text: str):
@@ -344,21 +330,26 @@ def parse_out_function_results_markers(self, text: str):
 
     async def create_venv_for_local_sandbox(self, sandbox_dir_path: str, venv_path: str, env: Dict[str, str]):
         process = await asyncio.create_subprocess_exec(
-            sys.executable, "-m", "venv", "--with-pip", venv_path,
+            sys.executable,
+            "-m",
+            "venv",
+            "--with-pip",
+            venv_path,
             stdout=asyncio.subprocess.PIPE,
             stderr=asyncio.subprocess.PIPE,
         )
         _, stderr = await process.communicate()
         if process.returncode != 0:
-            raise RuntimeError(
-                f"venv creation failed: {stderr.decode() if stderr else ''}"
-            )
+            raise RuntimeError(f"venv creation failed: {stderr.decode() if stderr else ''}")
 
         pip_path = os.path.join(venv_path, "bin", "pip")
         try:
             logger.info("Upgrading pip in the virtual environment...")
             process = await asyncio.create_subprocess_exec(
-                pip_path, "install", "--upgrade", "pip",
+                pip_path,
+                "install",
+                "--upgrade",
+                "pip",
                 env=env,
                 stdout=asyncio.subprocess.PIPE,
                 stderr=asyncio.subprocess.PIPE,
@@ -371,7 +362,10 @@ async def create_venv_for_local_sandbox(self, sandbox_dir_path: str, venv_path:
             if os.path.isfile(requirements_txt_path):
                 logger.info(f"Installing packages from requirements file: {requirements_txt_path}")
                 process = await asyncio.create_subprocess_exec(
-                    pip_path, "install", "-r", requirements_txt_path,
+                    pip_path,
+                    "install",
+                    "-r",
+                    requirements_txt_path,
                     env=env,
                     stdout=asyncio.subprocess.PIPE,
                     stderr=asyncio.subprocess.PIPE,
diff --git a/mirix/services/tool_manager.py b/mirix/services/tool_manager.py
index 929d5a5ae..fdca10d2f 100644
--- a/mirix/services/tool_manager.py
+++ b/mirix/services/tool_manager.py
@@ -40,9 +40,7 @@ def __init__(self):
 
     # TODO: Refactor this across the codebase to use CreateTool instead of passing in a Tool object
     @enforce_types
-    async def create_or_update_tool(
-        self, pydantic_tool: PydanticTool, actor: PydanticClient
-    ) -> PydanticTool:
+    async def create_or_update_tool(self, pydantic_tool: PydanticTool, actor: PydanticClient) -> PydanticTool:
         """Create or update a tool (async)."""
         tool = await self.get_tool_by_name(tool_name=pydantic_tool.name, actor=actor)
         if tool:
@@ -76,9 +74,7 @@ async def get_tool_by_id(self, tool_id: str, actor: PydanticClient) -> PydanticT
             return tool.to_pydantic()
 
     @enforce_types
-    async def get_tool_by_name(
-        self, tool_name: str, actor: PydanticClient
-    ) -> Optional[PydanticTool]:
+    async def get_tool_by_name(self, tool_name: str, actor: PydanticClient) -> Optional[PydanticTool]:
         """Retrieve a tool by name (async)."""
         try:
             async with self.session_maker() as session:
@@ -105,9 +101,7 @@ async def list_tools(
             return [tool.to_pydantic() for tool in tools]
 
     @enforce_types
-    async def update_tool_by_id(
-        self, tool_id: str, tool_update: ToolUpdate, actor: PydanticClient
-    ) -> PydanticTool:
+    async def update_tool_by_id(self, tool_id: str, tool_update: ToolUpdate, actor: PydanticClient) -> PydanticTool:
         """Update a tool by its ID (async)."""
         async with self.session_maker() as session:
             tool = await ToolModel.read(db_session=session, identifier=tool_id, actor=actor)
@@ -126,9 +120,7 @@ async def delete_tool_by_id(self, tool_id: str, actor: PydanticClient) -> None:
         """Delete a tool by its ID."""
         async with self.session_maker() as session:
             try:
-                tool = await ToolModel.read(
-                    db_session=session, identifier=tool_id, actor=actor
-                )
+                tool = await ToolModel.read(db_session=session, identifier=tool_id, actor=actor)
                 await tool.hard_delete(db_session=session, actor=actor)
             except NoResultFound:
                 raise ValueError(f"Tool with id {tool_id} not found.")
diff --git a/mirix/services/user_manager.py b/mirix/services/user_manager.py
index 45f50144c..c5d50aa0a 100755
--- a/mirix/services/user_manager.py
+++ b/mirix/services/user_manager.py
@@ -1,4 +1,4 @@
-from typing import List, Optional, Tuple
+from typing import List, Optional
 
 from sqlalchemy import select
 
@@ -69,39 +69,27 @@ async def create_user(self, pydantic_user: PydanticUser) -> PydanticUser:
     async def update_user(self, user_update: UserUpdate) -> PydanticUser:
         """Update user details (with cache invalidation)."""
         async with self.session_maker() as session:
-            existing_user = await UserModel.read(
-                db_session=session, identifier=user_update.id
-            )
-            update_data = user_update.model_dump(
-                exclude_unset=True, exclude_none=True
-            )
+            existing_user = await UserModel.read(db_session=session, identifier=user_update.id)
+            update_data = user_update.model_dump(exclude_unset=True, exclude_none=True)
             for key, value in update_data.items():
                 setattr(existing_user, key, value)
             await existing_user.update_with_redis(session, actor=None)
             return existing_user.to_pydantic()
 
     @enforce_types
-    async def update_user_timezone(
-        self, timezone_str: str, user_id: str
-    ) -> PydanticUser:
+    async def update_user_timezone(self, timezone_str: str, user_id: str) -> PydanticUser:
         """Update the timezone of a user (with cache invalidation)."""
         async with self.session_maker() as session:
-            existing_user = await UserModel.read(
-                db_session=session, identifier=user_id
-            )
+            existing_user = await UserModel.read(db_session=session, identifier=user_id)
             existing_user.timezone = timezone_str
             await existing_user.update_with_redis(session, actor=None)
             return existing_user.to_pydantic()
 
     @enforce_types
-    async def update_user_status(
-        self, user_id: str, status: str
-    ) -> PydanticUser:
+    async def update_user_status(self, user_id: str, status: str) -> PydanticUser:
         """Update the status of a user (with cache invalidation)."""
         async with self.session_maker() as session:
-            existing_user = await UserModel.read(
-                db_session=session, identifier=user_id
-            )
+            existing_user = await UserModel.read(db_session=session, identifier=user_id)
             existing_user.status = status
             await existing_user.update_with_redis(session, actor=None)
             return existing_user.to_pydantic()
diff --git a/mirix/services/utils.py b/mirix/services/utils.py
index e08f6da53..7774675b8 100644
--- a/mirix/services/utils.py
+++ b/mirix/services/utils.py
@@ -113,11 +113,11 @@ def update_timezone(func):
     """Decorator that applies timezone conversion to datetime fields on returned results.
     Only supports async functions (MIRIX is async-native).
     """
+
     @wraps(func)
     async def wrapper(*args, **kwargs):
-        timezone_str = (
-            kwargs.get("timezone_str")
-            or (getattr(kwargs.get("actor"), "timezone", "UTC") if kwargs.get("actor") else None)
+        timezone_str = kwargs.get("timezone_str") or (
+            getattr(kwargs.get("actor"), "timezone", "UTC") if kwargs.get("actor") else None
         )
         results = await func(*args, **kwargs)
         if results is None or not timezone_str:
diff --git a/mirix/utils.py b/mirix/utils.py
index 33a2809db..4d7006f97 100755
--- a/mirix/utils.py
+++ b/mirix/utils.py
@@ -24,8 +24,6 @@
 from pathlib import Path
 from typing import (
     TYPE_CHECKING,
-    Any,
-    Dict,
     List,
     Optional,
     Union,
@@ -37,8 +35,8 @@
 from urllib.parse import urljoin, urlparse
 
 import demjson3 as demjson
-import pytz
 import httpx
+import pytz
 import tiktoken
 from pathvalidate import sanitize_filename as pathvalidate_sanitize_filename
 
@@ -947,9 +945,6 @@ def get_local_time(timezone=None):
     return time_str.strip()
 
 
-# get_utc_time is imported from mirix.client.utils
-
-
 def format_datetime(dt):
     return dt.strftime("%Y-%m-%d %I:%M:%S %p %Z%z")
 
diff --git a/poetry.lock b/poetry.lock
index 287b9973e..91db575bb 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,5 +1,40 @@
 # This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
 
+[[package]]
+name = "aiofiles"
+version = "25.1.0"
+description = "File support for asyncio."
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695"},
+    {file = "aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2"},
+]
+
+[[package]]
+name = "aiogoogle"
+version = "5.17.0"
+description = "Async Google API client"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "aiogoogle-5.17.0-py3-none-any.whl", hash = "sha256:75f69810969bd16521896fb4dab784ee7a184ba84b898d7da6370a0682fc9179"},
+    {file = "aiogoogle-5.17.0.tar.gz", hash = "sha256:3206674d953478599d47587e19db0fc831119abb31ce5f1acde8807e0f0a48c6"},
+]
+
+[package.dependencies]
+aiofiles = "*"
+aiohttp = "*"
+async-timeout = "*"
+google-auth = "*"
+tonyg-rfc3339 = "*"
+
+[package.extras]
+curio-asks = ["asks", "curio"]
+trio-asks = ["asks", "trio"]
+
 [[package]]
 name = "aiohappyeyeballs"
 version = "2.6.1"
@@ -355,6 +390,24 @@ files = [
     {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
 ]
 
+[[package]]
+name = "asyncddgs"
+version = "0.1.0a1"
+description = "Asynchronous DuckDuckGo Search API: A FastAPI service for async access to DuckDuckGo’s text, image, video, and news searches. Uses a custom aDDGS class with aiohttp and asyncio for concurrent queries. Supports advanced syntax and proxies."
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "asyncddgs-0.1.0a1-py3-none-any.whl", hash = "sha256:4eeb32d08ab0347934c8487d2977a66f0f5ccf12e2c00a11ad8e927bff5e1595"},
+    {file = "asyncddgs-0.1.0a1.tar.gz", hash = "sha256:3f8e10feada699ab1b39a4716f08a65104c07de13ba059fc34edc7cc86aa313c"},
+]
+
+[package.dependencies]
+aiohttp = "*"
+fastapi = "*"
+lxml = "*"
+uvicorn = "*"
+
 [[package]]
 name = "asyncpg"
 version = "0.31.0"
@@ -1481,53 +1534,6 @@ test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto
 test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard ; python_version < \"3.14\""]
 tqdm = ["tqdm"]
 
-[[package]]
-name = "google-api-core"
-version = "2.28.1"
-description = "Google API client core library"
-optional = false
-python-versions = ">=3.7"
-groups = ["main"]
-files = [
-    {file = "google_api_core-2.28.1-py3-none-any.whl", hash = "sha256:4021b0f8ceb77a6fb4de6fde4502cecab45062e66ff4f2895169e0b35bc9466c"},
-    {file = "google_api_core-2.28.1.tar.gz", hash = "sha256:2b405df02d68e68ce0fbc138559e6036559e685159d148ae5861013dc201baf8"},
-]
-
-[package.dependencies]
-google-auth = ">=2.14.1,<3.0.0"
-googleapis-common-protos = ">=1.56.2,<2.0.0"
-proto-plus = [
-    {version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
-    {version = ">=1.22.3,<2.0.0", markers = "python_version < \"3.13\""},
-]
-protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0"
-requests = ">=2.18.0,<3.0.0"
-
-[package.extras]
-async-rest = ["google-auth[aiohttp] (>=2.35.0,<3.0.0)"]
-grpc = ["grpcio (>=1.33.2,<2.0.0)", "grpcio (>=1.49.1,<2.0.0) ; python_version >= \"3.11\"", "grpcio (>=1.75.1,<2.0.0) ; python_version >= \"3.14\"", "grpcio-status (>=1.33.2,<2.0.0)", "grpcio-status (>=1.49.1,<2.0.0) ; python_version >= \"3.11\"", "grpcio-status (>=1.75.1,<2.0.0) ; python_version >= \"3.14\""]
-grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.0)"]
-grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.0)"]
-
-[[package]]
-name = "google-api-python-client"
-version = "2.187.0"
-description = "Google API Client Library for Python"
-optional = false
-python-versions = ">=3.7"
-groups = ["main"]
-files = [
-    {file = "google_api_python_client-2.187.0-py3-none-any.whl", hash = "sha256:d8d0f6d85d7d1d10bdab32e642312ed572bdc98919f72f831b44b9a9cebba32f"},
-    {file = "google_api_python_client-2.187.0.tar.gz", hash = "sha256:e98e8e8f49e1b5048c2f8276473d6485febc76c9c47892a8b4d1afa2c9ec8278"},
-]
-
-[package.dependencies]
-google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0"
-google-auth = ">=1.32.0,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0"
-google-auth-httplib2 = ">=0.2.0,<1.0.0"
-httplib2 = ">=0.19.0,<1.0.0"
-uritemplate = ">=3.0.1,<5"
-
 [[package]]
 name = "google-auth"
 version = "2.43.0"
@@ -1555,22 +1561,6 @@ requests = ["requests (>=2.20.0,<3.0.0)"]
 testing = ["aiohttp (<3.10.0)", "aiohttp (>=3.6.2,<4.0.0)", "aioresponses", "cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (>=38.0.3)", "cryptography (>=38.0.3)", "flask", "freezegun", "grpcio", "mock", "oauth2client", "packaging", "pyjwt (>=2.0)", "pyopenssl (<24.3.0)", "pyopenssl (>=20.0.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-localserver", "pyu2f (>=0.1.5)", "requests (>=2.20.0,<3.0.0)", "responses", "urllib3"]
 urllib3 = ["packaging", "urllib3"]
 
-[[package]]
-name = "google-auth-httplib2"
-version = "0.2.1"
-description = "Google Authentication Library: httplib2 transport"
-optional = false
-python-versions = ">=3.7"
-groups = ["main"]
-files = [
-    {file = "google_auth_httplib2-0.2.1-py3-none-any.whl", hash = "sha256:1be94c611db91c01f9703e7f62b0a59bbd5587a95571c7b6fade510d648bc08b"},
-    {file = "google_auth_httplib2-0.2.1.tar.gz", hash = "sha256:5ef03be3927423c87fb69607b42df23a444e434ddb2555b73b3679793187b7de"},
-]
-
-[package.dependencies]
-google-auth = ">=1.32.0,<3.0.0"
-httplib2 = ">=0.19.0,<1.0.0"
-
 [[package]]
 name = "google-auth-oauthlib"
 version = "1.2.2"
@@ -2017,21 +2007,6 @@ http2 = ["h2 (>=3,<5)"]
 socks = ["socksio (==1.*)"]
 trio = ["trio (>=0.22.0,<1.0)"]
 
-[[package]]
-name = "httplib2"
-version = "0.31.0"
-description = "A comprehensive HTTP client library."
-optional = false
-python-versions = ">=3.6"
-groups = ["main"]
-files = [
-    {file = "httplib2-0.31.0-py3-none-any.whl", hash = "sha256:b9cd78abea9b4e43a7714c6e0f8b6b8561a6fc1e95d5dbd367f5bf0ef35f5d24"},
-    {file = "httplib2-0.31.0.tar.gz", hash = "sha256:ac7ab497c50975147d4f7b1ade44becc7df2f8954d42b38b3d69c515f531135c"},
-]
-
-[package.dependencies]
-pyparsing = ">=3.0.4,<4"
-
 [[package]]
 name = "httptools"
 version = "0.7.1"
@@ -2232,8 +2207,8 @@ files = [
 ]
 
 [package.dependencies]
-decorator = {version = "*", markers = "python_version >= \"3.11\""}
-ipython = {version = ">=7.31.1", markers = "python_version >= \"3.11\""}
+decorator = {version = "*", markers = "python_version > \"3.6\""}
+ipython = {version = ">=7.31.1", markers = "python_version > \"3.6\""}
 tomli = {version = "*", markers = "python_version > \"3.6\" and python_version < \"3.11\""}
 
 [[package]]
@@ -2605,7 +2580,7 @@ description = "Mypyc runtime library"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"dev\" and platform_python_implementation != \"PyPy\""
+markers = "platform_python_implementation != \"PyPy\" and extra == \"dev\""
 files = [
     {file = "librt-0.7.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b45306a1fc5f53c9330fbee134d8b3227fe5da2ab09813b892790400aa49352d"},
     {file = "librt-0.7.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:864c4b7083eeee250ed55135d2127b260d7eb4b5e953a9e5df09c852e327961b"},
@@ -2959,6 +2934,162 @@ files = [
 [package.dependencies]
 llama-cloud-services = ">=0.6.54"
 
+[[package]]
+name = "lxml"
+version = "6.0.2"
+description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "lxml-6.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e77dd455b9a16bbd2a5036a63ddbd479c19572af81b624e79ef422f929eef388"},
+    {file = "lxml-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d444858b9f07cefff6455b983aea9a67f7462ba1f6cbe4a21e8bf6791bf2153"},
+    {file = "lxml-6.0.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f952dacaa552f3bb8834908dddd500ba7d508e6ea6eb8c52eb2d28f48ca06a31"},
+    {file = "lxml-6.0.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:71695772df6acea9f3c0e59e44ba8ac50c4f125217e84aab21074a1a55e7e5c9"},
+    {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:17f68764f35fd78d7c4cc4ef209a184c38b65440378013d24b8aecd327c3e0c8"},
+    {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:058027e261afed589eddcfe530fcc6f3402d7fd7e89bfd0532df82ebc1563dba"},
+    {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8ffaeec5dfea5881d4c9d8913a32d10cfe3923495386106e4a24d45300ef79c"},
+    {file = "lxml-6.0.2-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:f2e3b1a6bb38de0bc713edd4d612969dd250ca8b724be8d460001a387507021c"},
+    {file = "lxml-6.0.2-cp310-cp310-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d6690ec5ec1cce0385cb20896b16be35247ac8c2046e493d03232f1c2414d321"},
+    {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f2a50c3c1d11cad0ebebbac357a97b26aa79d2bcaf46f256551152aa85d3a4d1"},
+    {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:3efe1b21c7801ffa29a1112fab3b0f643628c30472d507f39544fd48e9549e34"},
+    {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:59c45e125140b2c4b33920d21d83681940ca29f0b83f8629ea1a2196dc8cfe6a"},
+    {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:452b899faa64f1805943ec1c0c9ebeaece01a1af83e130b69cdefeda180bb42c"},
+    {file = "lxml-6.0.2-cp310-cp310-win32.whl", hash = "sha256:1e786a464c191ca43b133906c6903a7e4d56bef376b75d97ccbb8ec5cf1f0a4b"},
+    {file = "lxml-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:dacf3c64ef3f7440e3167aa4b49aa9e0fb99e0aa4f9ff03795640bf94531bcb0"},
+    {file = "lxml-6.0.2-cp310-cp310-win_arm64.whl", hash = "sha256:45f93e6f75123f88d7f0cfd90f2d05f441b808562bf0bc01070a00f53f5028b5"},
+    {file = "lxml-6.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:13e35cbc684aadf05d8711a5d1b5857c92e5e580efa9a0d2be197199c8def607"},
+    {file = "lxml-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b1675e096e17c6fe9c0e8c81434f5736c0739ff9ac6123c87c2d452f48fc938"},
+    {file = "lxml-6.0.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac6e5811ae2870953390452e3476694196f98d447573234592d30488147404d"},
+    {file = "lxml-6.0.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5aa0fc67ae19d7a64c3fe725dc9a1bb11f80e01f78289d05c6f62545affec438"},
+    {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de496365750cc472b4e7902a485d3f152ecf57bd3ba03ddd5578ed8ceb4c5964"},
+    {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:200069a593c5e40b8f6fc0d84d86d970ba43138c3e68619ffa234bc9bb806a4d"},
+    {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7d2de809c2ee3b888b59f995625385f74629707c9355e0ff856445cdcae682b7"},
+    {file = "lxml-6.0.2-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:b2c3da8d93cf5db60e8858c17684c47d01fee6405e554fb55018dd85fc23b178"},
+    {file = "lxml-6.0.2-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:442de7530296ef5e188373a1ea5789a46ce90c4847e597856570439621d9c553"},
+    {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2593c77efde7bfea7f6389f1ab249b15ed4aa5bc5cb5131faa3b843c429fbedb"},
+    {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:3e3cb08855967a20f553ff32d147e14329b3ae70ced6edc2f282b94afbc74b2a"},
+    {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2ed6c667fcbb8c19c6791bbf40b7268ef8ddf5a96940ba9404b9f9a304832f6c"},
+    {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b8f18914faec94132e5b91e69d76a5c1d7b0c73e2489ea8929c4aaa10b76bbf7"},
+    {file = "lxml-6.0.2-cp311-cp311-win32.whl", hash = "sha256:6605c604e6daa9e0d7f0a2137bdc47a2e93b59c60a65466353e37f8272f47c46"},
+    {file = "lxml-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e5867f2651016a3afd8dd2c8238baa66f1e2802f44bc17e236f547ace6647078"},
+    {file = "lxml-6.0.2-cp311-cp311-win_arm64.whl", hash = "sha256:4197fb2534ee05fd3e7afaab5d8bfd6c2e186f65ea7f9cd6a82809c887bd1285"},
+    {file = "lxml-6.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a59f5448ba2ceccd06995c95ea59a7674a10de0810f2ce90c9006f3cbc044456"},
+    {file = "lxml-6.0.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e8113639f3296706fbac34a30813929e29247718e88173ad849f57ca59754924"},
+    {file = "lxml-6.0.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a8bef9b9825fa8bc816a6e641bb67219489229ebc648be422af695f6e7a4fa7f"},
+    {file = "lxml-6.0.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:65ea18d710fd14e0186c2f973dc60bb52039a275f82d3c44a0e42b43440ea534"},
+    {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c371aa98126a0d4c739ca93ceffa0fd7a5d732e3ac66a46e74339acd4d334564"},
+    {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:700efd30c0fa1a3581d80a748157397559396090a51d306ea59a70020223d16f"},
+    {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c33e66d44fe60e72397b487ee92e01da0d09ba2d66df8eae42d77b6d06e5eba0"},
+    {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90a345bbeaf9d0587a3aaffb7006aa39ccb6ff0e96a57286c0cb2fd1520ea192"},
+    {file = "lxml-6.0.2-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:064fdadaf7a21af3ed1dcaa106b854077fbeada827c18f72aec9346847cd65d0"},
+    {file = "lxml-6.0.2-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fbc74f42c3525ac4ffa4b89cbdd00057b6196bcefe8bce794abd42d33a018092"},
+    {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6ddff43f702905a4e32bc24f3f2e2edfe0f8fde3277d481bffb709a4cced7a1f"},
+    {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6da5185951d72e6f5352166e3da7b0dc27aa70bd1090b0eb3f7f7212b53f1bb8"},
+    {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:57a86e1ebb4020a38d295c04fc79603c7899e0df71588043eb218722dabc087f"},
+    {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2047d8234fe735ab77802ce5f2297e410ff40f5238aec569ad7c8e163d7b19a6"},
+    {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f91fd2b2ea15a6800c8e24418c0775a1694eefc011392da73bc6cef2623b322"},
+    {file = "lxml-6.0.2-cp312-cp312-win32.whl", hash = "sha256:3ae2ce7d6fedfb3414a2b6c5e20b249c4c607f72cb8d2bb7cc9c6ec7c6f4e849"},
+    {file = "lxml-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:72c87e5ee4e58a8354fb9c7c84cbf95a1c8236c127a5d1b7683f04bed8361e1f"},
+    {file = "lxml-6.0.2-cp312-cp312-win_arm64.whl", hash = "sha256:61cb10eeb95570153e0c0e554f58df92ecf5109f75eacad4a95baa709e26c3d6"},
+    {file = "lxml-6.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77"},
+    {file = "lxml-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f"},
+    {file = "lxml-6.0.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452"},
+    {file = "lxml-6.0.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048"},
+    {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df"},
+    {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1"},
+    {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916"},
+    {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd"},
+    {file = "lxml-6.0.2-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6"},
+    {file = "lxml-6.0.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a"},
+    {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679"},
+    {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659"},
+    {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484"},
+    {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2"},
+    {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314"},
+    {file = "lxml-6.0.2-cp313-cp313-win32.whl", hash = "sha256:e8cd2415f372e7e5a789d743d133ae474290a90b9023197fd78f32e2dc6873e2"},
+    {file = "lxml-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:b30d46379644fbfc3ab81f8f82ae4de55179414651f110a1514f0b1f8f6cb2d7"},
+    {file = "lxml-6.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:13dcecc9946dca97b11b7c40d29fba63b55ab4170d3c0cf8c0c164343b9bfdcf"},
+    {file = "lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe"},
+    {file = "lxml-6.0.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d"},
+    {file = "lxml-6.0.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d"},
+    {file = "lxml-6.0.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5"},
+    {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0"},
+    {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba"},
+    {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0"},
+    {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d"},
+    {file = "lxml-6.0.2-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37"},
+    {file = "lxml-6.0.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9"},
+    {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917"},
+    {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f"},
+    {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8"},
+    {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a"},
+    {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c"},
+    {file = "lxml-6.0.2-cp314-cp314-win32.whl", hash = "sha256:be3aaa60da67e6153eb15715cc2e19091af5dc75faef8b8a585aea372507384b"},
+    {file = "lxml-6.0.2-cp314-cp314-win_amd64.whl", hash = "sha256:fa25afbadead523f7001caf0c2382afd272c315a033a7b06336da2637d92d6ed"},
+    {file = "lxml-6.0.2-cp314-cp314-win_arm64.whl", hash = "sha256:063eccf89df5b24e361b123e257e437f9e9878f425ee9aae3144c77faf6da6d8"},
+    {file = "lxml-6.0.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d"},
+    {file = "lxml-6.0.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba"},
+    {file = "lxml-6.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601"},
+    {file = "lxml-6.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed"},
+    {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37"},
+    {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338"},
+    {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9"},
+    {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd"},
+    {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d"},
+    {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9"},
+    {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e"},
+    {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d"},
+    {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec"},
+    {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272"},
+    {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f"},
+    {file = "lxml-6.0.2-cp314-cp314t-win32.whl", hash = "sha256:da08e7bb297b04e893d91087df19638dc7a6bb858a954b0cc2b9f5053c922312"},
+    {file = "lxml-6.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:252a22982dca42f6155125ac76d3432e548a7625d56f5a273ee78a5057216eca"},
+    {file = "lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c"},
+    {file = "lxml-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a656ca105115f6b766bba324f23a67914d9c728dafec57638e2b92a9dcd76c62"},
+    {file = "lxml-6.0.2-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c54d83a2188a10ebdba573f16bd97135d06c9ef60c3dc495315c7a28c80a263f"},
+    {file = "lxml-6.0.2-cp38-cp38-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:1ea99340b3c729beea786f78c38f60f4795622f36e305d9c9be402201efdc3b7"},
+    {file = "lxml-6.0.2-cp38-cp38-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:af85529ae8d2a453feee4c780d9406a5e3b17cee0dd75c18bd31adcd584debc3"},
+    {file = "lxml-6.0.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fe659f6b5d10fb5a17f00a50eb903eb277a71ee35df4615db573c069bcf967ac"},
+    {file = "lxml-6.0.2-cp38-cp38-win32.whl", hash = "sha256:5921d924aa5468c939d95c9814fa9f9b5935a6ff4e679e26aaf2951f74043512"},
+    {file = "lxml-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:0aa7070978f893954008ab73bb9e3c24a7c56c054e00566a21b553dc18105fca"},
+    {file = "lxml-6.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2c8458c2cdd29589a8367c09c8f030f1d202be673f0ca224ec18590b3b9fb694"},
+    {file = "lxml-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3fee0851639d06276e6b387f1c190eb9d7f06f7f53514e966b26bae46481ec90"},
+    {file = "lxml-6.0.2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b2142a376b40b6736dfc214fd2902409e9e3857eff554fed2d3c60f097e62a62"},
+    {file = "lxml-6.0.2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6b5b39cc7e2998f968f05309e666103b53e2edd01df8dc51b90d734c0825444"},
+    {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4aec24d6b72ee457ec665344a29acb2d35937d5192faebe429ea02633151aad"},
+    {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:b42f4d86b451c2f9d06ffb4f8bbc776e04df3ba070b9fe2657804b1b40277c48"},
+    {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6cdaefac66e8b8f30e37a9b4768a391e1f8a16a7526d5bc77a7928408ef68e93"},
+    {file = "lxml-6.0.2-cp39-cp39-manylinux_2_31_armv7l.whl", hash = "sha256:b738f7e648735714bbb82bdfd030203360cfeab7f6e8a34772b3c8c8b820568c"},
+    {file = "lxml-6.0.2-cp39-cp39-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:daf42de090d59db025af61ce6bdb2521f0f102ea0e6ea310f13c17610a97da4c"},
+    {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:66328dabea70b5ba7e53d94aa774b733cf66686535f3bc9250a7aab53a91caaf"},
+    {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:e237b807d68a61fc3b1e845407e27e5eb8ef69bc93fe8505337c1acb4ee300b6"},
+    {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:ac02dc29fd397608f8eb15ac1610ae2f2f0154b03f631e6d724d9e2ad4ee2c84"},
+    {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:817ef43a0c0b4a77bd166dc9a09a555394105ff3374777ad41f453526e37f9cb"},
+    {file = "lxml-6.0.2-cp39-cp39-win32.whl", hash = "sha256:bc532422ff26b304cfb62b328826bd995c96154ffd2bac4544f37dbb95ecaa8f"},
+    {file = "lxml-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:995e783eb0374c120f528f807443ad5a83a656a8624c467ea73781fc5f8a8304"},
+    {file = "lxml-6.0.2-cp39-cp39-win_arm64.whl", hash = "sha256:08b9d5e803c2e4725ae9e8559ee880e5328ed61aa0935244e0515d7d9dbec0aa"},
+    {file = "lxml-6.0.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e748d4cf8fef2526bb2a589a417eba0c8674e29ffcb570ce2ceca44f1e567bf6"},
+    {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4ddb1049fa0579d0cbd00503ad8c58b9ab34d1254c77bc6a5576d96ec7853dba"},
+    {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cb233f9c95f83707dae461b12b720c1af9c28c2d19208e1be03387222151daf5"},
+    {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc456d04db0515ce3320d714a1eac7a97774ff0849e7718b492d957da4631dd4"},
+    {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2613e67de13d619fd283d58bda40bff0ee07739f624ffee8b13b631abf33083d"},
+    {file = "lxml-6.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:24a8e756c982c001ca8d59e87c80c4d9dcd4d9b44a4cbeb8d9be4482c514d41d"},
+    {file = "lxml-6.0.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1c06035eafa8404b5cf475bb37a9f6088b0aca288d4ccc9d69389750d5543700"},
+    {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c7d13103045de1bdd6fe5d61802565f1a3537d70cd3abf596aa0af62761921ee"},
+    {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a3c150a95fbe5ac91de323aa756219ef9cf7fde5a3f00e2281e30f33fa5fa4f"},
+    {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:60fa43be34f78bebb27812ed90f1925ec99560b0fa1decdb7d12b84d857d31e9"},
+    {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:21c73b476d3cfe836be731225ec3421fa2f048d84f6df6a8e70433dff1376d5a"},
+    {file = "lxml-6.0.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:27220da5be049e936c3aca06f174e8827ca6445a4353a1995584311487fc4e3e"},
+    {file = "lxml-6.0.2.tar.gz", hash = "sha256:cd79f3367bd74b317dda655dc8fcfa304d9eb6e4fb06b7168c5cf27f96e0cd62"},
+]
+
+[package.extras]
+cssselect = ["cssselect (>=0.7)"]
+html-clean = ["lxml_html_clean"]
+html5 = ["html5lib"]
+htmlsoup = ["BeautifulSoup4"]
+
 [[package]]
 name = "markdown"
 version = "3.10"
@@ -3794,8 +3925,8 @@ files = [
 [package.dependencies]
 googleapis-common-protos = ">=1.57,<2.0"
 grpcio = [
-    {version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""},
     {version = ">=1.63.2,<2.0.0", markers = "python_version < \"3.13\""},
+    {version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""},
 ]
 opentelemetry-api = ">=1.15,<2.0"
 opentelemetry-exporter-otlp-proto-common = "1.38.0"
@@ -3989,9 +4120,9 @@ files = [
 
 [package.dependencies]
 numpy = [
-    {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
     {version = ">=1.22.4", markers = "python_version < \"3.11\""},
     {version = ">=1.23.2", markers = "python_version == \"3.11\""},
+    {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
 ]
 python-dateutil = ">=2.8.2"
 pytz = ">=2020.1"
@@ -4393,24 +4524,6 @@ files = [
     {file = "propcache-0.4.1.tar.gz", hash = "sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d"},
 ]
 
-[[package]]
-name = "proto-plus"
-version = "1.26.1"
-description = "Beautiful, Pythonic protocol buffers"
-optional = false
-python-versions = ">=3.7"
-groups = ["main"]
-files = [
-    {file = "proto_plus-1.26.1-py3-none-any.whl", hash = "sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66"},
-    {file = "proto_plus-1.26.1.tar.gz", hash = "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012"},
-]
-
-[package.dependencies]
-protobuf = ">=3.19.0,<7.0.0"
-
-[package.extras]
-testing = ["google-api-core (>=1.31.5)"]
-
 [[package]]
 name = "protobuf"
 version = "5.29.5"
@@ -4864,21 +4977,6 @@ dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pyte
 docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"]
 tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"]
 
-[[package]]
-name = "pyparsing"
-version = "3.2.5"
-description = "pyparsing - Classes and methods to define and execute parsing grammars"
-optional = false
-python-versions = ">=3.9"
-groups = ["main"]
-files = [
-    {file = "pyparsing-3.2.5-py3-none-any.whl", hash = "sha256:e38a4f02064cf41fe6593d328d0512495ad1f3d8a91c4f73fc401b3079a59a5e"},
-    {file = "pyparsing-3.2.5.tar.gz", hash = "sha256:2df8d5b7b2802ef88e8d016a2eb9c7aeaa923529cd251ed0fe4608275d4105b6"},
-]
-
-[package.extras]
-diagrams = ["jinja2", "railroad-diagrams"]
-
 [[package]]
 name = "pypdf"
 version = "6.3.0"
@@ -6360,6 +6458,17 @@ files = [
     {file = "tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549"},
 ]
 
+[[package]]
+name = "tonyg-rfc3339"
+version = "0.1"
+description = "Python implementation of RFC 3339"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "tonyg-rfc3339-0.1.tar.gz", hash = "sha256:e424e7b4ddf2a2f5c70d7317faecf9b69b7da099c9fc08d046c3ac679dd30d3d"},
+]
+
 [[package]]
 name = "tqdm"
 version = "4.67.1"
@@ -6453,18 +6562,6 @@ files = [
     {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"},
 ]
 
-[[package]]
-name = "uritemplate"
-version = "4.2.0"
-description = "Implementation of RFC 6570 URI Templates"
-optional = false
-python-versions = ">=3.9"
-groups = ["main"]
-files = [
-    {file = "uritemplate-4.2.0-py3-none-any.whl", hash = "sha256:962201ba1c4edcab02e60f9a0d3821e82dfc5d2d6662a21abd533879bdb8a686"},
-    {file = "uritemplate-4.2.0.tar.gz", hash = "sha256:480c2ed180878955863323eea31b0ede668795de182617fef9c6ca09e6ec9d0e"},
-]
-
 [[package]]
 name = "urllib3"
 version = "2.5.0"
@@ -6517,7 +6614,7 @@ description = "Fast implementation of asyncio event loop on top of libuv"
 optional = false
 python-versions = ">=3.8.1"
 groups = ["main"]
-markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\""
+markers = "platform_python_implementation != \"PyPy\" and sys_platform != \"win32\" and sys_platform != \"cygwin\""
 files = [
     {file = "uvloop-0.22.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ef6f0d4cc8a9fa1f6a910230cd53545d9a14479311e87e3cb225495952eb672c"},
     {file = "uvloop-0.22.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7cd375a12b71d33d46af85a3343b35d98e8116134ba404bd657b3b1d15988792"},
@@ -7069,4 +7166,4 @@ voice = ["SpeechRecognition", "pydub"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<4.0"
-content-hash = "0faeb470f574a981326b51a4a67708ca5d5ca98a163a0015993b0e1adc425f5e"
+content-hash = "f504f7ab5b128a6c85e49e17bb915bf0509e871cafd9c5d8dbce44824bf6d9d8"
diff --git a/pyproject.toml b/pyproject.toml
index 2477ff926..52783c805 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -172,21 +172,23 @@ skip_glob = ["frontend/**"]
 
 [tool.mypy]
 python_version = "3.10"
-warn_return_any = true
-warn_unused_configs = true
+warn_return_any = false
+warn_unused_configs = false
 disallow_untyped_defs = false
 disallow_incomplete_defs = false
-check_untyped_defs = true
+check_untyped_defs = false
 disallow_untyped_decorators = false
-no_implicit_optional = true
-warn_redundant_casts = true
-warn_unused_ignores = true
-warn_no_return = true
-warn_unreachable = true
-strict_equality = true
+no_implicit_optional = false
+warn_redundant_casts = false
+warn_unused_ignores = false
+warn_no_return = false
+warn_unreachable = false
+strict_equality = false
 show_error_codes = true
-show_column_numbers = true
+show_column_numbers = false
 pretty = true
+ignore_errors = true
+follow_imports = "skip"
 
 # Exclude patterns
 exclude = [
@@ -198,14 +200,8 @@ exclude = [
 
 # Per-module options
 [[tool.mypy.overrides]]
-module = [
-    "composio.*",
-    "llama_index.*",
-    "llama-index-embeddings-google-genai.*",
-    "pgvector.*",
-    "mcp.*",
-]
-ignore_missing_imports = true
+module = "*"
+ignore_errors = true
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
@@ -216,6 +212,22 @@ addopts = "-v --tb=short"
 asyncio_mode = "auto"
 asyncio_default_fixture_loop_scope = "session"
 
+[tool.ruff]
+line-length = 120
+target-version = "py310"
+
+[tool.ruff.lint]
+# E402: Module level import not at top of file (intentional for circular imports)
+# E712: Comparison to True/False (used in SQLAlchemy filters)
+# E722: Bare except (some legacy code)
+# E731: Lambda assignment (acceptable pattern)
+# F401: Unused import (some imports used by type checking)
+# F841: Unused variable (some intentional like client_id extraction)
+ignore = ["E402", "E712", "E722", "E731", "F401", "F841"]
+
+[tool.ruff.lint.per-file-ignores]
+"__init__.py" = ["F401"]  # Unused imports OK in __init__.py files
+
 
 [tool.poetry.group.dev.dependencies]
 grpcio-tools = ">=1.66.0,<1.67.0"
diff --git a/tests/test_orm_to_pydantic_safe.py b/tests/test_orm_to_pydantic_safe.py
new file mode 100644
index 000000000..8a2e6f28a
--- /dev/null
+++ b/tests/test_orm_to_pydantic_safe.py
@@ -0,0 +1,347 @@
+"""
+Test ORM to_pydantic() conversions to ensure they don't trigger MissingGreenlet errors.
+
+This test ensures that:
+1. ORM models can be safely converted to Pydantic even when detached from session
+2. Relationship access doesn't trigger lazy loading outside async context
+3. Meta agent and memory manager flows work correctly
+"""
+
+import pytest
+from sqlalchemy import select
+
+from mirix.orm import Agent as AgentModel
+from mirix.orm.episodic_memory import EpisodicEvent
+from mirix.orm.knowledge_vault import KnowledgeVaultItem
+from mirix.orm.procedural_memory import ProceduralMemoryItem
+from mirix.orm.resource_memory import ResourceMemoryItem
+from mirix.orm.semantic_memory import SemanticMemoryItem
+from mirix.schemas.agent import AgentState as PydanticAgentState
+from mirix.schemas.embedding_config import EmbeddingConfig
+from mirix.schemas.episodic_memory import EpisodicEvent as PydanticEpisodicEvent
+from mirix.schemas.llm_config import LLMConfig
+
+
+@pytest.mark.asyncio
+async def test_agent_to_pydantic_with_session(server):
+    """Test Agent.to_pydantic() inside an async session."""
+    from mirix.server.server import db_context
+
+    # Get test client
+    actor = server.default_client
+    
+    # Create an agent with tools
+    from mirix.schemas.agent import CreateAgent
+    agent_create = CreateAgent(
+        name="test_agent_conversion",
+        llm_config=LLMConfig.default_config("gpt-4"),
+        embedding_config=EmbeddingConfig.default_config("text-embedding-004"),
+        include_base_tools=True,
+    )
+    
+    agent_state = await server.agent_manager.create_agent(
+        agent_create=agent_create,
+        actor=actor,
+    )
+    
+    # Now fetch it back and convert inside session
+    async with db_context() as session:
+        agent = await AgentModel.read(
+            db_session=session,
+            identifier=agent_state.id,
+            actor=actor,
+        )
+        
+        # This should work - we're inside the session
+        pydantic_agent = agent.to_pydantic()
+        
+        assert isinstance(pydantic_agent, PydanticAgentState)
+        assert pydantic_agent.id == agent_state.id
+        assert pydantic_agent.name == "test_agent_conversion"
+        # tools should be present (loaded via selectin)
+        assert isinstance(pydantic_agent.tools, list)
+
+
+@pytest.mark.asyncio
+async def test_agent_to_pydantic_detached(server):
+    """Test Agent.to_pydantic() on a detached instance (session closed)."""
+    from mirix.server.server import db_context
+
+    actor = server.default_client
+    
+    # Create an agent
+    from mirix.schemas.agent import CreateAgent
+    agent_create = CreateAgent(
+        name="test_agent_detached",
+        llm_config=LLMConfig.default_config("gpt-4"),
+        embedding_config=EmbeddingConfig.default_config("text-embedding-004"),
+        include_base_tools=False,  # No tools to simplify
+    )
+    
+    agent_state = await server.agent_manager.create_agent(
+        agent_create=agent_create,
+        actor=actor,
+    )
+    
+    # Fetch agent and close session
+    async with db_context() as session:
+        agent = await AgentModel.read(
+            db_session=session,
+            identifier=agent_state.id,
+            actor=actor,
+        )
+        # Session closes here
+    
+    # Now agent is detached - to_pydantic() should still work
+    # It should use cached/loaded relationships or empty list
+    pydantic_agent = agent.to_pydantic()
+    
+    assert isinstance(pydantic_agent, PydanticAgentState)
+    assert pydantic_agent.id == agent_state.id
+    assert pydantic_agent.name == "test_agent_detached"
+    # tools might be empty list (not loaded) or loaded collection
+    assert isinstance(pydantic_agent.tools, list)
+
+
+@pytest.mark.asyncio
+async def test_episodic_memory_to_pydantic(server):
+    """Test EpisodicEvent.to_pydantic() doesn't trigger relationship loading."""
+    from datetime import datetime
+
+    from mirix.server.server import db_context
+
+    actor = server.default_client
+    user = server.admin_user
+    
+    # Create an episodic event
+    from mirix.schemas.episodic_memory import EpisodicEvent as EpisodicEventCreate
+    
+    event_data = {
+        "event_type": "test_event",
+        "actor": "system",
+        "summary": "Test event for conversion",
+        "details": "Testing to_pydantic conversion safety",
+        "occurred_at": datetime.now(),
+        "user_id": user.id,
+        "organization_id": user.organization_id,
+    }
+    
+    event = await server.episodic_memory_manager.create_episodic_memory(
+        agent_state=None,
+        event=EpisodicEventCreate(**event_data),
+        actor=actor,
+    )
+    
+    # Fetch and convert inside session
+    async with db_context() as session:
+        result = await session.execute(
+            select(EpisodicEvent).where(EpisodicEvent.id == event.id)
+        )
+        orm_event = result.scalar_one()
+        
+        # Convert inside session
+        pydantic_event = orm_event.to_pydantic()
+        assert isinstance(pydantic_event, PydanticEpisodicEvent)
+        assert pydantic_event.id == event.id
+        assert pydantic_event.summary == "Test event for conversion"
+    
+    # Now test after session closed
+    async with db_context() as session:
+        result = await session.execute(
+            select(EpisodicEvent).where(EpisodicEvent.id == event.id)
+        )
+        orm_event = result.scalar_one()
+    
+    # Session closed - to_pydantic() should still work
+    pydantic_event = orm_event.to_pydantic()
+    assert isinstance(pydantic_event, PydanticEpisodicEvent)
+    assert pydantic_event.id == event.id
+
+
+@pytest.mark.asyncio
+async def test_memory_models_to_pydantic(server):
+    """Test all memory models' to_pydantic() methods work safely."""
+    from mirix.server.server import db_context
+
+    actor = server.default_client
+    user = server.admin_user
+    
+    # Test semantic memory
+    from mirix.schemas.semantic_memory import SemanticMemoryItem as SemanticCreate
+    semantic = await server.semantic_memory_manager.create_semantic_item(
+        agent_state=None,
+        item=SemanticCreate(
+            name="test_concept",
+            summary="Test summary",
+            details="Test details",
+            source="test",
+            user_id=user.id,
+            organization_id=user.organization_id,
+        ),
+        actor=actor,
+    )
+    
+    async with db_context() as session:
+        result = await session.execute(
+            select(SemanticMemoryItem).where(SemanticMemoryItem.id == semantic.id)
+        )
+        orm_semantic = result.scalar_one()
+    
+    # Detached conversion
+    pydantic_semantic = orm_semantic.to_pydantic()
+    assert pydantic_semantic.id == semantic.id
+    assert pydantic_semantic.name == "test_concept"
+    
+    # Test procedural memory
+    from mirix.schemas.procedural_memory import ProceduralMemoryItem as ProceduralCreate
+    procedural = await server.procedural_memory_manager.create_procedure(
+        agent_state=None,
+        item=ProceduralCreate(
+            summary="test_procedure",
+            description="Test description",
+            steps="Step 1\nStep 2",
+            tags=["test"],
+            user_id=user.id,
+            organization_id=user.organization_id,
+        ),
+        actor=actor,
+    )
+    
+    async with db_context() as session:
+        result = await session.execute(
+            select(ProceduralMemoryItem).where(ProceduralMemoryItem.id == procedural.id)
+        )
+        orm_procedural = result.scalar_one()
+    
+    pydantic_procedural = orm_procedural.to_pydantic()
+    assert pydantic_procedural.id == procedural.id
+    assert pydantic_procedural.summary == "test_procedure"
+    
+    # Test resource memory
+    from mirix.schemas.resource_memory import ResourceMemoryItem as ResourceCreate
+    resource = await server.resource_memory_manager.create_resource(
+        agent_state=None,
+        item=ResourceCreate(
+            summary="test_resource",
+            content="Resource content",
+            resource_type="document",
+            source="test",
+            user_id=user.id,
+            organization_id=user.organization_id,
+        ),
+        actor=actor,
+    )
+    
+    async with db_context() as session:
+        result = await session.execute(
+            select(ResourceMemoryItem).where(ResourceMemoryItem.id == resource.id)
+        )
+        orm_resource = result.scalar_one()
+    
+    pydantic_resource = orm_resource.to_pydantic()
+    assert pydantic_resource.id == resource.id
+    assert pydantic_resource.summary == "test_resource"
+    
+    # Test knowledge vault
+    from mirix.schemas.knowledge_vault import KnowledgeVaultItem as KnowledgeCreate
+    knowledge = await server.knowledge_vault_manager.create_knowledge(
+        agent_state=None,
+        item=KnowledgeCreate(
+            caption="test_knowledge",
+            secret_value="Secret data",
+            category="test",
+            user_id=user.id,
+            organization_id=user.organization_id,
+        ),
+        actor=actor,
+    )
+    
+    async with db_context() as session:
+        result = await session.execute(
+            select(KnowledgeVaultItem).where(KnowledgeVaultItem.id == knowledge.id)
+        )
+        orm_knowledge = result.scalar_one()
+    
+    pydantic_knowledge = orm_knowledge.to_pydantic()
+    assert pydantic_knowledge.id == knowledge.id
+    assert pydantic_knowledge.caption == "test_knowledge"
+
+
+@pytest.mark.asyncio
+async def test_list_agents_conversion_safety(server):
+    """Test list_agents flow (simulating meta agent initialization)."""
+    actor = server.default_client
+    
+    # Create multiple agents
+    from mirix.schemas.agent import CreateAgent
+    agent_names = ["meta_agent", "episodic_agent", "semantic_agent"]
+    
+    for name in agent_names:
+        await server.agent_manager.create_agent(
+            agent_create=CreateAgent(
+                name=name,
+                llm_config=LLMConfig.default_config("gpt-4"),
+                embedding_config=EmbeddingConfig.default_config("text-embedding-004"),
+                include_base_tools=True,
+            ),
+            actor=actor,
+        )
+    
+    # List agents (this is what MetaAgent does)
+    agents = await server.agent_manager.list_agents(actor=actor)
+    
+    # Should have all our test agents
+    assert len(agents) >= 3
+    
+    # All should be Pydantic models
+    for agent in agents:
+        assert isinstance(agent, PydanticAgentState)
+        assert agent.id is not None
+        assert isinstance(agent.tools, list)
+
+
+@pytest.mark.asyncio
+async def test_memory_manager_list_conversion(server):
+    """Test memory manager list_* methods convert safely."""
+    from datetime import datetime
+
+    actor = server.default_client
+    user = server.admin_user
+    
+    # Create test data
+    from mirix.schemas.agent import CreateAgent
+    agent = await server.agent_manager.create_agent(
+        agent_create=CreateAgent(
+            name="test_memory_agent",
+            llm_config=LLMConfig.default_config("gpt-4"),
+            embedding_config=EmbeddingConfig.default_config("text-embedding-004"),
+        ),
+        actor=actor,
+    )
+    
+    from mirix.schemas.episodic_memory import EpisodicEvent as EpisodicCreate
+    await server.episodic_memory_manager.create_episodic_memory(
+        agent_state=agent,
+        event=EpisodicCreate(
+            event_type="test",
+            actor="system",
+            summary="Test event",
+            details="Test details",
+            occurred_at=datetime.now(),
+            user_id=user.id,
+            organization_id=user.organization_id,
+        ),
+        actor=actor,
+    )
+    
+    # List episodic memory (this is what memory tools do)
+    events = await server.episodic_memory_manager.list_episodic_memory(
+        agent_state=agent,
+        user=user,
+        query="",
+        limit=10,
+    )
+    
+    # Should have at least our test event
+    assert len(events) >= 1
+    assert all(isinstance(e, PydanticEpisodicEvent) for e in events)

From 69ab9cbe3e591334eb39a97d235ad7dd730acb83 Mon Sep 17 00:00:00 2001
From: Lucas Parzych <lucas_parzych@intuit.com>
Date: Fri, 20 Mar 2026 09:38:58 -0700
Subject: [PATCH 03/13] feat(vepage-435): docs

---
 ...message_ids_refactor_plan_9eb1b08f.plan.md | 280 ++++++++++++------
 CLAUDE.md                                     |  23 +-
 2 files changed, 205 insertions(+), 98 deletions(-)

diff --git a/.cursor/plans/message_ids_refactor_plan_9eb1b08f.plan.md b/.cursor/plans/message_ids_refactor_plan_9eb1b08f.plan.md
index 50ff977a7..1ab24951e 100644
--- a/.cursor/plans/message_ids_refactor_plan_9eb1b08f.plan.md
+++ b/.cursor/plans/message_ids_refactor_plan_9eb1b08f.plan.md
@@ -9,37 +9,40 @@ todos:
     content: Construct system message on the fly from agent_state.system instead of storing it as a Message row
     status: pending
   - id: orm-changes
-    content: Add message_type to Message ORM; remove message_ids from Agent ORM; change messages relationship to lazy=noload
+    content: Remove message_ids from Agent ORM and change messages relationship to lazy=noload
     status: pending
   - id: schema-changes
-    content: Update Pydantic schemas for Agent (remove message_ids) and Message (add message_type)
+    content: Update Pydantic schemas for Agent (remove message_ids)
     status: pending
   - id: agent-manager-rewrite
     content: Rewrite AgentManager message methods to use query-based retrieval ordered by created_at, id
     status: pending
   - id: message-manager-updates
-    content: Add query-based fetch and bulk soft-delete methods; remove detached message cleanup
+    content: Add query-based fetch and bulk hard-delete methods for retention pruning; remove detached message cleanup
     status: pending
   - id: llm-api-layer
     content: Update Anthropic and other LLM clients that assume messages[0] is the system message
     status: pending
   - id: retention-config
-    content: Add message_retention_count to Client ORM/schema; implement retention enforcement at end of step()
+    content: Add message_set_retention_count to Client ORM/schema; implement retention enforcement at end of step()
+    status: pending
+  - id: summarization-cleanup
+    content: Remove unneeded summarization code paths/settings for memory extraction flow and fail directly on context overflow
     status: pending
   - id: cleanup-managers
-    content: Replace message_ids manipulation in UserManager, ClientManager with bulk message soft-delete
+    content: Replace message_ids manipulation in UserManager, ClientManager with bulk message hard-delete for retention pruning
     status: pending
   - id: api-client-sdk
     content: Update REST API, server, SDK, and client layers to remove message_ids references
     status: pending
-  - id: ecms-sync
-    content: Remove message_ids from ECMS IPSR agents entity
-    status: pending
   - id: migration
-    content: Create Alembic migration to add message_type, message_retention_count, indexes, soft-delete system messages, and drop message_ids
+    content: Create SQL migration to add message_set_retention_count, indexes, remove legacy system messages, and drop message_ids
     status: pending
   - id: tests
-    content: Update and add tests for new message management patterns
+    content: "Update and add tests for new message management patterns: unit tests (mocked, no infra) for granular method behavior + integration tests invoking REST API endpoints to verify end-to-end correctness. Rewrite test_message_handling.py and test_agent_prompt_update.py in-place. Run via: ./scripts/run_tests_with_docker.sh --podman -s -v --log-cli-level=INFO"
+    status: pending
+  - id: chat-agent-deprecation
+    content: Make chat_agent fail loudly (NotImplementedError) when step() is invoked — it is broken by this refactor and will be fixed in a follow-up. Add deprecation notice to docs/ARCHITECTURE.md and a warning comment in agent.py at the chat_agent branch.
     status: pending
 isProject: false
 ---
@@ -62,7 +65,7 @@ MIRIX manages agent conversation history through a `message_ids` JSON column on
 
 ## 2. Changes
 
-This proposal makes four interconnected changes:
+This proposal makes six interconnected changes:
 
 ### 2.1 Store Message History In-Memory only
 
@@ -87,8 +90,6 @@ Observability is not affected: the `steps` table and LangFuse traces still captu
 
 For chaining (multiple steps in one `step()` call), the in-memory list grows across steps within the same invocation. The LLM sees the full conversation history without any database round-trips between steps.
 
-
-
 ### 2.2 Store System Message Exclusively in Agent State
 
 Today, the system prompt is stored as a `Message` row with `role="system"` at position 0 of `message_ids`. The code in `inner_step()` reads it back, enriches it with retrieved memories, and mutates it in-memory before sending to the LLM. The `rebuild_system_prompt` method creates a new Message row and swaps `message_ids[0]` every time the prompt changes.
@@ -104,21 +105,17 @@ This eliminates:
 - The convention that `messages[0]` is always the system message (a source of fragile assumptions across the codebase)
 - The `get_system_message()` method (callers read `agent_state.system` directly)
 
-
-
 ### 2.3 Remove `message_ids` From Agent
 
 The `message_ids` JSON column on the `agents` table will be removed entirely. This is the main goal of this plan. For agent types that persist messages (retention > 0), conversation history is retrieved by querying the `messages` table directly, scoped by `(agent_id, user_id)` and ordered by `created_at`.
 
-**Ordering strategy:** `ORDER BY created_at, id`
+**Ordering strategy:** Retrieval uses `ORDER BY created_at DESC, id DESC LIMIT N` to select the newest `N` retained sets, then reverses in-memory to chronological order (oldest -> newest) before prompt assembly.
 
 - `created_at` reflects processing order, which is what the LLM actually saw. Kafka already guarantees in-order delivery per user (messages are partitioned by `user_id`), so processing order matches real-world order.
 - `id` is the tiebreaker for deterministic ordering when timestamps match (a practically impossible edge-case).
 
 The `messages` relationship on the Agent ORM also changes from `lazy="selectin"` to `lazy="noload"` to prevent accidental eager loading.
 
-
-
 ### 2.4 Configurable Message Retention Per Client
 
 Today, the history clearing behavior is hardcoded: after memory extraction, keep the system message + one "last edited memory item" summary + the most recent input message-set. Different clients have different needs:
@@ -126,14 +123,20 @@ Today, the history clearing behavior is hardcoded: after memory extraction, keep
 - A **batch client** (like ECMS) that sends an entire conversation thread as a single `save` call has no use for retained messages. It wants `N=0`.
 - An **interactive agent** that processes messages one at a time may benefit from seeing what it did in the last few invocations. It wants `N=5` or similar.
 
-**Change:** Add a `message_retention_count` field to the Client model. This integer controls how many recent **input message-sets** are retained in the database after processing.
+**Change:** Add a `message_set_retention_count` field to the Client model. This integer controls how many recent **input message-sets** are retained in the database after processing.
+
+A **message-set** is defined as the input conversation payload from a single `step()` invocation. In the current ECMS path, this is typically persisted as one `messages` row whose `content` contains a packed multi-turn sequence (e.g., `[USER]... [ASSISTANT]...`). It does not include the agent's internal working messages (tool calls, tool results, heartbeats, intermediate assistant/tool chain messages). Those exist only in the in-memory accumulator during the step and are not persisted.
 
-A **message-set** is defined as the input messages from a single `step()` invocation — i.e., the conversation content that was sent to the agent for processing. It does not include the agent's internal working messages (assistant responses, tool calls, tool results, heartbeats). Those exist only in the in-memory accumulator during the step and are not persisted.
+- `message_set_retention_count = 0` -- No messages persisted. All agent work is in-memory only. (Default for memory extraction clients.)
+- `message_set_retention_count = N` -- Keep the N most recent input message-sets per `(agent_id, user_id)`. Older sets are hard-deleted.
 
-- `message_retention_count = 0` -- No messages persisted. All agent work is in-memory only. (Default for memory extraction clients.)
-- `message_retention_count = N` -- Keep the N most recent input message-sets per `(agent_id, user_id)`. Older sets are soft-deleted.
+**Runtime contract for retention changes:** retrieval and pruning both enforce `N`.
 
-When retention >= 1, the start of a `step()` invocation loads the retained input message-sets from the DB into the in-memory accumulator, giving the agent context about what it processed recently. If the accumulated in-memory context grows large (high N or large messages), summarization compresses older messages in the in-memory list. The post-summary state is what gets persisted at the end.
+- **Read path:** when loading retained context at the start of `step()`, query only the newest `N` sets using `ORDER BY created_at DESC, id DESC LIMIT N`, then **always reverse in-memory** before prompt assembly so the LLM sees chronological order (oldest -> newest).
+- **Write path:** after persisting current input set(s), hard-delete rows older than the newest `N`.
+- This guarantees that changing `message_set_retention_count` at runtime takes effect on the **very next save/step** even before background cleanup completes.
+
+When retention >= 1, the start of a `step()` invocation loads the retained input message-sets from the DB into the in-memory accumulator, giving the agent context about what it processed recently. This DB load is capped with `LIMIT N` so only the newest retained sets are considered. Persistence at end-of-step writes only the current invocation's input message-set(s), then enforces retention by hard-deleting older sets.
 
 This replaces:
 
@@ -143,27 +146,42 @@ This replaces:
 
 **Note:** The MIRIX chat agent (`chat_agent` type) is a known casualty of this change. It requires retention of full step outputs (including assistant responses, tool calls, and tool results) for conversational continuity. This will be addressed in a follow-up.
 
-**Schema change:** Add `message_retention_count` (nullable Integer, default 0) to `[mirix/orm/client.py](mirix/orm/client.py)` and `[mirix/schemas/client.py](mirix/schemas/client.py)`.
+**Schema change:** Add `message_set_retention_count` (nullable Integer, default 0) to `[mirix/orm/client.py](mirix/orm/client.py)` and `[mirix/schemas/client.py](mirix/schemas/client.py)`.
+
+### 2.5 Remove In-Loop Summarization for Memory Extraction
+
+The in-loop summarization path is removed from this refactor scope. For the memory extraction path, if the prompt exceeds the context window, the step should fail with an explicit context-overflow error and skip memory extraction for that message. This keeps behavior simple, removes extra LLM calls, and aligns with the low expected frequency of oversized inputs.
 
-### 2.5 Add `message_type` to Messages
+This is not only a behavior change: dead summarization branches used by this flow should be removed as part of the refactor (retry loops, summarizer-specific branching, and unused helper calls/settings in the memory extraction path).
 
-A `message_type` column (`String`, nullable) is added to the `messages` table. It distinguishes message origins: `"original"` for normal messages, `"summary"` for messages created by the summarizer. This replaces the implicit convention of identifying summaries by their content.
+### 2.6 Keep "Last Edited Memory Item" but only as Ephemeral Chaining Context
+
+The current "last edited memory item" signal is useful context for follow-up reasoning, but it should no longer be persisted as a retained `messages` row.
+
+**Change:**
+
+- Preserve the behavior as an **in-memory-only** helper message when another chain step is about to run.
+- Do **not** write this synthetic summary to the `messages` table.
+- Do **not** include it in retained message-sets (`message_set_retention_count` controls persisted input sets only).
+
+This keeps the useful self-awareness signal for chaining while avoiding storage churn and retention pollution.
 
 Note: `occurred_at` is **not** added to the messages table. The real-world timestamp of a conversation is already stored where it matters — on the memory records themselves (episodic events, raw memories, etc.). Message ordering uses `created_at` (processing order), which is correct because Kafka guarantees in-order delivery per user and the LLM's context should reflect what it actually saw, not a reconstructed timeline.
 
 ## 3. How It Works End-to-End
 
-### ECMS Memory Extraction (message_retention_count = 0)
+### Batch Client (message_set_retention_count = 0)
 
 ```
-1. POST /memory/add → put_messages() → Kafka
+1. Client save request → put_messages() → Kafka
 2. Worker consumes → _process_message_async() → server.send_messages()
-3. MetaMemoryAgent.step():
-   - Constructs system message in-memory from agent_state.system
-   - Sends [system_msg, input_msg] to LLM
-   - LLM returns trigger_memory_update(["episodic", "semantic"])
-   - Accumulates assistant response + tool result in-memory (not persisted)
-4. trigger_memory_update() runs sub-agents in parallel:
+3. Agent step starts:
+   - N=0 → skip retained-set load (nothing to retrieve)
+   - Construct system message from `agent_state.system`
+4. LLM execution:
+   - Send [system_msg, new_input] to LLM
+   - Accumulate assistant/tool/intermediate messages in-memory only (not persisted)
+5. Memory tool fan-out (if triggered):
    - EpisodicMemoryAgent.step():
      - Constructs system message in-memory
      - Sends [system_msg, input_copy] to LLM
@@ -171,47 +189,47 @@ Note: `occurred_at` is **not** added to the messages table. The real-world times
      - Tool executes → writes to episodic_events table (this IS persisted)
      - Accumulates messages in-memory (not persisted)
    - SemanticMemoryAgent.step(): same pattern
-5. retention_count=0 → no messages written to the messages table
-6. No agent row updates
-7. Kafka offset committed
+6. Retention write-back:
+   - `message_set_retention_count = 0` → do not persist input message-sets
+   - No retention prune needed
+7. No agent row updates (`message_ids` removed)
+8. Kafka offset handling unchanged in Phase 1 (existing auto-commit behavior remains)
 ```
 
-### Interactive Agent (message_retention_count = 3)
+### Real Time Client (message_set_retention_count = 3)
 
 ```
-1. Agent processes a message via step()
-2. Load retained input message-sets from DB into in-memory accumulator
-   → agent sees up to 3 prior input message-sets as context
-3. Construct system message from agent_state.system
-4. Send [system_msg, retained_inputs..., new_input] to LLM
-5. LLM responds, agent executes tools, all intermediate messages
-   accumulated in-memory (not persisted)
-6. At end of step(): persist the new input message-set to the messages table
-7. Enforce retention: soft-delete input message-sets older than the 3 most recent
+1. Client message/request enters `step()`
+2. Agent step starts:
+   - Load retained input sets with `ORDER BY created_at DESC, id DESC LIMIT N`
+   - Reverse in-memory to chronological order (oldest -> newest)
+   - Construct system message from `agent_state.system`
+3. LLM execution:
+   - Send [system_msg, retained_inputs..., new_input] to LLM
+   - Accumulate assistant/tool/intermediate messages in-memory only (not persisted)
+4. Memory tool fan-out (if triggered):
+   - Same sub-agent behavior as batch flow (memory table writes persist; message churn does not)
+5. Retention write-back:
+   - Persist current invocation input message-set(s) to `messages` (single row)
+6. Retention prune:
+   - Hard-delete rows older than newest `N` for `(agent_id, user_id)`
+7. No agent row updates (`message_ids` removed)
+8. Step completes with bounded retained context for next invocation
 ```
 
 ### MIRIX Chat Agent (known broken — follow-up)
 
 The chat agent requires retention of full step outputs (assistant responses, tool calls, tool results) for conversational continuity. This is not supported by the input-message-set-only retention model. The chat agent will be addressed in a follow-up change.
 
-### Summarization (retention >= 1 with high N)
-
-When retention is high, the in-memory accumulator can grow large (retained input message-sets from prior invocations + new messages from the current step). If the token count exceeds the memory pressure threshold, summarization compresses the in-memory list:
+### Context Overflow Behavior (no summarization)
 
-1. Detect memory pressure after an `inner_step()` completes
-2. Calculate cutoff using token counts on the in-memory list (same logic as today)
-3. Send older messages from the in-memory list to the LLM summarizer
-4. Replace the summarized messages in the in-memory list with a single summary message (`message_type="summary"`)
-5. Continue the `step()` loop with the reduced in-memory list
-6. At end of `step()`, persist the post-summary state to the DB (same as normal retention flow)
-
-No mid-step DB writes are needed. The summarizer works entirely on the in-memory list, and the final state — including the summary message — is what gets persisted at the end.
-
-For retention = 0, summarization never fires — there are no retained messages to accumulate, and memory extraction agents run a single step.
+If the message-sequence exceeds the model context window, the step fails with a context-overflow error. No summarization retry is attempted. The worker records the failure and proceeds according to retry/DLQ policy in Phase 2.
 
 ## 4. Edge Cases and Special Considerations
 
-**In-memory message loss on crash.** If a worker crashes mid-step, in-memory messages are lost. For memory agents this is fine -- the messages were going to be deleted anyway, and the Kafka offset hasn't been committed (Phase 2 will add manual commit). For chat agents, this is a behavior change: today a crash mid-step leaves partial messages in the DB. With this change, a crash loses the entire step's messages. This is arguably better (no partial state) and aligns with the Kafka retry strategy in Phase 2.
+**In-memory message loss on crash.** If a worker crashes mid-step, in-memory messages are lost. For memory agents this is acceptable because retained message-sets are ephemeral in this design. For chat agents, this is a behavior change: today a crash mid-step can leave partial messages in the DB. With this change, a crash can lose the entire step's in-memory message work. Note: this PR does **not** change Kafka offset commit semantics; existing auto-commit behavior remains. Manual commit/retry guarantees are deferred to Phase 2.
+
+**Context overflow now hard-fails extraction.** With summarization removed from scope, oversized inputs can fail memory extraction for that message. This is an accepted trade-off for Phase 1; Phase 2 retry/DLQ handling will surface these failures operationally.
 
 **Timestamp ties.** Two messages with the same `created_at` are disambiguated by `id`. In practice, messages within a step are created sequentially and differ by microseconds. The `id` tiebreaker gives a stable order for the rare tie case.
 
@@ -219,11 +237,50 @@ For retention = 0, summarization never fires — there are no retained messages
 
 **Anthropic client assumption.** The Anthropic LLM client asserts `messages[0].role == "system"`. The caller (`inner_step`) will prepend the system message before passing to the LLM client, so this assertion continues to hold. The change is that the system message comes from `agent_state.system` rather than from a DB row.
 
-**Removal of "last edited memory item" summary.** Today, the history clearing code builds a per-agent-type summary message (e.g., "Last edited memory item: [Episodic Event ID]: ...") and keeps it as the sole retained message. With configurable retention, a client using `message_retention_count=1` retains the raw messages from the last invocation, which contain the same information in the tool results. The synthetic summary construction (lines 1273-1391 in `agent.py`) is removed. If the synthetic summary format is specifically needed, it can be reintroduced as an optional post-processing step, but the raw tool results are arguably more useful since they contain the full structured data.
+**"Last edited memory item" becomes ephemeral.** Today, the history clearing code builds a per-agent-type summary message (e.g., "Last edited memory item: [Episodic Event ID]: ...") and persists it as retained history. With configurable retention, retained history is input message-sets only. Keep this summary as optional **in-memory chaining context** only; do not persist it.
+
+## 5. Database Migration
+
+SQL migration steps (no Alembic dependency assumed):
+
+1. Add `message_set_retention_count` (nullable `Integer`, default `0`) to `clients`
+2. Add/adjust composite index on `(agent_id, user_id, is_deleted, created_at, id)` to `messages`
+3. Delete legacy system messages (`role = 'system'`)
+4. Drop `message_ids` from `agents`
+5. No eager backfill/prune is required for retention-size changes: read path `LIMIT N` guarantees immediate behavior after config change; write path pruning converges storage on subsequent saves
 
-## 5. How This Sets Up Phase 2 (Kafka Durability, Idempotency, Retries)
+### Migration rollout strategy (explicit)
 
-This refactor is Phase 1. Phase 2 will add manual Kafka offset commit, retry limits, and a dead-letter queue. The changes in this refactor are specifically designed to make Phase 2 straightforward.
+Important: updating ORM models does **not** alter existing tables by itself. `Base.metadata.create_all` only creates missing tables; it does not add/drop columns on existing tables. Use explicit SQL migration steps for column/index changes.
+
+Compatibility note:
+
+- Additive schema changes are backward-compatible for old code as long as new columns are nullable or have safe defaults.
+- For this plan, adding `clients.message_set_retention_count DEFAULT 0` is intentionally safe for existing clients and existing code.
+- Breaking changes are contract-phase changes (e.g., dropping `agents.message_ids`) and must happen only after code cutover.
+
+Recommended sequence:
+
+1. **Expand schema first**
+  - Add `clients.message_set_retention_count` (default `0`)
+  - Add/adjust read-path index for retention queries
+  - Keep `agents.message_ids` in place temporarily during this phase
+2. **Ship compatible code**
+  - New read path uses query-based retrieval (`DESC + LIMIT N`, then in-memory reverse)
+  - New write path uses retention hard-delete
+  - Do not depend on `agents.message_ids`
+3. **Data cleanup**
+  - Delete legacy system-message rows from `messages`
+  - (Optional) one-time cleanup SQL to remove obsolete/non-retained rows if desired
+4. **Contract schema**
+  - Drop `agents.message_ids` only after code no longer reads/writes it anywhere
+5. **Validation**
+  - Existing clients with no explicit setting behave as `N=0` (default), matching current memory-extraction expectations
+  - Changing `N` at runtime takes effect on first subsequent step because read path enforces `LIMIT N`
+
+## 6. How This Sets Up Phase 2 (Kafka Durability, Idempotency, Retries)
+
+This refactor is Phase 1. Kafka offset semantics are unchanged here (existing auto-commit remains enabled). Phase 2 will add manual Kafka offset commit, retry limits, and a dead-letter queue. The changes in this refactor are specifically designed to make Phase 2 straightforward.
 
 ### 6.1 No Partial State Left Behind
 
@@ -251,52 +308,52 @@ For Phase 2, these are the operations that need idempotency keys. A natural key
 
 This refactor doesn't add idempotency keys yet, but it dramatically simplifies where they need to go. Instead of needing idempotency across messages table + agent row + memory tables, you only need it on the memory tables.
 
-## 6. Files to Modify
+## 7. Files to Modify
 
 ### Agent Execution (`[mirix/agent/agent.py](mirix/agent/agent.py)`)
 
 The heaviest changes. Key modifications:
 
 - `inner_step()`: construct system message from `agent_state.system`; append new messages to an in-memory list instead of calling `append_to_in_context_messages`; load `in_context_messages` from the in-memory list (for chaining) or from DB query (for first step with retention > 0)
-- `step()`: maintain the in-memory message accumulator; at end of loop, check client's `message_retention_count` to decide whether/how much to persist; enforce retention limit by soft-deleting excess message-sets
-- `_handle_ai_response()`: remove the entire `should_clear_history` / `CLEAR_HISTORY_AFTER_MEMORY_UPDATE` block and the per-agent-type "last edited memory item" logic. Retention is now handled uniformly at the end of `step()` based on the client config.
+- `step()`: maintain the in-memory message accumulator; at end of loop, check client's `message_set_retention_count` to decide whether/how much to persist; enforce retention limit by hard-deleting excess message-sets
+- `_handle_ai_response()`: remove the old persistence-oriented `should_clear_history` / `CLEAR_HISTORY_AFTER_MEMORY_UPDATE` block. Keep per-agent "last edited memory item" generation only as  in-memory chaining context (non-persistent).
 - `save_agent()`: remove `message_ids` write (this function may become a no-op or be removed)
-- `summarize_messages_inplace()`: rewrite to operate on the in-memory message list directly instead of reading/writing the DB. Calculate cutoff, call LLM summarizer, replace old messages with summary message in the list. No DB operations mid-step.
+- Remove summarizer retry/in-place compression path for memory extraction flow; context-overflow should raise and fail the step.
+- Remove now-unused summarizer wiring in this flow (imports, settings checks, and helper branches that only supported summarize-and-retry behavior).
 
 ### Agent Manager (`[mirix/services/agent_manager.py](mirix/services/agent_manager.py)`)
 
 Rewrite or remove message-related methods:
 
-- `get_in_context_messages()` -- query `messages` table by `(agent_id, user_id)`, no system message
+- `get_in_context_messages()` -- query `messages` table by `(agent_id, user_id)`, no system message; apply `ORDER BY created_at DESC, id DESC LIMIT N` based on client `message_set_retention_count`
 - `get_system_message()` -- return `agent_state.system` directly, or remove
 - `append_to_in_context_messages()` -- just create message rows (no agent row update). Only called for persistence at end of step.
-- `prepend_to_in_context_messages()` -- remove (summarizer creates messages directly; no need to "prepend" when ordering is by `created_at`)
+- `prepend_to_in_context_messages()` -- remove (no summarizer prepend path needed after this refactor)
 - `set_in_context_messages()` -- remove entirely
-- `trim_older_in_context_messages()` -- soft-delete older messages via query
-- `trim_all_in_context_messages_except_system()` -- rename to `clear_user_messages()`, soft-delete via query
-- `reset_messages()` -- soft-delete user's messages directly
+- `trim_older_in_context_messages()` -- hard-delete older messages via query
+- `trim_all_in_context_messages_except_system()` -- rename to `clear_user_messages()`, hard-delete via query
+- `reset_messages()` -- hard-delete user's messages directly
 - `rebuild_system_prompt()` -- just update `agent.system` column
 - `_generate_initial_message_sequence()` -- no longer creates a system message row
 - Remove `message_ids` from `_update_agent()` and Redis cache serialization
 
 ### Message Manager (`[mirix/services/message_manager.py](mirix/services/message_manager.py)`)
 
-- Add `get_messages_for_agent_user(agent_id, user_id)` -- query ordered by `created_at, id`
-- Add `soft_delete_user_messages(agent_id, user_id)` -- bulk soft-delete
+- Add `get_messages_for_agent_user(agent_id, user_id, limit=None)` -- query by `(agent_id, user_id)` with deterministic ordering; when used for retained-context load, call with `ORDER BY created_at DESC, id DESC LIMIT N`
+- Add `hard_delete_user_messages(agent_id, user_id)` -- bulk hard-delete for retention pruning
 - Remove `delete_detached_messages_for_agent()` and `cleanup_all_detached_messages()`
 
 ### ORM Models
 
-- `[mirix/orm/message.py](mirix/orm/message.py)` -- Add `message_type` column; add composite index on `(agent_id, user_id, is_deleted)`
+- `[mirix/orm/message.py](mirix/orm/message.py)` -- Add/adjust composite index for retained-message-set queries on `(agent_id, user_id, is_deleted, created_at, id)`
 - `[mirix/orm/agent.py](mirix/orm/agent.py)` -- Remove `message_ids` column; change `messages` relationship to `lazy="noload"`
-- `[mirix/orm/client.py](mirix/orm/client.py)` -- Add `message_retention_count` column (nullable Integer, default 0)
+- `[mirix/orm/client.py](mirix/orm/client.py)` -- Add `message_set_retention_count` column (nullable Integer, default 0)
 - `[mirix/orm/sqlalchemy_base.py](mirix/orm/sqlalchemy_base.py)` -- Remove `message_ids` from Redis cache serialization
 
 ### Pydantic Schemas
 
 - `[mirix/schemas/agent.py](mirix/schemas/agent.py)` -- Remove `message_ids` from `AgentState` and `UpdateAgent`
-- `[mirix/schemas/message.py](mirix/schemas/message.py)` -- Add `message_type` field
-- `[mirix/schemas/client.py](mirix/schemas/client.py)` -- Add `message_retention_count` field
+- `[mirix/schemas/client.py](mirix/schemas/client.py)` -- Add `message_set_retention_count` field
 
 ### LLM API Layer
 
@@ -305,7 +362,7 @@ Rewrite or remove message-related methods:
 
 ### Cleanup Managers
 
-- `[mirix/services/user_manager.py](mirix/services/user_manager.py)` -- Replace `agent.message_ids = [agent.message_ids[0]]` with bulk message soft-delete by `user_id`
+- `[mirix/services/user_manager.py](mirix/services/user_manager.py)` -- Replace `agent.message_ids = [agent.message_ids[0]]` with bulk message hard-delete by `user_id` (retention path)
 - `[mirix/services/client_manager.py](mirix/services/client_manager.py)` -- Same, by `client_id`
 
 ### API / Client / SDK
@@ -314,23 +371,60 @@ Rewrite or remove message-related methods:
 - `[mirix/server/server.py](mirix/server/server.py)` -- Update if `save_agent` changes
 - `[mirix/client/client.py](mirix/client/client.py)`, `[mirix/client/remote_client.py](mirix/client/remote_client.py)`, `[mirix/local_client/local_client.py](mirix/local_client/local_client.py)`, `[mirix/sdk.py](mirix/sdk.py)` -- Remove `message_ids` references
 
-### ECMS
+### Tests
 
-- `[context-and-memory-service/common/ipsr/entities/agents.py](context-and-memory-service/common/ipsr/entities/agents.py)` -- Remove `message_ids` from IPSR agents entity
+**Test strategy:** Two layers — unit tests (mocked, no infra) for granular method behavior, and integration tests invoking the REST API to verify end-to-end correctness.
 
-### Database Migration
+**Run tests via:**
 
-Alembic migration:
+```bash
+./scripts/run_tests_with_docker.sh --podman -s -v --log-cli-level=INFO
+```
 
-1. Add `message_type` (nullable `String`) to `messages`
-2. Add `message_retention_count` (nullable `Integer`, default `0`) to `clients`
-3. Add composite index on `(agent_id, user_id, is_deleted)` to `messages`
-4. Soft-delete existing system messages (`role = 'system'`)
-5. Drop `message_ids` from `agents`
+**Format/lint before committing:**
 
-### Tests
+```bash
+poetry run black . && poetry run isort .
+```
+
+**Files to update (rewrite in-place):**
+
+- `[tests/test_message_handling.py](tests/test_message_handling.py)` — rewrite entirely: current tests cover `get_messages_by_ids` and `message_ids`-based `get_in_context_messages`, both of which are removed. Replace with unit tests for the new query-based retrieval methods (`get_messages_for_agent_user`, `hard_delete_user_messages`, retention pruning logic).
+- `[tests/test_agent_prompt_update.py](tests/test_agent_prompt_update.py)` — rewrite in-place: remove all `message_ids[0]` assertions (system message is no longer stored as a row). Replace with assertions that `agent_state.system` holds the updated prompt and that no system message row exists in the DB.
+
+**New unit tests to add:**
+
+- In-memory accumulator: messages accumulate across `inner_step()` calls without DB writes
+- Retention count = 0: no message rows written after `step()` completes
+- Retention count = N: exactly N input message-sets retained per `(agent_id, user_id)`; older sets hard-deleted
+- Context overflow: `step()` raises hard error, no summarization retry attempted
+- Ephemeral "last edited memory item": present in LLM prompt when chaining, absent from DB retention rows
+
+**New integration tests to add (REST API level):**
+
+- `PUT /agents/{id}` system prompt update: verify `agent_state.system` updated, no system message row created
+- `POST /messages` (save flow): verify retention=0 client writes no message rows; retention=N client writes and prunes correctly
+- Context overflow via API: verify error response, no partial state left in DB
+
+**Remove:**
+
+- All tests asserting summarizer retry/compression behavior in the memory extraction flow
+
+## 8. Chat Agent Deprecation
+
+The `chat_agent` agent type is **deprecated** as of this refactor. It requires retention of full step outputs (assistant responses, tool calls, tool results) for conversational continuity, which is incompatible with the input-message-set-only retention model introduced here.
+
+**Changes required in this refactor:**
+
+- In `mirix/agent/agent.py`: at the top of `step()`, check if `agent_state.agent_type == AgentType.chat_agent` and immediately raise `NotImplementedError` with a clear message pointing to the follow-up ticket.
+- In `mirix/server/server.py`: where `chat_agent` is handled (line 662), add a deprecation warning log before the `NotImplementedError` propagates.
+- In `docs/ARCHITECTURE.md`: add a **Deprecated** section or callout marking `chat_agent` as unsupported pending a follow-up redesign. Explain why (retention model incompatibility) and that it will be addressed in Phase 2.
+- In `mirix/schemas/agent.py`: add a comment on the `chat_agent` enum value marking it as deprecated.
+
+**Do not remove the `chat_agent` enum value** — it is needed for backward-compatible DB reads of existing agent rows.
+
+## 9. Instructions for Developers
 
-- `[tests/test_message_handling.py](tests/test_message_handling.py)` -- Update for query-based retrieval
-- `[tests/test_agent_prompt_update.py](tests/test_agent_prompt_update.py)` -- Update for system message from `agent_state.system`
-- New tests for in-memory accumulation, in-memory summarization, and retention count behavior (0, N)
+After merging this refactor, developers must reset their local databases before running the server or tests. This change removes legacy `agents.message_ids` behavior and introduces new retention semantics, so existing local DB state will be incompatible.
 
+Run `python scripts/reset_database.py` to reset your local database.
diff --git a/CLAUDE.md b/CLAUDE.md
index 5804fbfa0..9f15b14a8 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -40,6 +40,20 @@ python scripts/start_server.py --port 8531
 
 ## Running Tests
 
+The preferred way to run tests is via the dockerized test script, which handles infrastructure automatically:
+
+```bash
+# Full suite with verbose output (preferred)
+./scripts/run_tests_with_docker.sh --podman -s -v --log-cli-level=INFO
+
+# Pass any pytest args after the flags
+./scripts/run_tests_with_docker.sh --podman -s -v --log-cli-level=INFO -k test_message_handling
+./scripts/run_tests_with_docker.sh --podman -s -v --log-cli-level=INFO -m "not integration"
+```
+
+**Required env var for tests**: `GEMINI_API_KEY`
+
+### Running without Docker (manual infra)
 ```bash
 # Fast unit tests — no running server needed (~20s)
 pytest tests/test_memory_server.py -v
@@ -50,13 +64,8 @@ pytest -m "not integration" -v
 # Integration tests — requires server on port 8899
 python scripts/start_server.py --port 8899          # Terminal 1
 pytest tests/test_memory_integration.py -v -m integration -s   # Terminal 2
-
-# Full suite
-pytest -v
 ```
 
-**Required env var for tests**: `GEMINI_API_KEY`
-
 ## Common Dev Tasks
 
 ### Add a new API endpoint
@@ -74,6 +83,10 @@ pytest -v
 
 ### Format & lint
 ```bash
+# Preferred (poetry)
+poetry run black . && poetry run isort .
+
+# Alternatively via make
 make format   # ruff import sort + format
 make lint     # ruff check + pyright
 make check    # format + lint + test

From bb5e5a4cf14e5cbd9b9d7601dee2838617700b89 Mon Sep 17 00:00:00 2001
From: Lucas Parzych <lucas_parzych@intuit.com>
Date: Fri, 20 Mar 2026 17:52:25 -0700
Subject: [PATCH 04/13] refactor: initial refactor

---
 .cursorrules                                  |   6 +-
 docs/Mirix_async_native_changes.md            |   4 +-
 docs/architecture.html                        |   4 +-
 mirix/agent/__init__.py                       |   3 +-
 mirix/agent/agent.py                          | 557 +++---------------
 mirix/client/client.py                        |   5 -
 mirix/client/remote_client.py                 |  28 +-
 mirix/local_client/local_client.py            |  33 +-
 mirix/orm/agent.py                            |   9 +-
 mirix/orm/client.py                           |   5 +
 mirix/orm/message.py                          |   1 +
 mirix/orm/sqlalchemy_base.py                  |   2 -
 mirix/schemas/agent.py                        |  19 +-
 mirix/schemas/client.py                       |   8 +
 mirix/sdk.py                                  | 113 +---
 mirix/server/rest_api.py                      |   6 +-
 mirix/server/server.py                        |   4 -
 mirix/services/agent_manager.py               | 364 ++----------
 mirix/services/client_manager.py              |  12 +-
 .../services/helpers/agent_manager_helper.py  |  69 +--
 mirix/services/message_manager.py             | 183 +++---
 mirix/services/user_manager.py                |  22 +-
 poetry.lock                                   | 497 ++++++++++------
 pyproject.toml                                |   6 +-
 .../001_add_message_set_retention_count.sql   |  12 +
 .../migrations/002_cleanup_message_ids.sql    |  10 +
 tests/conftest.py                             |  50 +-
 tests/test_agent_prompt_update.py             |  47 +-
 tests/test_block_filter_tag_updates.py        |   8 -
 tests/test_block_filter_tags_update_mode.py   |   8 -
 tests/test_filter_tags_db.py                  |   7 -
 tests/test_memory_integration.py              | 355 +++++++++++
 tests/test_message_handling.py                | 270 ++++-----
 tests/test_queue.py                           |   8 -
 tests/test_raw_memory.py                      |   2 +-
 tests/test_redis_integration.py               |  58 +-
 tests/test_scoped_blocks.py                   |  38 +-
 tests/test_search_all_users.py                |   7 -
 tests/test_user.py                            |   8 -
 tests/test_user_manager.py                    |  42 +-
 40 files changed, 1136 insertions(+), 1754 deletions(-)
 create mode 100644 scripts/migrations/001_add_message_set_retention_count.sql
 create mode 100644 scripts/migrations/002_cleanup_message_ids.sql

diff --git a/.cursorrules b/.cursorrules
index 01f87fc8c..270e434a0 100644
--- a/.cursorrules
+++ b/.cursorrules
@@ -74,7 +74,6 @@ The codebase is fully async-native. Violating these rules will break the server.
 #### 3. Agent Execution Flow
 - `step()` method is the main agent execution loop (like LangChain's AgentExecutor)
 - `inner_step()` handles single LLM interactions with tool calls
-- `save_agent()` persists agent state to database
 - Steps are logged to the `steps` table for audit/analytics (write-only)
 
 #### 4. Message Flow
@@ -117,9 +116,8 @@ Before suggesting changes, verify:
 2. **Do NOT** call `step_manager.get_step()` - steps are write-only audit logs
 3. **Do NOT** bypass `create_or_get_user()` - always ensure users exist first
 4. **Do NOT** create agents without proper `CreateAgent` schema objects
-5. **Do NOT** forget to persist agent state with `save_agent()`
-6. **Do NOT** use `message.step` relationship - it's never loaded in practice
-7. **Do NOT** add duplicate environment variables in settings.py
+5. **Do NOT** use `message.step` relationship - it's never loaded in practice
+6. **Do NOT** add duplicate environment variables in settings.py
 
 ### Testing Guidelines
 - Tests located in `tests/` directory
diff --git a/docs/Mirix_async_native_changes.md b/docs/Mirix_async_native_changes.md
index 8fdb72e67..f6eef21f2 100644
--- a/docs/Mirix_async_native_changes.md
+++ b/docs/Mirix_async_native_changes.md
@@ -99,8 +99,8 @@ Bedrock) use their respective async SDK classes. Streaming responses are
 `asyncio.sleep()`.
 
 **Agent execution** (`mirix/agent/agent.py`)
-`step()`, `inner_step()`, `_get_ai_reply()`, `_handle_ai_response()`,
-`execute_tool_and_persist_state()`, and `save_agent()` are all async.
+`step()`, `inner_step()`, `_get_ai_reply()`, and `_handle_ai_response()`
+are all async.
 Built-in tools (core, memory, extras) are async. User-defined tools
 execute in `ToolExecutionSandbox` via `asyncio.create_subprocess_exec()`
 (no thread pool).
diff --git a/docs/architecture.html b/docs/architecture.html
index 338f20e1e..e411ccbee 100644
--- a/docs/architecture.html
+++ b/docs/architecture.html
@@ -1602,7 +1602,7 @@ <h3>In-Context Messages Per Agent</h3>
                         window sent to
                         the LLM on each call.
                         Messages are stored in the database and loaded via
-                        <code>agent_manager.get_in_context_messages()</code>.
+                        <code>message_manager.get_messages_for_agent_user()</code>.
                     </div>
                     <div class="code-block" style="margin-top: 15px;">
                         <pre><span class="comment"># Message structure for each agent</span>
@@ -1798,7 +1798,7 @@ <h3 style="color: #4ec9b0; margin: 30px 0 20px;">Step Execution Flow</h3>
                             <strong>inner_step()</strong>
                             <span style="color: #858585; margin-left: 10px;">Single async LLM call</span>
                             <div class="code-block" style="margin-top: 10px;">
-                                <pre>in_context_messages = <span class="keyword">await</span> self.agent_manager.<span class="function">get_in_context_messages</span>(...)
+                                <pre>in_context_messages = <span class="keyword">await</span> self.message_manager.<span class="function">get_messages_for_agent_user</span>(...)
 complete_prompt = <span class="keyword">await</span> self.<span class="function">build_system_prompt_with_memories</span>(raw_system)
 response = <span class="keyword">await</span> self.<span class="function">_get_ai_reply</span>(input_message_sequence)
 messages, continue_chaining, failed = <span class="keyword">await</span> self.<span class="function">_handle_ai_response</span>(response)</pre>
diff --git a/mirix/agent/__init__.py b/mirix/agent/__init__.py
index 44ad1a64b..d5d9619a2 100755
--- a/mirix/agent/__init__.py
+++ b/mirix/agent/__init__.py
@@ -23,7 +23,6 @@
     "app_utils",
     "Agent",
     "AgentState",
-    "save_agent",
     "BackgroundAgent",
     "CoreMemoryAgent",
     "EpisodicMemoryAgent",
@@ -35,7 +34,7 @@
     "SemanticMemoryAgent",
 ]
 
-from mirix.agent.agent import Agent, AgentState, save_agent
+from mirix.agent.agent import Agent, AgentState
 from mirix.agent.background_agent import BackgroundAgent
 from mirix.agent.core_memory_agent import CoreMemoryAgent
 from mirix.agent.episodic_memory_agent import EpisodicMemoryAgent
diff --git a/mirix/agent/agent.py b/mirix/agent/agent.py
index 0fbbeb141..6646c6ad4 100644
--- a/mirix/agent/agent.py
+++ b/mirix/agent/agent.py
@@ -14,7 +14,6 @@
 from mirix.agent.tool_validators import validate_tool_args
 from mirix.constants import (
     CHAINING_FOR_MEMORY_UPDATE,
-    CLEAR_HISTORY_AFTER_MEMORY_UPDATE,
     CLI_WARNING_PREFIX,
     ERROR_MESSAGE_PREFIX,
     FIRST_MESSAGE_ATTEMPTS,
@@ -34,14 +33,13 @@
 from mirix.helpers import ToolRulesSolver
 from mirix.helpers.message_helpers import prepare_input_message_create
 from mirix.interface import AgentInterface
-from mirix.llm_api.helpers import calculate_summarizer_cutoff, get_token_counts_for_messages, is_context_overflow_error
+from mirix.llm_api.helpers import get_token_counts_for_messages, is_context_overflow_error
 from mirix.llm_api.llm_api_tools import create
 from mirix.llm_api.llm_client import LLMClient
 from mirix.log import get_logger
-from mirix.memory import summarize_messages
 from mirix.observability.context import get_trace_context, mark_observation_as_child
 from mirix.observability.langfuse_client import get_langfuse_client
-from mirix.schemas.agent import AgentState, AgentStepResponse, UpdateAgent
+from mirix.schemas.agent import AgentState, AgentStepResponse
 from mirix.schemas.block import BlockUpdate
 from mirix.schemas.client import Client
 from mirix.schemas.embedding_config import EmbeddingConfig
@@ -73,7 +71,6 @@
     get_contine_chaining,
     get_token_limit_warning,
     package_function_response,
-    package_summarize_message,
     package_user_message,
 )
 from mirix.tracing import trace_method
@@ -279,23 +276,8 @@ def __init__(
 
     async def load_last_function_response(self):
         """Load the last function response from message history."""
-        # Skip if actor not set yet (during __init__)
-        if self.actor is None:
-            return None
-
-        in_context_messages = await self.agent_manager.get_in_context_messages(
-            agent_state=self.agent_state, actor=self.actor, user=self.user
-        )
-        for i in range(len(in_context_messages) - 1, -1, -1):
-            msg = in_context_messages[i]
-            if msg.role == MessageRole.tool and msg.content[0].text:
-                try:
-                    response_json = json.loads(msg.content[0].text)
-                    if response_json.get("message"):
-                        return response_json["message"]
-                except (json.JSONDecodeError, KeyError):
-                    raise ValueError(f"Invalid JSON format in message: {msg.content[0].text}")
-        return None
+        # message_ids no longer persisted — last_function_response is tracked in-memory only
+        return self.last_function_response
 
     async def update_memory_if_changed(self, new_memory: Memory) -> bool:
         """
@@ -342,7 +324,6 @@ async def update_memory_if_changed(self, new_memory: Memory) -> bool:
             # NOTE: don't do this since re-buildin the memory is handled at the start of the step
             # rebuild memory - this records the last edited timestamp of the memory
             # TODO: pass in update timestamp from block edit time
-            # self.agent_state = self.agent_manager.rebuild_system_prompt(agent_id=self.agent_state.id, actor=self.user)
             return True
 
         return False
@@ -868,12 +849,6 @@ async def _get_ai_reply(
                 log_telemetry(self.logger, "_handle_ai_response finish generic Exception")
                 raise e
 
-            # check if we are going over the context window: this allows for articifial constraints
-            if response.usage.total_tokens > self.agent_state.llm_config.context_window:
-                # trigger summarization
-                log_telemetry(self.logger, "_get_ai_reply summarize_messages_inplace")
-                await self.summarize_messages_inplace(existing_file_uris=existing_file_uris)
-
             # return the response
             return response
 
@@ -1229,210 +1204,6 @@ async def _handle_ai_response(
 
             function_failed = overall_function_failed
 
-            # Handle context message clearing only if ALL functions succeeded
-            if not overall_function_failed:
-                should_clear_history = False
-
-                # Clear history for all non-chat agents when:
-                # 1. chaining=False (clear regardless of function calls), OR
-                # 2. finish_memory_update was called (clear when chaining completes)
-                if CLEAR_HISTORY_AFTER_MEMORY_UPDATE and not self.agent_state.is_type(AgentType.chat_agent):
-                    if not chaining:
-                        should_clear_history = True
-                        self.logger.info(f"should_clear_history=True (chaining=False)")
-                    else:
-                        for func_name in executed_function_names:
-                            if func_name == "finish_memory_update":
-                                should_clear_history = True
-                                self.logger.info(f"should_clear_history=True (finish_memory_update called)")
-                                break
-                else:
-                    self.logger.debug(
-                        f"Clearing skipped - CLEAR_HISTORY_AFTER_MEMORY_UPDATE={CLEAR_HISTORY_AFTER_MEMORY_UPDATE}, is_chat_agent={self.agent_state.is_type(AgentType.chat_agent)}"
-                    )
-
-                if should_clear_history:
-                    continue_chaining = False
-
-                    in_context_messages = await self.agent_manager.get_in_context_messages(
-                        agent_state=self.agent_state, actor=self.actor, user=self.user
-                    )
-                    self.logger.info(
-                        f"Clearing history - {len(in_context_messages)} messages -> keeping only system message"
-                    )
-                    message_ids = [message.id for message in in_context_messages]
-                    message_ids = [message_ids[0]]
-
-                    # show the last edited memory item
-                    memory_item = None
-                    memory_item_str = None
-
-                    if self.user is None:
-                        raise ValueError("User is required to clear history")
-
-                    if self.agent_state.name.endswith("episodic_memory_agent"):
-                        memory_item = await self.episodic_memory_manager.get_most_recently_updated_event(
-                            user=self.user,
-                            timezone_str=self.user.timezone,
-                        )
-                        if memory_item:
-                            memory_item = memory_item[0]
-                            memory_item_str = ""
-                            memory_item_str += "[Episodic Event ID]: " + memory_item.id + "\n"
-                            memory_item_str += (
-                                "[Event Occurred At]: " + memory_item.occurred_at.strftime("%Y-%m-%d %H:%M:%S") + "\n"
-                            )
-                            memory_item_str += "[Summary]: " + memory_item.summary + "\n"
-                            memory_item_str += "[Details]: " + memory_item.details + "\n"
-                            memory_item_str += (
-                                "[Last Modified]: "
-                                + memory_item.last_modify["operation"]
-                                + " at "
-                                + memory_item.last_modify["timestamp"].strftime("%Y-%m-%d %H:%M:%S")
-                                + "\n"
-                            )
-                            memory_item_str = memory_item_str.strip()
-
-                    elif self.agent_state.name.endswith("procedural_memory_agent"):
-                        memory_item = await self.procedural_memory_manager.get_most_recently_updated_item(
-                            user=self.user,
-                            timezone_str=self.user.timezone,
-                        )
-                        if memory_item:
-                            memory_item = memory_item[0]
-                            memory_item_str = ""
-                            memory_item_str += "[Procedural Memory ID]: " + memory_item.id + "\n"
-                            memory_item_str += "[Entry Type]: " + memory_item.entry_type + "\n"
-                            memory_item_str += "[Summary]: " + (memory_item.summary or "N/A") + "\n"
-                            memory_item_str += "[Steps]: " + "; ".join(memory_item.steps) + "\n"
-                            memory_item_str += (
-                                "[Last Modified]: "
-                                + memory_item.last_modify["operation"]
-                                + " at "
-                                + memory_item.last_modify["timestamp"].strftime("%Y-%m-%d %H:%M:%S")
-                                + "\n"
-                            )
-                            memory_item_str = memory_item_str.strip()
-
-                    elif self.agent_state.name.endswith("resource_memory_agent"):
-                        memory_item = await self.resource_memory_manager.get_most_recently_updated_item(
-                            user=self.user,
-                            timezone_str=self.user.timezone,
-                        )
-                        if memory_item:
-                            memory_item = memory_item[0]
-                            memory_item_str = ""
-                            memory_item_str += "[Resource Memory ID]: " + memory_item.id + "\n"
-                            memory_item_str += "[Title]: " + memory_item.title + "\n"
-                            memory_item_str += "[Summary]: " + (memory_item.summary or "N/A") + "\n"
-                            memory_item_str += "[Resource Type]: " + memory_item.resource_type + "\n"
-                            memory_item_str += "[Content]: " + memory_item.content + "\n"
-                            memory_item_str += (
-                                "[Last Modified]: "
-                                + memory_item.last_modify["operation"]
-                                + " at "
-                                + memory_item.last_modify["timestamp"].strftime("%Y-%m-%d %H:%M:%S")
-                                + "\n"
-                            )
-                            memory_item_str = memory_item_str.strip()
-
-                    elif self.agent_state.name.endswith("knowledge_vault_memory_agent"):
-                        memory_item = await self.knowledge_vault_manager.get_most_recently_updated_item(
-                            user=self.user,
-                            timezone_str=self.user.timezone,
-                        )
-
-                        # Check if finish_memory_update was one of the executed functions
-                        if "finish_memory_update" in executed_function_names and memory_item is None:
-                            memory_item_str = "No new knowledge vault items were added."
-
-                        if memory_item:
-                            memory_item = memory_item[0]
-                            memory_item_str = ""
-                            memory_item_str += "[Knowledge Vault ID]: " + memory_item.id + "\n"
-                            memory_item_str += "[Entry Type]: " + memory_item.entry_type + "\n"
-                            memory_item_str += "[Caption]: " + memory_item.caption + "\n"
-                            memory_item_str += "[Source]: " + memory_item.source + "\n"
-                            memory_item_str += "[Sensitivity]: " + memory_item.sensitivity + "\n"
-                            memory_item_str += "[Secret Value]: " + memory_item.secret_value + "\n"
-                            memory_item_str += (
-                                "[Last Modified]: "
-                                + memory_item.last_modify["operation"]
-                                + " at "
-                                + memory_item.last_modify["timestamp"].strftime("%Y-%m-%d %H:%M:%S")
-                                + "\n"
-                            )
-                            memory_item_str = memory_item_str.strip()
-
-                    elif self.agent_state.name.endswith("semantic_memory_agent"):
-                        memory_item = await self.semantic_memory_manager.get_most_recently_updated_item(
-                            user=self.user,
-                            timezone_str=self.user.timezone,
-                        )
-                        if memory_item:
-                            memory_item = memory_item[0]
-                            memory_item_str = ""
-                            memory_item_str += "[Semantic Memory ID]: " + memory_item.id + "\n"
-                            memory_item_str += "[Name]: " + memory_item.name + "\n"
-                            memory_item_str += "[Summary]: " + memory_item.summary + "\n"
-                            memory_item_str += "[Details]: " + (memory_item.details or "N/A") + "\n"
-                            memory_item_str += "[Source]: " + (memory_item.source or "N/A") + "\n"
-                            memory_item_str += (
-                                "[Last Modified]: "
-                                + memory_item.last_modify["operation"]
-                                + " at "
-                                + memory_item.last_modify["timestamp"].strftime("%Y-%m-%d %H:%M:%S")
-                                + "\n"
-                            )
-                            memory_item_str = memory_item_str.strip()
-
-                    elif self.agent_state.name.endswith("core_memory_agent"):
-                        memory_item_str = self.blocks_in_memory.compile() if self.blocks_in_memory else ""
-
-                    # Optionally create a summary message showing last edited memory item
-                    if memory_item_str:
-                        if self.agent_state.name.endswith("core_memory_agent"):
-                            message_content = "Current Full Core Memory:\n\n" + memory_item_str
-                        else:
-                            message_content = "Last edited memory item:\n\n" + memory_item_str
-
-                        # create a new message
-                        new_message = Message.dict_to_message(
-                            agent_id=self.agent_state.id,
-                            model=self.model,
-                            openai_message_dict={
-                                "role": "user",
-                                "content": message_content,
-                            },
-                        )
-
-                        # persist the message to the database
-                        persisted_message = await self.message_manager.create_message(
-                            new_message,
-                            actor=self.actor,  # Client for write operations (audit trail)
-                            client_id=self.client_id,  # From actor (Client)
-                            user_id=(
-                                self.user_id if self.user_id else UserManager.ADMIN_USER_ID
-                            ),  # Fallback to default user
-                        )
-
-                        # append the persisted message ID to the message list
-                        message_ids.append(persisted_message.id)
-
-                    # Clear history for all non-chat agents when should_clear_history is True
-                    # This applies to meta_memory_agent and all memory sub-agents
-                    await self.agent_manager.set_in_context_messages(
-                        agent_id=self.agent_state.id,
-                        message_ids=message_ids,
-                        actor=self.actor,
-                    )
-                    await self.message_manager.delete_detached_messages_for_agent(
-                        agent_id=self.agent_state.id, actor=self.actor
-                    )
-
-                    # Clear all messages since they were manually added to the conversation history
-                    messages = []
-
         else:
             # Standard non-function reply
             # Validate that we have content - LLM returned neither tool_calls nor content
@@ -1486,6 +1257,15 @@ async def step(
             user: User object for read operations (loading blocks, memory filtering) - data scope
         """
 
+        from mirix.schemas.agent import AgentType
+
+        # chat_agent is deprecated - raise immediately
+        if self.agent_state.is_type(AgentType.chat_agent):
+            raise NotImplementedError(
+                "AgentType.chat_agent is deprecated and no longer supported. "
+                "Use a memory agent type instead."
+            )
+
         # Store actor for write operations
         if actor:
             self.actor = actor
@@ -1495,8 +1275,6 @@ async def step(
             self.user = user
 
             # Only load blocks for core_memory_agent (other agent types don't use blocks)
-            from mirix.schemas.agent import AgentType
-
             if self.agent_state.is_type(AgentType.core_memory_agent):
                 # Load existing blocks for this user, scoped by the client's write_scope.
                 # auto_create_from_default=True will create blocks from template if they don't exist for this scope.
@@ -1516,9 +1294,8 @@ async def step(
                 # Load blocks into memory for core_memory_agent
                 self.blocks_in_memory = Memory(blocks=existing_blocks)
 
-        # Load last function response from message history (deferred from __init__)
-        if self.actor is not None and self.last_function_response is None:
-            self.last_function_response = await self.load_last_function_response()
+        # Reset last function response for this step (no longer loaded from DB history)
+        self.last_function_response = None
 
         max_chaining_steps = max_chaining_steps or MAX_CHAINING_STEPS
 
@@ -1559,24 +1336,25 @@ async def step(
         total_usage = UsageStatistics()
         step_count = 0
 
-        initial_message_count = len(
-            await self.agent_manager.get_in_context_messages(
-                agent_state=self.agent_state, actor=self.actor, user=self.user
-            )
-        )
-
-        if self.agent_state.is_type(AgentType.reflexion_agent):
-            # clear previous messages
-            in_context_messages = await self.agent_manager.get_in_context_messages(
-                agent_state=self.agent_state, actor=self.actor, user=self.user
-            )
-            in_context_messages = in_context_messages[:1]
-            await self.agent_manager.set_in_context_messages(
+        # Load retained messages from DB if client has retention configured
+        retention = (self.actor.message_set_retention_count or 0) if self.actor else 0
+        retained_messages: List[Message] = []
+        if retention > 0 and self.actor and self.user_id:
+            retained_messages = await self.message_manager.get_messages_for_agent_user(
                 agent_id=self.agent_state.id,
-                message_ids=[message.id for message in in_context_messages],
+                user_id=self.user_id,
                 actor=self.actor,
+                limit=retention,
             )
 
+        # In-memory accumulator: starts with retained messages, grows with each chaining step
+        accumulated: List[Message] = list(retained_messages)
+        # Track the initial input messages for retention write-back
+        input_messages_for_persistence = message_objects
+
+        # initial_message_count = system_msg (1) + retained messages
+        initial_message_count = 1 + len(retained_messages)
+
         # Initialize the LLM client once per step to reuse across retries.
         llm_client = LLMClient.create(
             llm_config=self.agent_state.llm_config,
@@ -1618,6 +1396,7 @@ async def step(
             step_response = await self.inner_step(
                 first_input_messge=first_input_message,
                 messages=next_input_message,
+                accumulated=accumulated,
                 extra_messages=extra_message_objects,
                 initial_message_count=initial_message_count,
                 chaining=chaining,
@@ -1630,15 +1409,14 @@ async def step(
             token_warning = step_response.in_context_memory_warning
             usage = step_response.usage
 
+            # Accumulate step messages for next chaining iteration
+            accumulated = accumulated + step_response.messages
+
             step_count += 1
             total_usage += usage
             counter += 1
             self.interface.step_complete()
 
-            # logger.debug("Saving agent state")
-            # save updated state
-            await save_agent(self)
-
             # Chain stops
             if not chaining and (not function_failed):
                 printv(f"[Mirix.Agent.{self.agent_state.name}] INFO: No chaining, stopping after one step")
@@ -1701,8 +1479,20 @@ async def step(
             else:
                 break
 
-        # Save the message_ids
-        await save_agent(self)
+        # Retention write-back: persist input messages and prune old ones if configured
+        if retention > 0 and self.actor and self.user_id and input_messages_for_persistence:
+            await self.message_manager.create_many_messages(
+                input_messages_for_persistence,
+                actor=self.actor,
+                client_id=self.client_id,
+                user_id=self.user_id,
+            )
+            await self.message_manager.hard_delete_user_messages_for_agent(
+                agent_id=self.agent_state.id,
+                user_id=self.user_id,
+                actor=self.actor,
+                keep_newest_n=retention,
+            )
 
         return MirixUsageStatistics(**total_usage.model_dump(), step_count=step_count)
 
@@ -2210,14 +2000,8 @@ async def construct_system_message(self, message: str) -> str:
         """
         topics = await self._extract_topics_from_message(message)
 
-        in_context_messages = await self.agent_manager.get_in_context_messages(
-            agent_state=self.agent_state, actor=self.actor, user=self.user
-        )
-        raw_system = (
-            in_context_messages[0].content[0].text
-            if in_context_messages and in_context_messages[0].role == MessageRole.system
-            else ""
-        )
+        # Use system prompt directly from agent state (no longer stored as a DB message)
+        raw_system = self.agent_state.system or ""
 
         # Build the complete system prompt with memories
         complete_system_prompt, _ = await self.build_system_prompt_with_memories(raw_system=raw_system, topics=topics)
@@ -2228,13 +2012,13 @@ async def inner_step(
         self,
         first_input_messge: Message,
         messages: Union[Message, List[Message]],
+        accumulated: Optional[List[Message]] = None,
         first_message: bool = False,
         first_message_retry_limit: int = FIRST_MESSAGE_ATTEMPTS,
         skip_verify: bool = False,
         stream: bool = False,  # TODO move to config?
         step_count: Optional[int] = None,
         metadata: Optional[dict] = None,
-        summarize_attempt_count: int = 0,
         force_response: bool = False,
         topics: Optional[str] = None,
         retrieved_memories: Optional[dict] = None,
@@ -2251,6 +2035,9 @@ async def inner_step(
     ) -> AgentStepResponse:
         """Runs a single step in the agent loop (generates at most one LLM call)"""
 
+        if accumulated is None:
+            accumulated = []
+
         try:
             # Log the start of each reasoning step
             printv(
@@ -2259,16 +2046,8 @@ async def inner_step(
             if topics:
                 printv(f"[Mirix.Agent.{self.agent_state.name}] INFO: Step topics: {topics}")
 
-            # previous_in_context_messages = self.agent_state.message_ids
-            # new_message_ids = self.agent_manager.get_agent_by_id(agent_id=self.agent_state.id, actor=self.user).message_ids
-
-            # Step 0: get in-context messages and get the raw system prompt
-            in_context_messages = await self.agent_manager.get_in_context_messages(
-                agent_state=self.agent_state, actor=self.actor, user=self.user
-            )
-
-            assert in_context_messages[0].role == MessageRole.system
-            raw_system = in_context_messages[0].content[0].text
+            # Step 0: build the system message on-the-fly from agent_state.system + memories
+            raw_system = self.agent_state.system or ""
 
             # Build the complete system prompt with memories
             complete_system_prompt, retrieved_memories = await self.build_system_prompt_with_memories(
@@ -2277,7 +2056,11 @@ async def inner_step(
                 retrieved_memories=retrieved_memories,
             )
 
-            in_context_messages[0].content[0].text = complete_system_prompt
+            system_msg = Message.dict_to_message(
+                agent_id=self.agent_state.id,
+                model=self.model,
+                openai_message_dict={"role": "system", "content": complete_system_prompt},
+            )
 
             # Step 1: add user message
             if isinstance(messages, Message):
@@ -2286,7 +2069,8 @@ async def inner_step(
             if not all(isinstance(m, Message) for m in messages):
                 raise ValueError(f"messages should be a Message or a list of Message, got {type(messages)}")
 
-            input_message_sequence = in_context_messages + messages
+            # Build sequence: [system] + accumulated (prior chaining steps) + current messages
+            input_message_sequence = [system_msg] + accumulated + messages
 
             if extra_messages is not None:
                 input_message_sequence = (
@@ -2430,7 +2214,7 @@ async def inner_step(
                     else LLM_MAX_TOKENS["DEFAULT"]
                 )
 
-            # Log step - this must happen before messages are persisted
+            # Log step
             step = await self.step_manager.log_step(
                 actor=self.actor,
                 provider_name=self.agent_state.llm_config.model_endpoint_type,
@@ -2441,16 +2225,7 @@ async def inner_step(
             for message in all_new_messages:
                 message.step_id = step.id
 
-            # Persisting into Messages - MUST happen before summarization
-            # so that summarize_messages_inplace can see all messages
-            self.agent_state = await self.agent_manager.append_to_in_context_messages(
-                all_new_messages,
-                agent_id=self.agent_state.id,
-                actor=self.actor,
-                user_id=self.user_id,
-            )
-
-            # Check memory pressure AFTER messages are persisted
+            # Check memory pressure (warning only — no in-loop summarization)
             if current_total_tokens > summarizer_settings.memory_warning_threshold * int(
                 self.agent_state.llm_config.context_window
             ):
@@ -2463,9 +2238,6 @@ async def inner_step(
                     active_memory_warning = True
                     self.agent_alerted_about_memory_pressure = True  # it's up to the outer loop to handle this
 
-                # if it is too long then run summarization here.
-                await self.summarize_messages_inplace(existing_file_uris=existing_file_uris)
-
             else:
                 printv(
                     f"[Mirix.Agent.{self.agent_state.name}] DEBUG: Memory usage acceptable: last response total_tokens ({current_total_tokens}) < {summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window)}"
@@ -2485,64 +2257,18 @@ async def inner_step(
             )
 
         except Exception as e:
-            printv(f"[Mirix.Agent.{self.agent_state.name}] ERROR: step() failed\nmessages = {messages}\nerror = {e}")
-
-            # If we got a context alert, try trimming the messages length, then try again
+            printv(f"[Mirix.Agent.{self.agent_state.name}] ERROR: inner_step() failed\nmessages = {messages}\nerror = {e}")
             if is_context_overflow_error(e):
-                in_context_messages = await self.agent_manager.get_in_context_messages(
-                    agent_state=self.agent_state, actor=self.actor, user=self.user
+                num_accumulated = len(accumulated) + len(messages)
+                err_msg = f"Context window exceeded for agent id={self.agent_state.id} with {num_accumulated} in-context messages."
+                printv(f"[Mirix.Agent.{self.agent_state.name}] ERROR: {err_msg}")
+                raise ContextWindowExceededError(
+                    err_msg,
+                    details={"num_in_context_messages": num_accumulated},
                 )
-
-                if summarize_attempt_count <= summarizer_settings.max_summarizer_retries:
-                    printv(
-                        f"[Mirix.Agent.{self.agent_state.name}] WARNING: context window exceeded with limit {self.agent_state.llm_config.context_window}, attempting to summarize ({summarize_attempt_count}/{summarizer_settings.max_summarizer_retries}"
-                    )
-                    # A separate API call to run a summarizer
-                    await self.summarize_messages_inplace(existing_file_uris=existing_file_uris)
-
-                    # Try step again
-                    return await self.inner_step(
-                        messages=messages,
-                        first_message=first_message,
-                        first_input_messge=first_input_messge,
-                        first_message_retry_limit=first_message_retry_limit,
-                        skip_verify=skip_verify,
-                        stream=stream,
-                        metadata=metadata,
-                        summarize_attempt_count=summarize_attempt_count + 1,
-                        force_response=force_response,
-                        extra_messages=extra_messages,
-                        topics=topics,
-                        retrieved_memories=retrieved_memories,
-                        chaining=chaining,
-                        message_queue=message_queue,
-                        initial_message_count=initial_message_count,
-                        return_memory_types_without_update=return_memory_types_without_update,
-                        display_intermediate_message=display_intermediate_message,
-                        request_user_confirmation=request_user_confirmation,
-                        existing_file_uris=existing_file_uris,
-                        llm_client=llm_client,
-                    )
-                else:
-                    err_msg = f"Ran summarizer {summarize_attempt_count - 1} times for agent id={self.agent_state.id}, but messages are still overflowing the context window."
-                    token_counts = (get_token_counts_for_messages(in_context_messages),)
-                    printv(f"[Mirix.Agent.{self.agent_state.name}] ERROR: {err_msg}")
-                    printv(
-                        f"[Mirix.Agent.{self.agent_state.name}] ERROR: num_in_context_messages: {len(self.agent_state.message_ids)}"
-                    )
-                    printv(f"[Mirix.Agent.{self.agent_state.name}] ERROR: token_counts: {token_counts}")
-                    raise ContextWindowExceededError(
-                        err_msg,
-                        details={
-                            "num_in_context_messages": len(self.agent_state.message_ids),
-                            "in_context_messages_text": [m.text for m in in_context_messages],
-                            "token_counts": token_counts,
-                        },
-                    )
-
             else:
                 printv(
-                    f"[Mirix.Agent.{self.agent_state.name}] ERROR: step() failed with an unrecognized exception: '{str(e)}'"
+                    f"[Mirix.Agent.{self.agent_state.name}] ERROR: inner_step() failed with an unrecognized exception: '{str(e)}'"
                 )
                 raise e
 
@@ -2583,107 +2309,6 @@ async def step_user_message(self, user_message_str: str, **kwargs) -> AgentStepR
 
         return await self.inner_step(messages=[user_message], **kwargs)
 
-    async def summarize_messages_inplace(self, existing_file_uris: Optional[List[str]] = None):
-        in_context_messages = await self.agent_manager.get_in_context_messages(
-            agent_state=self.agent_state, actor=self.actor, user=self.user
-        )
-        in_context_messages_openai = [m.to_openai_dict() for m in in_context_messages]
-        in_context_messages_openai_no_system = in_context_messages_openai[1:]
-        token_counts = get_token_counts_for_messages(in_context_messages)
-        logger.info("System message token count=%s", token_counts[0])
-        logger.info("token_counts_no_system=%s", token_counts[1:])
-
-        if in_context_messages_openai[0]["role"] != "system":
-            raise RuntimeError(
-                f"in_context_messages_openai[0] should be system (instead got {in_context_messages_openai[0]})"
-            )
-
-        # If at this point there's nothing to summarize, throw an error
-        if len(in_context_messages_openai_no_system) == 0:
-            raise ContextWindowExceededError(
-                "Not enough messages to compress for summarization",
-                details={
-                    "num_candidate_messages": len(in_context_messages_openai_no_system),
-                    "num_total_messages": len(in_context_messages_openai),
-                },
-            )
-
-        cutoff = calculate_summarizer_cutoff(
-            in_context_messages=in_context_messages,
-            token_counts=token_counts,
-            logger=self.logger,
-        )
-
-        message_sequence_to_summarize = in_context_messages[1:cutoff]  # do NOT get rid of the system message
-        self.logger.info(
-            f"Attempting to summarize {len(message_sequence_to_summarize)} messages of {len(in_context_messages)}"
-        )
-
-        # We can't do summarize logic properly if context_window is undefined
-        if self.agent_state.llm_config.context_window is None:
-            # Fallback if for some reason context_window is missing, just set to the default
-            self.logger.warning(
-                f"{CLI_WARNING_PREFIX}could not find context_window in config, setting to default {LLM_MAX_TOKENS['DEFAULT']}"
-            )
-            self.agent_state.llm_config.context_window = (
-                LLM_MAX_TOKENS[self.model]
-                if (self.model is not None and self.model in LLM_MAX_TOKENS)
-                else LLM_MAX_TOKENS["DEFAULT"]
-            )
-
-        summary = await summarize_messages(
-            agent_state=self.agent_state,
-            message_sequence_to_summarize=message_sequence_to_summarize,
-            existing_file_uris=existing_file_uris,
-        )
-        logger.info("Got summary: %s", summary)
-
-        # Metadata that's useful for the agent to see
-        all_time_message_count = await self.message_manager.size(
-            agent_id=self.agent_state.id, actor=self.actor, user_id=self.user_id
-        )
-        remaining_message_count = 1 + len(in_context_messages) - cutoff  # System + remaining
-        hidden_message_count = all_time_message_count - remaining_message_count
-        summary_message_count = len(message_sequence_to_summarize)
-        summary_message = package_summarize_message(
-            summary, summary_message_count, hidden_message_count, all_time_message_count
-        )
-        logger.info("Packaged into message: %s", summary_message)
-
-        prior_len = len(in_context_messages_openai)
-        self.agent_state = await self.agent_manager.trim_older_in_context_messages(
-            num=cutoff,
-            agent_id=self.agent_state.id,
-            actor=self.actor,
-            user_id=self.user_id,
-        )
-        packed_summary_message = {"role": "user", "content": summary_message}
-
-        # Prepend the summary
-        self.agent_state = await self.agent_manager.prepend_to_in_context_messages(
-            messages=[
-                Message.dict_to_message(
-                    agent_id=self.agent_state.id,
-                    model=self.model,
-                    openai_message_dict=packed_summary_message,
-                )
-            ],
-            agent_id=self.agent_state.id,
-            actor=self.actor,
-            user_id=self.user_id,
-        )
-
-        # reset alert
-        self.agent_alerted_about_memory_pressure = False
-        curr_in_context_messages = await self.agent_manager.get_in_context_messages(
-            agent_state=self.agent_state, actor=self.actor, user=self.user
-        )
-
-        self.logger.info(f"Ran summarizer, messages length {prior_len} -> {len(curr_in_context_messages)}")
-        self.logger.info(
-            f"Summarizer brought down total token count from {sum(token_counts)} -> {sum(get_token_counts_for_messages(curr_in_context_messages))}"
-        )
-
     def add_function(self, function_name: str) -> str:
         # TODO: refactor
         raise NotImplementedError
@@ -2707,11 +2332,23 @@ async def get_context_window(self) -> ContextWindowOverview:
         core_memory = self.blocks_in_memory.compile() if self.blocks_in_memory else ""
         num_tokens_core_memory = count_tokens(core_memory)
 
-        # Grab the in-context messages
-        # conversion of messages to OpenAI dict format, which is passed to the token counter
-        in_context_messages = await self.agent_manager.get_in_context_messages(
-            agent_state=self.agent_state, actor=self.actor, user=self.user
+        # Grab recent messages from DB for token counting purposes
+        retention = (self.actor.message_set_retention_count or 0) if self.actor else 0
+        db_messages: List[Message] = []
+        if retention > 0 and self.actor and self.user_id:
+            db_messages = await self.message_manager.get_messages_for_agent_user(
+                agent_id=self.agent_state.id,
+                user_id=self.user_id,
+                actor=self.actor,
+                limit=retention,
+            )
+        # Prepend synthetic system message for context window calculation
+        system_msg_for_count = Message.dict_to_message(
+            agent_id=self.agent_state.id,
+            model=self.model,
+            openai_message_dict={"role": "system", "content": system_prompt},
         )
+        in_context_messages = [system_msg_for_count] + db_messages
         in_context_messages_openai = [m.to_openai_dict() for m in in_context_messages]
 
         # Check if there's a summary message in the message queue
@@ -2806,22 +2443,6 @@ async def count_tokens(self) -> int:
         return context_window_breakdown.context_window_size_current
 
 
-async def save_agent(agent: Agent):
-    """Save agent to metadata store"""
-    agent_state = agent.agent_state
-
-    # TODO: move this to agent manager
-    # TODO: Completely strip out metadata
-    # convert to persisted model
-    agent_manager = AgentManager()
-    update_agent = UpdateAgent(
-        message_ids=agent_state.message_ids,
-        # TODO: Add this back in later
-        # tool_exec_environment_variables=agent_state.get_agent_env_vars_as_dict(),
-    )
-    await agent_manager.update_agent(agent_id=agent_state.id, agent_update=update_agent, actor=agent.actor)
-
-
 def strip_name_field_from_user_message(
     user_message_text: str,
 ) -> Tuple[str, Optional[str]]:
diff --git a/mirix/client/client.py b/mirix/client/client.py
index 82660183f..39abff944 100644
--- a/mirix/client/client.py
+++ b/mirix/client/client.py
@@ -59,7 +59,6 @@ async def create_agent(
         include_meta_memory_tools: Optional[bool] = False,
         metadata: Optional[Dict] = None,
         description: Optional[str] = None,
-        initial_message_sequence: Optional[List[Message]] = None,
         tags: Optional[List[str]] = None,
     ) -> AgentState:
         raise NotImplementedError
@@ -74,7 +73,6 @@ async def update_agent(
         metadata: Optional[Dict] = None,
         llm_config: Optional[LLMConfig] = None,
         embedding_config: Optional[EmbeddingConfig] = None,
-        message_ids: Optional[List[str]] = None,
         memory: Optional[Memory] = None,
         tags: Optional[List[str]] = None,
     ):
@@ -110,9 +108,6 @@ async def get_archival_memory_summary(self, agent_id: str) -> ArchivalMemorySumm
     async def get_recall_memory_summary(self, agent_id: str) -> RecallMemorySummary:
         raise NotImplementedError
 
-    async def get_in_context_messages(self, agent_id: str) -> List[Message]:
-        raise NotImplementedError
-
     async def send_message(
         self,
         message: str,
diff --git a/mirix/client/remote_client.py b/mirix/client/remote_client.py
index 200bbd78d..18c72a043 100644
--- a/mirix/client/remote_client.py
+++ b/mirix/client/remote_client.py
@@ -15,28 +15,23 @@
 from mirix.client.client import AbstractClient
 from mirix.constants import FUNCTION_RETURN_CHAR_LIMIT
 from mirix.log import get_logger
-from mirix.schemas.agent import AgentState, AgentType, CreateAgent, CreateMetaAgent
-from mirix.schemas.block import Block, BlockUpdate, CreateBlock, Human, Persona
+from mirix.schemas.agent import AgentState, AgentType
+from mirix.schemas.block import Block, Human, Persona
 from mirix.schemas.embedding_config import EmbeddingConfig
 from mirix.schemas.environment_variables import (
     SandboxEnvironmentVariable,
-    SandboxEnvironmentVariableCreate,
-    SandboxEnvironmentVariableUpdate,
 )
-from mirix.schemas.file import FileMetadata
 from mirix.schemas.llm_config import LLMConfig
 from mirix.schemas.memory import ArchivalMemorySummary, Memory, RecallMemorySummary
-from mirix.schemas.message import Message, MessageCreate
+from mirix.schemas.message import Message
 from mirix.schemas.mirix_response import MirixResponse
 from mirix.schemas.organization import Organization
 from mirix.schemas.sandbox_config import (
     E2BSandboxConfig,
     LocalSandboxConfig,
     SandboxConfig,
-    SandboxConfigCreate,
-    SandboxConfigUpdate,
 )
-from mirix.schemas.tool import Tool, ToolCreate, ToolUpdate
+from mirix.schemas.tool import Tool
 from mirix.schemas.tool_rule import BaseToolRule
 
 logger = get_logger(__name__)
@@ -485,7 +480,6 @@ async def create_agent(
         include_meta_memory_tools: Optional[bool] = False,
         metadata: Optional[Dict] = None,
         description: Optional[str] = None,
-        initial_message_sequence: Optional[List[Message]] = None,
         tags: Optional[List[str]] = None,
         headers: Optional[Dict[str, str]] = None,
     ) -> AgentState:
@@ -504,9 +498,6 @@ async def create_agent(
             "include_meta_memory_tools": include_meta_memory_tools,
             "metadata": metadata,
             "description": description,
-            "initial_message_sequence": [
-                msg.model_dump() if hasattr(msg, "model_dump") else msg for msg in (initial_message_sequence or [])
-            ],
             "tags": tags,
         }
 
@@ -523,7 +514,6 @@ async def update_agent(
         metadata: Optional[Dict] = None,
         llm_config: Optional[LLMConfig] = None,
         embedding_config: Optional[EmbeddingConfig] = None,
-        message_ids: Optional[List[str]] = None,
         memory: Optional[Memory] = None,
         tags: Optional[List[str]] = None,
         headers: Optional[Dict[str, str]] = None,
@@ -537,7 +527,6 @@ async def update_agent(
             "metadata": metadata,
             "llm_config": llm_config.model_dump() if llm_config else None,
             "embedding_config": (embedding_config.model_dump() if embedding_config else None),
-            "message_ids": message_ids,
             "memory": memory.model_dump() if memory else None,
             "tags": tags,
         }
@@ -554,8 +543,7 @@ async def update_system_prompt(
         """
         Update an agent's system prompt by agent name.
 
-        This method updates the agent's system prompt and triggers a rebuild
-        of the system message in the agent's message history.
+        This method updates the agent's system prompt in persisted agent state.
 
         The method accepts short agent names like "episodic", "semantic", "core",
         or full names like "meta_memory_agent_episodic_memory_agent".
@@ -564,8 +552,6 @@ async def update_system_prompt(
         1. Resolves the agent name to agent_id for the authenticated client
         2. Updates the agent.system field in PostgreSQL
         3. Updates the agent.system field in Redis cache
-        4. Creates a new system message
-        5. Updates message_ids[0] to reference the new system message
 
         Args:
             agent_name: Name of the agent to update. Can be:
@@ -676,10 +662,6 @@ async def get_recall_memory_summary(self, agent_id: str, headers: Optional[Dict[
         data = await self._request("GET", f"/agents/{agent_id}/memory/recall", headers=headers)
         return RecallMemorySummary(**data)
 
-    async def get_in_context_messages(self, agent_id: str) -> List[Message]:
-        """Get in-context messages."""
-        raise NotImplementedError("get_in_context_messages not yet implemented in REST API")
-
     # ========================================================================
     # Message Methods
     # ========================================================================
diff --git a/mirix/local_client/local_client.py b/mirix/local_client/local_client.py
index 995ab43ed..6466ef277 100644
--- a/mirix/local_client/local_client.py
+++ b/mirix/local_client/local_client.py
@@ -14,7 +14,7 @@
 import os
 import shutil
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
 from urllib.parse import urlparse
 
 import httpx
@@ -530,7 +530,6 @@ async def create_agent(
         # metadata
         metadata: Optional[Dict] = None,
         description: Optional[str] = None,
-        initial_message_sequence: Optional[List[Message]] = None,
         tags: Optional[List[str]] = None,
     ) -> AgentState:
         """Create an agent.
@@ -596,7 +595,6 @@ async def _resolve_tool_ids():
             "agent_type": agent_type,
             "llm_config": llm_config if llm_config else self._default_llm_config,
             "embedding_config": (embedding_config if embedding_config else self._default_embedding_config),
-            "initial_message_sequence": initial_message_sequence,
             "tags": tags,
         }
         if name is not None:
@@ -698,7 +696,6 @@ async def update_agent(
         metadata: Optional[Dict] = None,
         llm_config: Optional[LLMConfig] = None,
         embedding_config: Optional[EmbeddingConfig] = None,
-        message_ids: Optional[List[str]] = None,
     ):
         """
         Update an agent's configuration.
@@ -712,7 +709,6 @@ async def update_agent(
             metadata (Dict): New metadata
             llm_config (LLMConfig): New LLM configuration
             embedding_config (EmbeddingConfig): New embedding configuration
-            message_ids (List[str]): New list of message IDs
 
         Returns:
             AgentState: Updated agent state
@@ -735,8 +731,6 @@ async def update_agent(
             update_data["llm_config"] = llm_config
         if embedding_config is not None:
             update_data["embedding_config"] = embedding_config
-        if message_ids is not None:
-            update_data["message_ids"] = message_ids
 
         agent_update = UpdateAgent(**update_data)
         await self._ensure_client()
@@ -794,31 +788,6 @@ async def get_recall_memory_summary(self, agent_id: str) -> RecallMemorySummary:
             actor=self.client,
         )
 
-    async def get_in_context_messages(self, agent_id: str, user_id: Optional[str] = None) -> List[Message]:
-        """
-        Get in-context messages of an agent
-
-        Args:
-            agent_id (str): ID of the agent
-            user_id (str): Optional user ID to filter messages for. If None, returns all messages.
-
-        Returns:
-            messages (List[Message]): List of in-context messages
-        """
-        await self._ensure_client()
-        agent_state = await self.server.agent_manager.get_agent_by_id(
-            agent_id=agent_id,
-            actor=self.client,
-        )
-        user = None
-        if user_id:
-            user = await self.server.user_manager.get_user_by_id(user_id)
-        return await self.server.agent_manager.get_in_context_messages(
-            agent_state=agent_state,
-            actor=self.client,
-            user=user,
-        )
-
     # agent interactions
 
     async def construct_system_message(self, agent_id: str, message: str, user_id: str) -> str:
diff --git a/mirix/orm/agent.py b/mirix/orm/agent.py
index 485cbecad..cea73b001 100755
--- a/mirix/orm/agent.py
+++ b/mirix/orm/agent.py
@@ -1,7 +1,7 @@
 import uuid
 from typing import TYPE_CHECKING, List, Optional
 
-from sqlalchemy import JSON, String
+from sqlalchemy import JSON, String  # JSON retained for mcp_tools column
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 
 from mirix.orm.custom_columns import (
@@ -50,10 +50,6 @@ class Agent(SqlalchemyBase, OrganizationMixin):
     # System prompt
     system: Mapped[Optional[str]] = mapped_column(String, nullable=True, doc="The system prompt used by the agent.")
 
-    message_ids: Mapped[Optional[List[str]]] = mapped_column(
-        JSON, nullable=True, doc="List of message IDs in in-context memory."
-    )
-
     # Metadata and configs
     llm_config: Mapped[Optional[LLMConfig]] = mapped_column(
         LLMConfigColumn,
@@ -80,7 +76,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
     messages: Mapped[List["Message"]] = relationship(
         "Message",
         back_populates="agent",
-        lazy="selectin",
+        lazy="noload",
         cascade="all, delete-orphan",  # Ensure messages are deleted when the agent is deleted
         passive_deletes=True,
     )
@@ -94,7 +90,6 @@ def to_pydantic(self) -> PydanticAgentState:
             "description": self.description,
             "parent_id": self.parent_id,
             "children": None,  # Children are populated separately when needed
-            "message_ids": self.message_ids,
             "tools": self.tools,
             "tool_rules": self.tool_rules,
             "system": self.system,
diff --git a/mirix/orm/client.py b/mirix/orm/client.py
index f306d1950..dec496d2e 100644
--- a/mirix/orm/client.py
+++ b/mirix/orm/client.py
@@ -29,6 +29,11 @@ class Client(SqlalchemyBase, OrganizationMixin):
         JSON, nullable=False, default=list, doc="Scopes for reading memories."
     )
 
+    # Message retention
+    message_set_retention_count: Mapped[Optional[int]] = mapped_column(
+        nullable=True, default=0, doc="Number of input message-sets to retain per (agent, user). 0 = no retention."
+    )
+
     # Dashboard authentication fields
     email: Mapped[Optional[str]] = mapped_column(
         nullable=True, unique=True, index=True, doc="Email address for dashboard login."
diff --git a/mirix/orm/message.py b/mirix/orm/message.py
index bab3fa91b..8362178de 100755
--- a/mirix/orm/message.py
+++ b/mirix/orm/message.py
@@ -32,6 +32,7 @@ class Message(SqlalchemyBase, OrganizationMixin, UserMixin, AgentMixin):
         Index("ix_messages_created_at", "created_at", "id"),
         Index("ix_messages_client_user", "client_id", "user_id"),
         Index("ix_messages_agent_client_user", "agent_id", "client_id", "user_id"),
+        Index("ix_messages_agent_user_created_at", "agent_id", "user_id", "created_at", "id"),
     )
     __pydantic_model__ = PydanticMessage
 
diff --git a/mirix/orm/sqlalchemy_base.py b/mirix/orm/sqlalchemy_base.py
index 8811943e7..cf64c4fe4 100755
--- a/mirix/orm/sqlalchemy_base.py
+++ b/mirix/orm/sqlalchemy_base.py
@@ -792,8 +792,6 @@ async def _update_redis_cache(self, operation: str = "update", actor: Optional["
                 else:
                     data = self.to_pydantic().model_dump(mode="json")
 
-                    if "message_ids" in data and data["message_ids"]:
-                        data["message_ids"] = json.dumps(data["message_ids"])
                     if "llm_config" in data and data["llm_config"]:
                         data["llm_config"] = json.dumps(data["llm_config"])
                     if "embedding_config" in data and data["embedding_config"]:
diff --git a/mirix/schemas/agent.py b/mirix/schemas/agent.py
index 59930ec59..f04e37954 100755
--- a/mirix/schemas/agent.py
+++ b/mirix/schemas/agent.py
@@ -8,7 +8,7 @@
 from mirix.schemas.block import CreateBlock
 from mirix.schemas.embedding_config import EmbeddingConfig
 from mirix.schemas.llm_config import LLMConfig
-from mirix.schemas.message import Message, MessageCreate
+from mirix.schemas.message import Message
 from mirix.schemas.mirix_base import OrmMetadataBase
 from mirix.schemas.openai.chat_completion_response import UsageStatistics
 from mirix.schemas.tool import Tool
@@ -23,7 +23,7 @@ class AgentType(str, Enum):
     """
 
     coder_agent = "coder_agent"
-    chat_agent = "chat_agent"
+    chat_agent = "chat_agent"  # DEPRECATED: use a memory agent type instead
     reflexion_agent = "reflexion_agent"
     background_agent = "background_agent"
     episodic_memory_agent = "episodic_memory_agent"
@@ -58,12 +58,6 @@ class AgentState(OrmMetadataBase, validate_assignment=True):
     # tool rules
     tool_rules: Optional[List[ToolRule]] = Field(default=None, description="The list of tool rules.")
 
-    # in-context memory
-    message_ids: Optional[List[str]] = Field(
-        default=None,
-        description="The ids of the messages in the agent's in-context memory.",
-    )
-
     # system prompt
     system: str = Field(..., description="The system prompt used by the agent.")
 
@@ -135,12 +129,6 @@ class CreateAgent(BaseModel, validate_assignment=True):  #
     embedding_config: Optional[EmbeddingConfig] = Field(
         None, description="The embedding configuration used by the agent."
     )
-    # Note: if this is None, then we'll populate with the standard "more human than human" initial message sequence
-    # If the client wants to make this empty, then the client can set the arg to an empty list
-    initial_message_sequence: Optional[List[MessageCreate]] = Field(
-        None,
-        description="The initial set of messages to put in the agent's in-context memory.",
-    )
     include_base_tools: bool = Field(
         True,
         description="If true, attaches the Mirix core tools (e.g. archival_memory and core_memory related functions).",
@@ -236,9 +224,6 @@ class UpdateAgent(BaseModel):
     embedding_config: Optional[EmbeddingConfig] = Field(
         None, description="The embedding configuration used by the agent."
     )
-    message_ids: Optional[List[str]] = Field(
-        None, description="The ids of the messages in the agent's in-context memory."
-    )
     description: Optional[str] = Field(None, description="The description of the agent.")
     parent_id: Optional[str] = Field(None, description="The parent agent ID (for sub-agents in a meta-agent).")
     mcp_tools: Optional[List[str]] = Field(None, description="List of MCP server names to connect to this agent.")
diff --git a/mirix/schemas/client.py b/mirix/schemas/client.py
index ccac0d2f9..d4d46f435 100644
--- a/mirix/schemas/client.py
+++ b/mirix/schemas/client.py
@@ -46,6 +46,11 @@ class Client(ClientBase):
     write_scope: Optional[str] = Field(default=None, description="Scope for writing memories (null = read-only).")
     read_scopes: List[str] = Field(default_factory=list, description="Scopes for reading memories.")
 
+    # Message retention
+    message_set_retention_count: Optional[int] = Field(
+        default=0, description="Number of input message-sets to retain per (agent, user). 0 = no retention."
+    )
+
     # Dashboard authentication fields
     email: Optional[str] = Field(default=None, description="Email address for dashboard login.")
     password_hash: Optional[str] = Field(default=None, description="Hashed password for dashboard login.")
@@ -72,3 +77,6 @@ class ClientUpdate(ClientBase):
     write_scope: Optional[str] = Field(default=None, description="The new write scope of the client.")
     read_scopes: Optional[List[str]] = Field(default=None, description="The new read scopes of the client.")
     organization_id: Optional[str] = Field(default=None, description="The new organization id of the client.")
+    message_set_retention_count: Optional[int] = Field(
+        default=None, description="Number of input message-sets to retain per (agent, user). 0 = no retention."
+    )
diff --git a/mirix/sdk.py b/mirix/sdk.py
index 8632c4ae4..ffab98170 100644
--- a/mirix/sdk.py
+++ b/mirix/sdk.py
@@ -4,7 +4,6 @@
 All I/O methods are async. Use Mirix.create() to construct an instance.
 """
 
-import asyncio
 import logging
 import os
 from pathlib import Path
@@ -246,44 +245,17 @@ async def get_user_by_name(self, user_name: str):
                 return user
         return None
 
-    def clear(self) -> Dict[str, Any]:
-        """
-        Clear all memories.
-
-        Note: This requires manual database file removal and app restart.
-
-        Returns:
-            Dict with warning message and instructions
-
-        Example:
-            result = memory_agent.clear()
-            logger.debug(result['warning'])
-            for step in result['instructions']:
-                logger.debug(step)
-        """
-        return {
-            "success": False,
-            "warning": "Memory clearing requires manual database reset.",
-            "instructions": [
-                "1. Stop the Mirix application/process",
-                "2. Remove the database file: ~/.mirix/sqlite.db",
-                "3. Restart the Mirix application",
-                "4. Initialize a new Mirix agent",
-            ],
-            "manual_command": "rm ~/.mirix/sqlite.db",
-            "note": "After removing the database file, you must restart your application and create a new agent instance.",
-        }
-
     async def clear_conversation_history(self, user_id: Optional[str] = None) -> Dict[str, Any]:
         """
         Clear conversation history while preserving memories.
 
-        This removes user and assistant messages from the conversation
-        history but keeps system messages and all stored memories intact.
+        This removes persisted conversation message rows while preserving
+        memory tables and agent configuration.
 
         Args:
-            user_id: User ID to clear messages for. If None, clears all messages
-                    except system messages. If provided, only clears messages for that specific user.
+            user_id: User ID to clear messages for. If None, clears all retained
+                    conversation rows for the meta agent. If provided, only clears
+                    messages for that specific user.
 
         Returns:
             Dict containing success status, message, and count of deleted messages
@@ -303,24 +275,15 @@ async def clear_conversation_history(self, user_id: Optional[str] = None) -> Dic
         self._require_meta_agent()
         try:
             if user_id is None:
-                agent_state = await self._client.server.agent_manager.get_agent_by_id(
-                    agent_id=self._meta_agent.id,
-                    actor=self._client.client,
-                )
-                current_messages = await self._client.server.agent_manager.get_in_context_messages(
-                    agent_state=agent_state,
-                    actor=self._client.client,
-                )
-                messages_count = len(current_messages)
+                messages_count = 0  # count not available without per-user query
                 await self._client.server.agent_manager.reset_messages(
                     agent_id=self._meta_agent.id,
                     actor=self._client.client,
                     user_id=None,
-                    add_default_initial_messages=True,
                 )
                 return {
                     "success": True,
-                    "message": "Successfully cleared conversation history. All user and assistant messages removed (system messages preserved).",
+                    "message": "Successfully cleared retained conversation history.",
                     "messages_deleted": messages_count,
                 }
             else:
@@ -331,76 +294,26 @@ async def clear_conversation_history(self, user_id: Optional[str] = None) -> Dic
                         "error": f"User with ID '{user_id}' not found",
                         "messages_deleted": 0,
                     }
-                agent_state = await self._client.server.agent_manager.get_agent_by_id(
+                current_messages = await self._client.server.message_manager.get_messages_for_agent_user(
                     agent_id=self._meta_agent.id,
+                    user_id=target_user.id,
                     actor=self._client.client,
+                    limit=10000,
                 )
-                current_messages = await self._client.server.agent_manager.get_in_context_messages(
-                    agent_state=agent_state,
-                    actor=self._client.client,
-                    user=target_user,
-                )
-                user_messages_count = len(
-                    [msg for msg in current_messages if msg.role != "system" and msg.user_id == target_user.id]
-                )
+                user_messages_count = len(current_messages)
                 await self._client.server.agent_manager.reset_messages(
                     agent_id=self._meta_agent.id,
                     actor=self._client.client,
                     user_id=target_user.id,
-                    add_default_initial_messages=True,
                 )
                 return {
                     "success": True,
-                    "message": f"Successfully cleared conversation history for {target_user.name}. Messages from other users and system messages preserved.",
+                    "message": f"Successfully cleared conversation history for {target_user.name}.",
                     "messages_deleted": user_messages_count,
                 }
         except Exception as e:
             return {"success": False, "error": str(e), "messages_deleted": 0}
 
-    def save(self, path: Optional[str] = None) -> Dict[str, Any]:
-        """
-        Save the current memory state to disk.
-
-        Note: Save/backup functionality is not yet implemented in the client-based SDK.
-        Please use the database backup directly.
-
-        Args:
-            path: Save directory path (optional). If not provided, generates
-                 timestamp-based directory name.
-
-        Returns:
-            Dict containing success status and backup path
-
-        Example:
-            result = memory_agent.save("./my_backup")
-        """
-        return {
-            "success": False,
-            "error": "Save functionality not yet implemented in client-based SDK. Please backup the database directly.",
-            "path": path or "N/A",
-        }
-
-    def load(self, path: str) -> Dict[str, Any]:
-        """
-        Load memory state from a backup directory.
-
-        Note: Load/restore functionality is not yet implemented in the client-based SDK.
-        Please restore the database directly.
-
-        Args:
-            path: Path to backup directory
-
-        Returns:
-            Dict containing success status and any error messages
-
-        Example:
-            result = memory_agent.load("./my_backup")
-        """
-        return {
-            "success": False,
-            "error": "Load functionality not yet implemented in client-based SDK. Please restore the database directly.",
-        }
-
     def _reload_model_settings(self):
         """
         Force reload of model_settings to pick up new environment variables.
@@ -646,8 +559,6 @@ async def visualize_memories(self, user_id: Optional[str] = None) -> Dict[str, A
             if not target_user:
                 return {"success": False, "error": "No user found"}
 
-            meta_agent_state = await self._client.get_agent(self._meta_agent.id)
-
             memories = {}
 
             # Get episodic memory
diff --git a/mirix/server/rest_api.py b/mirix/server/rest_api.py
index 27f306802..dfdc490d0 100644
--- a/mirix/server/rest_api.py
+++ b/mirix/server/rest_api.py
@@ -729,7 +729,6 @@ class CreateAgentRequest(BaseModel):
     include_meta_memory_tools: Optional[bool] = False
     metadata: Optional[Dict] = None
     description: Optional[str] = None
-    initial_message_sequence: Optional[List[Message]] = None
     tags: Optional[List[str]] = None
 
 
@@ -767,7 +766,6 @@ async def create_agent(
         "agent_type": request.agent_type,
         "llm_config": request.llm_config,
         "embedding_config": request.embedding_config,
-        "initial_message_sequence": request.initial_message_sequence,
         "tags": request.tags,
     }
 
@@ -828,7 +826,6 @@ class UpdateAgentRequest(BaseModel):
     metadata: Optional[Dict] = None
     llm_config: Optional[LLMConfig] = None
     embedding_config: Optional[EmbeddingConfig] = None
-    message_ids: Optional[List[str]] = None
     memory: Optional[Memory] = None
     tags: Optional[List[str]] = None
 
@@ -985,8 +982,7 @@ async def update_agent_system_prompt(
     The update process:
     1. Updates the agent.system field in PostgreSQL
     2. Updates the agent.system field in Redis cache
-    3. Creates a new system message
-    4. Updates message_ids[0] to reference the new system message
+    3. Updates agent.system in DB and cache
 
     Args:
         agent_id: ID of the agent to update (e.g., "agent-123")
diff --git a/mirix/server/server.py b/mirix/server/server.py
index 3a200188c..5c218ed8e 100644
--- a/mirix/server/server.py
+++ b/mirix/server/server.py
@@ -34,7 +34,6 @@
     ReflexionAgent,
     ResourceMemoryAgent,
     SemanticMemoryAgent,
-    save_agent,
 )
 from mirix.config import MirixConfig
 
@@ -783,9 +782,6 @@ async def _command(self, user_id: str, agent_id: str, command: str) -> MirixUsag
             # exit not supported on server.py
             raise ValueError(command)
 
-        elif command.lower() == "save" or command.lower() == "savechat":
-            await save_agent(mirix_agent)
-
         elif command.lower() == "dump" or command.lower().startswith("dump "):
             # Check if there's an additional argument that's an integer
             command = command.strip().split()
diff --git a/mirix/services/agent_manager.py b/mirix/services/agent_manager.py
index 3250146e6..fdd053946 100644
--- a/mirix/services/agent_manager.py
+++ b/mirix/services/agent_manager.py
@@ -22,19 +22,14 @@
 )
 from mirix.log import get_logger
 from mirix.orm import Agent as AgentModel
-from mirix.orm import Block as BlockModel
 from mirix.orm import Tool as ToolModel
 from mirix.orm.errors import NoResultFound
 from mirix.schemas.agent import AgentState as PydanticAgentState
 from mirix.schemas.agent import AgentType, CreateAgent, CreateMetaAgent, UpdateAgent, UpdateMetaAgent
-from mirix.schemas.block import Block
-from mirix.schemas.block import Block as PydanticBlock
 from mirix.schemas.client import Client as PydanticClient
 from mirix.schemas.embedding_config import EmbeddingConfig
 from mirix.schemas.enums import ToolType
 from mirix.schemas.llm_config import LLMConfig
-from mirix.schemas.message import Message as PydanticMessage
-from mirix.schemas.message import MessageCreate
 from mirix.schemas.tool_rule import ToolRule as PydanticToolRule
 from mirix.schemas.user import User as PydanticUser
 from mirix.services.block_manager import BlockManager
@@ -42,13 +37,11 @@
     _process_relationship,
     check_supports_structured_output,
     derive_system_message,
-    initialize_message_sequence,
-    package_initial_message_sequence,
 )
 from mirix.services.message_manager import MessageManager
 from mirix.services.tool_manager import ToolManager
 from mirix.services.user_manager import UserManager
-from mirix.utils import create_random_username, enforce_types, get_utc_time
+from mirix.utils import create_random_username, enforce_types
 
 logger = get_logger(__name__)
 
@@ -165,9 +158,7 @@ async def create_agent(
             actor=actor,
         )
 
-        return await self.append_initial_message_sequence_to_in_context_messages(
-            actor, agent_state, agent_create.initial_message_sequence
-        )
+        return agent_state
 
     async def create_meta_agent(
         self,
@@ -404,7 +395,7 @@ async def update_meta_agent(
             )
             meta_agent_state = await self.get_agent_by_id(agent_id=meta_agent_id, actor=actor)
 
-        # Update meta agent's system prompt if provided (separate call needed for rebuild_system_prompt)
+        # Update meta agent's system prompt if provided
         if meta_agent_update.system_prompts and "meta_memory_agent" in meta_agent_update.system_prompts:
             await self.update_system_prompt(
                 agent_id=meta_agent_id,
@@ -627,62 +618,6 @@ async def update_agent_tools_and_system_prompts(
                 actor=actor,
             )
 
-    @enforce_types
-    def _generate_initial_message_sequence(
-        self,
-        actor: PydanticClient,
-        agent_state: PydanticAgentState,
-        supplied_initial_message_sequence: Optional[List[MessageCreate]] = None,
-        user_id: Optional[str] = None,
-    ) -> List[PydanticMessage]:
-        init_messages = initialize_message_sequence(
-            agent_state=agent_state,
-            memory_edit_timestamp=get_utc_time(),
-            include_initial_boot_message=True,
-        )
-        if supplied_initial_message_sequence is not None:
-            # We always need the system prompt up front
-            system_message_obj = PydanticMessage.dict_to_message(
-                agent_id=agent_state.id,
-                model=agent_state.llm_config.model,
-                openai_message_dict=init_messages[0],
-            )
-            # Don't use anything else in the pregen sequence, instead use the provided sequence
-            init_messages = [system_message_obj]
-            init_messages.extend(
-                package_initial_message_sequence(
-                    agent_state.id,
-                    supplied_initial_message_sequence,
-                    agent_state.llm_config.model,
-                    actor,
-                    user_id=user_id,
-                )
-            )
-        else:
-            init_messages = [
-                PydanticMessage.dict_to_message(
-                    agent_id=agent_state.id,
-                    model=agent_state.llm_config.model,
-                    openai_message_dict=msg,
-                )
-                for msg in init_messages
-            ]
-
-        return init_messages
-
-    @enforce_types
-    async def append_initial_message_sequence_to_in_context_messages(
-        self,
-        actor: PydanticClient,
-        agent_state: PydanticAgentState,
-        initial_message_sequence: Optional[List[MessageCreate]] = None,
-        user_id: Optional[str] = None,
-    ) -> PydanticAgentState:
-        init_messages = self._generate_initial_message_sequence(
-            actor, agent_state, initial_message_sequence, user_id=user_id
-        )
-        return await self.append_to_in_context_messages(init_messages, agent_id=agent_state.id, actor=actor, user_id=user_id)
-
     @enforce_types
     async def _create_agent(
         self,
@@ -728,24 +663,8 @@ async def _create_agent(
 
     @enforce_types
     async def update_agent(self, agent_id: str, agent_update: UpdateAgent, actor: PydanticClient) -> PydanticAgentState:
-        # Get current state BEFORE update to detect changes
-        old_agent_state = None
-        if agent_update.system:
-            old_agent_state = await self.get_agent_by_id(agent_id=agent_id, actor=actor)
-
-        # Update agent (including system field in database)
-        agent_state = await self._update_agent(agent_id=agent_id, agent_update=agent_update, actor=actor)
-
-        # Rebuild the system prompt if it changed
-        if agent_update.system and old_agent_state and agent_update.system != old_agent_state.system:
-            agent_state = await self.rebuild_system_prompt(
-                agent_id=agent_state.id,
-                system_prompt=agent_update.system,  # Pass the new system prompt
-                actor=actor,
-                force=True,
-            )
-
-        return agent_state
+        # Update agent (system prompt and all other fields are persisted directly)
+        return await self._update_agent(agent_id=agent_id, agent_update=agent_update, actor=actor)
 
     @enforce_types
     async def update_llm_config(self, agent_id: str, llm_config: LLMConfig, actor: PydanticClient) -> PydanticAgentState:
@@ -757,19 +676,11 @@ async def update_llm_config(self, agent_id: str, llm_config: LLMConfig, actor: P
 
     @enforce_types
     async def update_system_prompt(self, agent_id: str, system_prompt: str, actor: PydanticClient) -> PydanticAgentState:
-        agent_state = await self.update_agent(
+        return await self.update_agent(
             agent_id=agent_id,
             agent_update=UpdateAgent(system=system_prompt),
             actor=actor,
         )
-        # Rebuild the system prompt if it's different
-        agent_state = await self.rebuild_system_prompt(
-            agent_id=agent_state.id,
-            system_prompt=system_prompt,
-            actor=actor,
-            force=True,
-        )
-        return agent_state
 
     @enforce_types
     async def update_mcp_tools(
@@ -837,7 +748,6 @@ async def _update_agent(self, agent_id: str, agent_update: UpdateAgent, actor: P
                 "system",
                 "llm_config",
                 "embedding_config",
-                "message_ids",
                 "tool_rules",
                 "mcp_tools",
                 "parent_id",
@@ -984,12 +894,6 @@ async def _reconstruct_children_from_cache(
                     continue
 
                 # Deserialize JSON fields
-                if "message_ids" in child_data:
-                    child_data["message_ids"] = (
-                        json.loads(child_data["message_ids"])
-                        if isinstance(child_data["message_ids"], (str, bytes))
-                        else child_data["message_ids"]
-                    )
                 if "llm_config" in child_data:
                     child_data["llm_config"] = (
                         json.loads(child_data["llm_config"])
@@ -1302,12 +1206,6 @@ async def get_agent_by_id(self, agent_id: str, actor: PydanticClient) -> Pydanti
                     logger.debug("Cache HIT for agent %s", agent_id)
 
                     # Deserialize JSON fields
-                    if "message_ids" in cached_data:
-                        cached_data["message_ids"] = (
-                            json.loads(cached_data["message_ids"])
-                            if isinstance(cached_data["message_ids"], str)
-                            else cached_data["message_ids"]
-                        )
                     if "llm_config" in cached_data:
                         cached_data["llm_config"] = (
                             json.loads(cached_data["llm_config"])
@@ -1397,8 +1295,6 @@ async def get_agent_by_id(self, agent_id: str, actor: PydanticClient) -> Pydanti
 
                     data = pydantic_agent.model_dump(mode="json")
 
-                    if "message_ids" in data and data["message_ids"]:
-                        data["message_ids"] = json.dumps(data["message_ids"])
                     if "llm_config" in data and data["llm_config"]:
                         data["llm_config"] = json.dumps(data["llm_config"])
                     if "embedding_config" in data and data["embedding_config"]:
@@ -1495,185 +1391,17 @@ async def delete_agent(self, agent_id: str, actor: PydanticClient) -> None:
                 await self._invalidate_parent_cache_for_child(agent_id, parent_id)
 
     # ======================================================================================================================
-    # In Context Messages Management
+    # Message Management
     # ======================================================================================================================
-    # TODO: There are several assumptions here that are not explicitly checked
-    # TODO: 1) These message ids are valid
-    # TODO: 2) These messages are ordered from oldest to newest
-    # TODO: This can be fixed by having an actual relationship in the ORM for message_ids
-    # TODO: This can also be made more efficient, instead of getting, setting, we can do it all in one db session for one query.
-    # @enforce_types
-    # def get_in_context_messages(
-    #     self, agent_id: str, actor: PydanticClient
-    # ) -> List[PydanticMessage]:
-    #     message_ids = await self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
-    #     messages = self.message_manager.get_messages_by_ids(
-    #         message_ids=message_ids, actor=actor
-    #     )
-    #     messages = [messages[0]] + [
-    #         message for message in messages[1:] if message.user_id == actor.id
-    #     ]
-    #     return messages
-    @enforce_types
-    async def get_in_context_messages(
-        self,
-        agent_state: PydanticAgentState,
-        actor: PydanticClient,
-        user: Optional[PydanticUser] = None,
-    ) -> List[PydanticMessage]:
-        message_ids = agent_state.message_ids
-        messages = await self.message_manager.get_messages_by_ids(message_ids=message_ids, actor=actor)
-        # Handle empty message list (e.g., after deletion)
-        if not messages:
-            return []
-
-        # Keep first message (system message) and filter rest by user_id
-        if user:
-            messages = [messages[0]] + [message for message in messages[1:] if message.user_id == user.id]
-        return messages
-
-    @enforce_types
-    async def get_system_message(self, agent_id: str, actor: PydanticClient) -> PydanticMessage:
-        agent_state = await self.get_agent_by_id(agent_id=agent_id, actor=actor)
-        message_ids = agent_state.message_ids
-
-        # Handle empty message_ids (e.g., after deletion)
-        if not message_ids:
-            return None
-
-        return await self.message_manager.get_message_by_id(message_id=message_ids[0], actor=actor)
-
-    @enforce_types
-    async def rebuild_system_prompt(
-        self, agent_id: str, system_prompt: str, actor: PydanticClient, force=False
-    ) -> PydanticAgentState:
-        """Rebuld the system prompt, put the system_prompt at the first position in the list of messages."""
-
-        agent_state = await self.get_agent_by_id(agent_id=agent_id, actor=actor)
-        # Swap the system message out (only if there is a diff)
-        message = PydanticMessage.dict_to_message(
-            agent_id=agent_id,
-            model=agent_state.llm_config.model,
-            openai_message_dict={"role": "system", "content": system_prompt},
-        )
-        message = await self.message_manager.create_message(message, actor=actor)
-        message_ids = [message.id] + agent_state.message_ids[1:]  # swap index 0 (system)
-        return await self.set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
-
-    @enforce_types
-    async def set_in_context_messages(
-        self, agent_id: str, message_ids: List[str], actor: PydanticClient
-    ) -> PydanticAgentState:
-        return await self.update_agent(
-            agent_id=agent_id,
-            agent_update=UpdateAgent(message_ids=message_ids),
-            actor=actor,
-        )
-
-    @enforce_types
-    async def trim_older_in_context_messages(
-        self,
-        num: int,
-        agent_id: str,
-        actor: PydanticClient,
-        user_id: Optional[str] = None,
-    ) -> PydanticAgentState:
-        """
-        Trim older messages from the in-context message list, keeping `num` most recent messages
-        for the specified user. Messages from other users are preserved.
-
-        Args:
-            num: Number of most recent user messages to keep.
-            agent_id: The agent ID.
-            actor: The Client performing the operation.
-            user_id: The user whose messages to trim. If None, trims all non-system messages.
-        """
-        message_ids = await self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
-        system_message_id = message_ids[0]
-        message_ids = message_ids[1:]
-
-        message_id_indices_belonging_to_user = []
-        for idx, message_id in enumerate(message_ids):
-            msg = await self.message_manager.get_message_by_id(message_id=message_id, actor=actor)
-            if msg and msg.user_id == user_id:
-                message_id_indices_belonging_to_user.append(idx)
-        message_ids_belonging_to_user = [message_ids[idx] for idx in message_id_indices_belonging_to_user]
-        message_ids_to_keep = [message_ids[idx] for idx in message_id_indices_belonging_to_user[num - 1 :]]
-
-        message_ids_belonging_to_user = set(message_ids_belonging_to_user)
-        message_ids_to_keep = set(message_ids_to_keep)
-
-        # new_messages = [message_ids[0]] + message_ids[num:]  # 0 is system message
-        new_messages = [system_message_id] + [
-            msg_id
-            for msg_id in message_ids
-            if (msg_id not in message_ids_belonging_to_user or msg_id in message_ids_to_keep)
-        ]
-        return await self.set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor)
-
-    @enforce_types
-    async def trim_all_in_context_messages_except_system(
-        self, agent_id: str, actor: PydanticClient, user_id: Optional[str] = None
-    ) -> PydanticAgentState:
-        """
-        Remove all messages except the system message for a specific user.
-        Messages from other users are preserved.
-
-        Args:
-            agent_id: The agent ID.
-            actor: The Client performing the operation.
-            user_id: The user whose messages to remove. If None, removes all non-system messages.
-        """
-        message_ids = await self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
-        system_message_id = message_ids[0]  # 0 is system message
-
-        # Keep system message and only filter out messages belonging to the specified user
-        new_message_ids = [system_message_id]
-        for message_id in message_ids[1:]:  # Skip system message
-            message = await self.message_manager.get_message_by_id(message_id=message_id, actor=actor)
-            if message.user_id != user_id:
-                new_message_ids.append(message_id)
-
-        return await self.set_in_context_messages(agent_id=agent_id, message_ids=new_message_ids, actor=actor)
-
-    @enforce_types
-    async def prepend_to_in_context_messages(
-        self,
-        messages: List[PydanticMessage],
-        agent_id: str,
-        actor: PydanticClient,
-        user_id: Optional[str] = None,
-    ) -> PydanticAgentState:
-        message_ids = await self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
-        new_messages = await self.message_manager.create_many_messages(messages, actor=actor, user_id=user_id)
-        message_ids = [message_ids[0]] + [m.id for m in new_messages] + message_ids[1:]
-        return await self.set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
-
-    @enforce_types
-    async def append_to_in_context_messages(
-        self,
-        messages: List[PydanticMessage],
-        agent_id: str,
-        actor: PydanticClient,
-        user_id: Optional[str] = None,
-    ) -> PydanticAgentState:
-        messages = await self.message_manager.create_many_messages(messages, actor=actor, user_id=user_id)
-        agent_state = await self.get_agent_by_id(agent_id=agent_id, actor=actor)
-        message_ids = list(agent_state.message_ids or [])
-        message_ids += [m.id for m in messages]
-        return await self.set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
-
     @enforce_types
     async def reset_messages(
         self,
         agent_id: str,
         actor: PydanticClient,
         user_id: Optional[str] = None,
-        add_default_initial_messages: bool = False,
     ) -> PydanticAgentState:
         """
         Removes messages belonging to the specified user from the agent's conversation history.
-        Preserves system messages and messages from other users.
 
         This action is destructive and cannot be undone once committed.
 
@@ -1681,63 +1409,37 @@ async def reset_messages(
             agent_id (str): The ID of the agent whose messages will be reset.
             actor (PydanticClient): The Client performing this action.
             user_id (str): The user whose messages will be removed. If None, removes all non-system messages.
-            add_default_initial_messages: If true, adds the default initial messages after resetting.
 
         Returns:
-            PydanticAgentState: The updated agent state with user's messages removed.
+            PydanticAgentState: The updated agent state.
         """
-        async with self.session_maker() as session:
-            # Retrieve the existing agent (will raise NoResultFound if invalid)
-            agent = await AgentModel.read(db_session=session, identifier=agent_id, actor=actor)
-
-            # Get current messages to filter
-            current_messages = agent.messages
-
-            # Filter out messages belonging to the specific user, but keep:
-            # 1. System messages (role='system') - always keep
-            # 2. Messages from other users (user_id != specified user_id)
-            messages_to_keep = []
-            messages_to_remove = []
-
-            for message in current_messages:
-                if message.role == "system":
-                    # Always keep system messages
-                    messages_to_keep.append(message)
-                elif user_id is None or message.user_id == user_id:
-                    # Remove this user's messages (or all if user_id is None)
-                    messages_to_remove.append(message)
-                else:
-                    # Keep messages from other users
-                    messages_to_keep.append(message)
-
-            # Update the agent's messages relationship to only keep filtered messages
-            agent.messages = messages_to_keep
-
-            # Update message_ids to reflect the remaining messages
-            # Keep the order based on created_at timestamp
-            agent.message_ids = [msg.id for msg in messages_to_keep]
-
-            # Commit the update
-            await agent.update(db_session=session, actor=actor)
+        if user_id:
+            await self.message_manager.hard_delete_user_messages_for_agent(
+                agent_id=agent_id,
+                user_id=user_id,
+                actor=actor,
+                keep_newest_n=0,
+            )
+        else:
+            # Delete all non-system messages for every user of this agent
+            from sqlalchemy import delete
+
+            from mirix.orm.message import Message as MessageModel
+            from mirix.schemas.message import MessageRole
+
+            async with self.session_maker() as session:
+                await session.execute(
+                    delete(MessageModel).where(
+                        MessageModel.agent_id == agent_id,
+                        MessageModel.organization_id == actor.organization_id,
+                        MessageModel.role != MessageRole.system,
+                    )
+                )
+                await session.commit()
 
-            agent_state = agent.to_pydantic()
+        agent_state = await self.get_agent_by_id(agent_id=agent_id, actor=actor)
 
-        if add_default_initial_messages:
-            return await self.append_initial_message_sequence_to_in_context_messages(actor, agent_state, user_id=user_id)
-        else:
-            # We still want to always have a system message
-            init_messages = initialize_message_sequence(
-                agent_state=agent_state,
-                memory_edit_timestamp=get_utc_time(),
-                include_initial_boot_message=True,
-            )
-            system_message = PydanticMessage.dict_to_message(
-                agent_id=agent_state.id,
-                user_id=agent_state.created_by_id,
-                model=agent_state.llm_config.model,
-                openai_message_dict=init_messages[0],
-            )
-            return await self.append_to_in_context_messages([system_message], agent_id=agent_state.id, actor=actor)
+        return agent_state
 
     # ======================================================================================================================
     # Tool Management
diff --git a/mirix/services/client_manager.py b/mirix/services/client_manager.py
index ffcfecd47..e9b7feeeb 100644
--- a/mirix/services/client_manager.py
+++ b/mirix/services/client_manager.py
@@ -507,7 +507,7 @@ async def delete_memories_by_client_id(self, client_id: str):
 
             logger.debug("Bulk deleted %d blocks", block_count)
 
-            # Clear message_ids from agents in PostgreSQL (they reference deleted messages)
+            # Collect agent IDs for cache invalidation (messages already deleted above)
             agent_ids: List[str] = []
             async with self.session_maker() as session:
                 from mirix.orm.agent import Agent as AgentModel
@@ -518,16 +518,6 @@ async def delete_memories_by_client_id(self, client_id: str):
                 result_agents = await session.execute(stmt_agents)
                 agents = result_agents.scalars().all()
                 agent_ids = [agent.id for agent in agents]
-                for agent in agents:
-                    if agent.message_ids and len(agent.message_ids) > 0:
-                        agent.message_ids = [agent.message_ids[0]]
-
-                await session.commit()
-                logger.debug(
-                    "Cleared conversation message_ids from %d agents in PostgreSQL "
-                    "(kept system messages)",
-                    len(agent_ids),
-                )
 
             from mirix.database.cache_provider import get_cache_provider
 
diff --git a/mirix/services/helpers/agent_manager_helper.py b/mirix/services/helpers/agent_manager_helper.py
index 8bb05d2f7..8fd5e59ac 100755
--- a/mirix/services/helpers/agent_manager_helper.py
+++ b/mirix/services/helpers/agent_manager_helper.py
@@ -3,20 +3,14 @@
 
 from sqlalchemy import select
 
-from mirix import system
 from mirix.constants import IN_CONTEXT_MEMORY_KEYWORD, STRUCTURED_OUTPUT_MODELS
 from mirix.helpers import ToolRulesSolver
 from mirix.orm.agent import Agent as AgentModel
 from mirix.orm.errors import NoResultFound
 from mirix.prompts import gpt_system
-from mirix.schemas.agent import AgentState, AgentType
-from mirix.schemas.client import Client
-from mirix.schemas.enums import MessageRole
+from mirix.schemas.agent import AgentType
 from mirix.schemas.memory import Memory
-from mirix.schemas.message import Message, MessageCreate
 from mirix.schemas.tool_rule import ToolRule
-from mirix.schemas.user import User
-from mirix.utils import get_local_time
 
 
 async def _process_relationship(
@@ -179,67 +173,6 @@ def compile_system_message(
     return formatted_prompt
 
 
-def initialize_message_sequence(
-    agent_state: AgentState,
-    memory_edit_timestamp: Optional[datetime.datetime] = None,
-    include_initial_boot_message: bool = True,
-    previous_message_count: int = 0,
-    archival_memory_size: int = 0,
-) -> List[dict]:
-    if memory_edit_timestamp is None:
-        memory_edit_timestamp = get_local_time()
-
-    messages = [
-        {"role": "system", "content": agent_state.system},
-    ]
-
-    return messages
-
-
-def package_initial_message_sequence(
-    agent_id: str,
-    initial_message_sequence: List[MessageCreate],
-    model: str,
-    actor: Client,
-    user_id: Optional[str] = None,
-) -> List[Message]:
-    """
-    Package initial messages for an agent.
-
-    Args:
-        agent_id: The agent ID these messages belong to.
-        initial_message_sequence: List of messages to package.
-        model: The LLM model name.
-        actor: The Client performing the operation (used for organization_id).
-        user_id: The user ID to associate with these messages. If not provided,
-                 messages will have user_id=None.
-    """
-    init_messages = []
-    for message_create in initial_message_sequence:
-        if message_create.role == MessageRole.user:
-            packed_message = system.package_user_message(
-                user_message=message_create.text,
-            )
-        elif message_create.role == MessageRole.system:
-            packed_message = system.package_system_message(
-                system_message=message_create.text,
-            )
-        else:
-            raise ValueError(f"Invalid message role: {message_create.role}")
-
-        init_messages.append(
-            Message(
-                role=message_create.role,
-                text=packed_message,
-                organization_id=actor.organization_id,
-                user_id=user_id,
-                agent_id=agent_id,
-                model=model,
-            )
-        )
-    return init_messages
-
-
 def check_supports_structured_output(model: str, tool_rules: List[ToolRule]) -> bool:
     if model not in STRUCTURED_OUTPUT_MODELS:
         if len(ToolRulesSolver(tool_rules=tool_rules).init_tool_rules) > 1:
diff --git a/mirix/services/message_manager.py b/mirix/services/message_manager.py
index 1ccffdb3d..823eafbcf 100755
--- a/mirix/services/message_manager.py
+++ b/mirix/services/message_manager.py
@@ -515,114 +515,127 @@ async def list_messages_for_agent(
 
             return [msg.to_pydantic() for msg in results]
 
-    @enforce_types
-    async def delete_detached_messages_for_agent(self, agent_id: str, actor: PydanticClient) -> int:
+    async def get_messages_for_agent_user(
+        self,
+        agent_id: str,
+        user_id: str,
+        actor: PydanticClient,
+        limit: int = 10,
+    ) -> List[PydanticMessage]:
         """
-        Delete messages that belong to an agent but are not in the agent's current message_ids list.
+        Fetch the most recent N messages for a given (agent, user) pair, returned in
+        chronological order (oldest first).
 
-        This is useful for cleaning up messages that were removed from context during
-        context window management but still exist in the database.
+        Uses the composite index ix_messages_agent_user_created_at for efficient retrieval.
 
         Args:
-            agent_id: The ID of the agent to clean up messages for
-            actor: The user performing this action
+            agent_id: The agent whose messages to retrieve
+            user_id: The user whose messages to retrieve
+            actor: Client performing the operation (for org scoping)
+            limit: Maximum number of messages to return (newest N, then reversed)
 
         Returns:
-            int: Number of messages deleted
+            List of messages in chronological order
         """
-        async with self.session_maker() as session:
-            # First, get the agent to access its current message_ids
-            from mirix.orm.agent import Agent as AgentModel
-
-            try:
-                agent = await AgentModel.read(db_session=session, identifier=agent_id, actor=actor)
-            except NoResultFound:
-                raise ValueError(f"Agent with id {agent_id} not found.")
+        from sqlalchemy import desc
 
-            # Get current message_ids (messages that should be kept)
-            current_message_ids = set(agent.message_ids or [])
-
-            # Find all messages for this agent
-            all_messages = await MessageModel.list(
-                db_session=session,
-                agent_id=agent_id,
-                organization_id=actor.organization_id,
-                limit=None,  # Get all messages
+        async with self.session_maker() as session:
+            stmt = (
+                select(MessageModel)
+                .where(
+                    MessageModel.agent_id == agent_id,
+                    MessageModel.user_id == user_id,
+                    MessageModel.organization_id == actor.organization_id,
+                    MessageModel.is_deleted == False,
+                )
+                .order_by(desc(MessageModel.created_at), desc(MessageModel.id))
+                .limit(limit)
             )
+            result = await session.execute(stmt)
+            messages = result.scalars().all()
 
-            # Identify detached messages (not in current message_ids)
-            detached_messages = [msg for msg in all_messages if msg.id not in current_message_ids]
-
-            # Delete detached messages (and clean up Redis cache)
-            deleted_count = 0
-            from mirix.database.redis_client import get_redis_client
-
-            redis_client = get_redis_client()
-
-            for msg in detached_messages:
-                # Remove from Redis cache
-                if redis_client:
-                    redis_key = f"{redis_client.MESSAGE_PREFIX}{msg.id}"
-                    await redis_client.delete(redis_key)
-                await msg.hard_delete(session, actor=actor)
-                deleted_count += 1
-
-            await session.commit()
-            return deleted_count
+        # Reverse to chronological order
+        return [msg.to_pydantic() for msg in reversed(messages)]
 
-    @enforce_types
-    async def cleanup_all_detached_messages(self, actor: PydanticClient) -> Dict[str, int]:
+    async def hard_delete_user_messages_for_agent(
+        self,
+        agent_id: str,
+        user_id: str,
+        actor: PydanticClient,
+        keep_newest_n: int = 0,
+    ) -> int:
         """
-        Cleanup detached messages for all agents in the organization.
+        Hard-delete messages for a (agent, user) pair, optionally keeping the newest N.
+
+        Deletes from both the database and Redis cache.
 
         Args:
-            actor: The user performing this action
+            agent_id: The agent whose messages to prune
+            user_id: The user whose messages to prune
+            actor: Client performing the operation (for org scoping)
+            keep_newest_n: How many of the most-recent messages to retain. 0 = delete all.
 
         Returns:
-            Dict[str, int]: Dictionary mapping agent_id to number of messages deleted
+            Number of messages deleted
         """
-        from mirix.orm.agent import Agent as AgentModel
-
-        async with self.session_maker() as session:
-            # Get all agents for this organization
-            agents = await AgentModel.list(
-                db_session=session, organization_id=actor.organization_id, limit=None
-            )
+        from sqlalchemy import delete, desc
 
-            cleanup_results = {}
-            total_deleted = 0
-
-            for agent in agents:
-                # Get current message_ids for this agent
-                current_message_ids = set(agent.message_ids or [])
+        from mirix.database.redis_client import get_redis_client
 
-                # Find all messages for this agent
-                all_messages = await MessageModel.list(
-                    db_session=session,
-                    agent_id=agent.id,
-                    organization_id=actor.organization_id,
-                    limit=None,
+        async with self.session_maker() as session:
+            # Identify IDs to keep (the newest N)
+            keep_ids: set = set()
+            if keep_newest_n > 0:
+                keep_stmt = (
+                    select(MessageModel.id)
+                    .where(
+                        MessageModel.agent_id == agent_id,
+                        MessageModel.user_id == user_id,
+                        MessageModel.organization_id == actor.organization_id,
+                        MessageModel.is_deleted == False,
+                    )
+                    .order_by(desc(MessageModel.created_at), desc(MessageModel.id))
+                    .limit(keep_newest_n)
                 )
+                keep_result = await session.execute(keep_stmt)
+                keep_ids = {row[0] for row in keep_result.all()}
 
-                # Identify and delete detached messages
-                detached_messages = [msg for msg in all_messages if msg.id not in current_message_ids]
-
-                deleted_count = 0
-                from mirix.database.redis_client import get_redis_client
+            # Collect IDs that will be deleted (for cache invalidation)
+            select_stmt = select(MessageModel.id).where(
+                MessageModel.agent_id == agent_id,
+                MessageModel.user_id == user_id,
+                MessageModel.organization_id == actor.organization_id,
+                MessageModel.is_deleted == False,
+            )
+            if keep_ids:
+                select_stmt = select_stmt.where(MessageModel.id.not_in(keep_ids))
 
-                redis_client = get_redis_client()
+            id_result = await session.execute(select_stmt)
+            delete_ids = [row[0] for row in id_result.all()]
 
-                for msg in detached_messages:
-                    # Remove from Redis cache
-                    if redis_client:
-                        redis_key = f"{redis_client.MESSAGE_PREFIX}{msg.id}"
-                        await redis_client.delete(redis_key)
-                    await msg.hard_delete(session)
-                    deleted_count += 1
+            count = len(delete_ids)
+            if count == 0:
+                return 0
 
-                cleanup_results[agent.id] = deleted_count
-                total_deleted += deleted_count
+            # Bulk delete
+            del_stmt = delete(MessageModel).where(
+                MessageModel.agent_id == agent_id,
+                MessageModel.user_id == user_id,
+                MessageModel.organization_id == actor.organization_id,
+                MessageModel.is_deleted == False,
+            )
+            if keep_ids:
+                del_stmt = del_stmt.where(MessageModel.id.not_in(keep_ids))
 
+            await session.execute(del_stmt)
             await session.commit()
-            cleanup_results["total"] = total_deleted
-            return cleanup_results
+
+        # Evict from Redis cache
+        redis_client = get_redis_client()
+        if redis_client and delete_ids:
+            BATCH_SIZE = 1000
+            for i in range(0, len(delete_ids), BATCH_SIZE):
+                batch = [f"{redis_client.MESSAGE_PREFIX}{mid}" for mid in delete_ids[i : i + BATCH_SIZE]]
+                await redis_client.client.delete(*batch)
+
+        return count
diff --git a/mirix/services/user_manager.py b/mirix/services/user_manager.py
index 45f50144c..98d7fb278 100755
--- a/mirix/services/user_manager.py
+++ b/mirix/services/user_manager.py
@@ -285,26 +285,8 @@ async def delete_memories_by_user_id(self, user_id: str):
             block_count = await block_manager.delete_by_user_id(user_id=user_id)
             logger.debug("Bulk deleted %d blocks", block_count)
 
-            # Clear message_ids from ALL agents in PostgreSQL (messages are user-scoped, agents are client-scoped)
-            # IMPORTANT: Keep the first message (system message) as agents need it to function
-            # We need to clear message_ids from all agents that might have cached this user's messages
-            async with self.session_maker() as session:
-                from mirix.orm.agent import Agent as AgentModel
-
-                # Update ALL agents to keep only system messages
-                # (We can't know which agents have which user's messages, so clean all)
-                stmt = select(AgentModel)
-                result = await session.execute(stmt)
-                agents = result.scalars().all()
-
-                for agent in agents:
-                    if agent.message_ids and len(agent.message_ids) > 1:  # Has conversation messages
-                        agent.message_ids = [agent.message_ids[0]]  # Keep system message only
-
-                await session.commit()
-                logger.debug(
-                    "Cleared conversation message_ids from %d agents in PostgreSQL (kept system messages)", len(agents)
-                )
+            # Messages for this user are already deleted by delete_by_user_id above.
+            # No message_ids maintenance needed (column removed).
 
             # Invalidate agent caches that might reference deleted messages for this user
             from mirix.database.redis_client import get_redis_client
diff --git a/poetry.lock b/poetry.lock
index 287b9973e..9841e8ecf 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,39 @@
-# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.3.2 and should not be changed by hand.
+
+[[package]]
+name = "aiofiles"
+version = "25.1.0"
+description = "File support for asyncio."
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695"},
+    {file = "aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2"},
+]
+
+[[package]]
+name = "aiogoogle"
+version = "5.17.0"
+description = "Async Google API client"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "aiogoogle-5.17.0-py3-none-any.whl", hash = "sha256:75f69810969bd16521896fb4dab784ee7a184ba84b898d7da6370a0682fc9179"},
+    {file = "aiogoogle-5.17.0.tar.gz", hash = "sha256:3206674d953478599d47587e19db0fc831119abb31ce5f1acde8807e0f0a48c6"},
+]
+
+[package.dependencies]
+aiofiles = "*"
+aiohttp = "*"
+async-timeout = "*"
+google-auth = "*"
+tonyg-rfc3339 = "*"
+
+[package.extras]
+curio-asks = ["asks", "curio"]
+trio-asks = ["asks", "trio"]
 
 [[package]]
 name = "aiohappyeyeballs"
@@ -355,6 +390,24 @@ files = [
     {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
 ]
 
+[[package]]
+name = "asyncddgs"
+version = "0.1.0a1"
+description = "Asynchronous DuckDuckGo Search API: A FastAPI service for async access to DuckDuckGo’s text, image, video, and news searches. Uses a custom aDDGS class with aiohttp and asyncio for concurrent queries. Supports advanced syntax and proxies."
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "asyncddgs-0.1.0a1-py3-none-any.whl", hash = "sha256:4eeb32d08ab0347934c8487d2977a66f0f5ccf12e2c00a11ad8e927bff5e1595"},
+    {file = "asyncddgs-0.1.0a1.tar.gz", hash = "sha256:3f8e10feada699ab1b39a4716f08a65104c07de13ba059fc34edc7cc86aa313c"},
+]
+
+[package.dependencies]
+aiohttp = "*"
+fastapi = "*"
+lxml = "*"
+uvicorn = "*"
+
 [[package]]
 name = "asyncpg"
 version = "0.31.0"
@@ -1481,53 +1534,6 @@ test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto
 test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard ; python_version < \"3.14\""]
 tqdm = ["tqdm"]
 
-[[package]]
-name = "google-api-core"
-version = "2.28.1"
-description = "Google API client core library"
-optional = false
-python-versions = ">=3.7"
-groups = ["main"]
-files = [
-    {file = "google_api_core-2.28.1-py3-none-any.whl", hash = "sha256:4021b0f8ceb77a6fb4de6fde4502cecab45062e66ff4f2895169e0b35bc9466c"},
-    {file = "google_api_core-2.28.1.tar.gz", hash = "sha256:2b405df02d68e68ce0fbc138559e6036559e685159d148ae5861013dc201baf8"},
-]
-
-[package.dependencies]
-google-auth = ">=2.14.1,<3.0.0"
-googleapis-common-protos = ">=1.56.2,<2.0.0"
-proto-plus = [
-    {version = ">=1.25.0,<2.0.0", markers = "python_version >= \"3.13\""},
-    {version = ">=1.22.3,<2.0.0", markers = "python_version < \"3.13\""},
-]
-protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<7.0.0"
-requests = ">=2.18.0,<3.0.0"
-
-[package.extras]
-async-rest = ["google-auth[aiohttp] (>=2.35.0,<3.0.0)"]
-grpc = ["grpcio (>=1.33.2,<2.0.0)", "grpcio (>=1.49.1,<2.0.0) ; python_version >= \"3.11\"", "grpcio (>=1.75.1,<2.0.0) ; python_version >= \"3.14\"", "grpcio-status (>=1.33.2,<2.0.0)", "grpcio-status (>=1.49.1,<2.0.0) ; python_version >= \"3.11\"", "grpcio-status (>=1.75.1,<2.0.0) ; python_version >= \"3.14\""]
-grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.0)"]
-grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.0)"]
-
-[[package]]
-name = "google-api-python-client"
-version = "2.187.0"
-description = "Google API Client Library for Python"
-optional = false
-python-versions = ">=3.7"
-groups = ["main"]
-files = [
-    {file = "google_api_python_client-2.187.0-py3-none-any.whl", hash = "sha256:d8d0f6d85d7d1d10bdab32e642312ed572bdc98919f72f831b44b9a9cebba32f"},
-    {file = "google_api_python_client-2.187.0.tar.gz", hash = "sha256:e98e8e8f49e1b5048c2f8276473d6485febc76c9c47892a8b4d1afa2c9ec8278"},
-]
-
-[package.dependencies]
-google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0"
-google-auth = ">=1.32.0,<2.24.0 || >2.24.0,<2.25.0 || >2.25.0,<3.0.0"
-google-auth-httplib2 = ">=0.2.0,<1.0.0"
-httplib2 = ">=0.19.0,<1.0.0"
-uritemplate = ">=3.0.1,<5"
-
 [[package]]
 name = "google-auth"
 version = "2.43.0"
@@ -1555,22 +1561,6 @@ requests = ["requests (>=2.20.0,<3.0.0)"]
 testing = ["aiohttp (<3.10.0)", "aiohttp (>=3.6.2,<4.0.0)", "aioresponses", "cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (<39.0.0) ; python_version < \"3.8\"", "cryptography (>=38.0.3)", "cryptography (>=38.0.3)", "flask", "freezegun", "grpcio", "mock", "oauth2client", "packaging", "pyjwt (>=2.0)", "pyopenssl (<24.3.0)", "pyopenssl (>=20.0.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-localserver", "pyu2f (>=0.1.5)", "requests (>=2.20.0,<3.0.0)", "responses", "urllib3"]
 urllib3 = ["packaging", "urllib3"]
 
-[[package]]
-name = "google-auth-httplib2"
-version = "0.2.1"
-description = "Google Authentication Library: httplib2 transport"
-optional = false
-python-versions = ">=3.7"
-groups = ["main"]
-files = [
-    {file = "google_auth_httplib2-0.2.1-py3-none-any.whl", hash = "sha256:1be94c611db91c01f9703e7f62b0a59bbd5587a95571c7b6fade510d648bc08b"},
-    {file = "google_auth_httplib2-0.2.1.tar.gz", hash = "sha256:5ef03be3927423c87fb69607b42df23a444e434ddb2555b73b3679793187b7de"},
-]
-
-[package.dependencies]
-google-auth = ">=1.32.0,<3.0.0"
-httplib2 = ">=0.19.0,<1.0.0"
-
 [[package]]
 name = "google-auth-oauthlib"
 version = "1.2.2"
@@ -1651,6 +1641,8 @@ files = [
     {file = "greenlet-3.2.4-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2ca18a03a8cfb5b25bc1cbe20f3d9a4c80d8c3b13ba3df49ac3961af0b1018d"},
     {file = "greenlet-3.2.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fe0a28a7b952a21e2c062cd5756d34354117796c6d9215a87f55e38d15402c5"},
     {file = "greenlet-3.2.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8854167e06950ca75b898b104b63cc646573aa5fef1353d4508ecdd1ee76254f"},
+    {file = "greenlet-3.2.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f47617f698838ba98f4ff4189aef02e7343952df3a615f847bb575c3feb177a7"},
+    {file = "greenlet-3.2.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af41be48a4f60429d5cad9d22175217805098a9ef7c40bfef44f7669fb9d74d8"},
     {file = "greenlet-3.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:73f49b5368b5359d04e18d15828eecc1806033db5233397748f4ca813ff1056c"},
     {file = "greenlet-3.2.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:96378df1de302bc38e99c3a9aa311967b7dc80ced1dcc6f171e99842987882a2"},
     {file = "greenlet-3.2.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1ee8fae0519a337f2329cb78bd7a8e128ec0f881073d43f023c7b8d4831d5246"},
@@ -1660,6 +1652,8 @@ files = [
     {file = "greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8"},
     {file = "greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52"},
     {file = "greenlet-3.2.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:55e9c5affaa6775e2c6b67659f3a71684de4c549b3dd9afca3bc773533d284fa"},
+    {file = "greenlet-3.2.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c9c6de1940a7d828635fbd254d69db79e54619f165ee7ce32fda763a9cb6a58c"},
+    {file = "greenlet-3.2.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03c5136e7be905045160b1b9fdca93dd6727b180feeafda6818e6496434ed8c5"},
     {file = "greenlet-3.2.4-cp311-cp311-win_amd64.whl", hash = "sha256:9c40adce87eaa9ddb593ccb0fa6a07caf34015a29bf8d344811665b573138db9"},
     {file = "greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd"},
     {file = "greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb"},
@@ -1669,6 +1663,8 @@ files = [
     {file = "greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0"},
     {file = "greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0"},
     {file = "greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f"},
+    {file = "greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0"},
+    {file = "greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d"},
     {file = "greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02"},
     {file = "greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31"},
     {file = "greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945"},
@@ -1678,6 +1674,8 @@ files = [
     {file = "greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671"},
     {file = "greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b"},
     {file = "greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae"},
+    {file = "greenlet-3.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e343822feb58ac4d0a1211bd9399de2b3a04963ddeec21530fc426cc121f19b"},
+    {file = "greenlet-3.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca7f6f1f2649b89ce02f6f229d7c19f680a6238af656f61e0115b24857917929"},
     {file = "greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b"},
     {file = "greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0"},
     {file = "greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f"},
@@ -1685,6 +1683,8 @@ files = [
     {file = "greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1"},
     {file = "greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735"},
     {file = "greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337"},
+    {file = "greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269"},
+    {file = "greenlet-3.2.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:015d48959d4add5d6c9f6c5210ee3803a830dce46356e3bc326d6776bde54681"},
     {file = "greenlet-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:e37ab26028f12dbb0ff65f29a8d3d44a765c61e729647bf2ddfbbed621726f01"},
     {file = "greenlet-3.2.4-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:b6a7c19cf0d2742d0809a4c05975db036fdff50cd294a93632d6a310bf9ac02c"},
     {file = "greenlet-3.2.4-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:27890167f55d2387576d1f41d9487ef171849ea0359ce1510ca6e06c8bece11d"},
@@ -1694,6 +1694,8 @@ files = [
     {file = "greenlet-3.2.4-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9913f1a30e4526f432991f89ae263459b1c64d1608c0d22a5c79c287b3c70df"},
     {file = "greenlet-3.2.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b90654e092f928f110e0007f572007c9727b5265f7632c2fa7415b4689351594"},
     {file = "greenlet-3.2.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:81701fd84f26330f0d5f4944d4e92e61afe6319dcd9775e39396e39d7c3e5f98"},
+    {file = "greenlet-3.2.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:28a3c6b7cd72a96f61b0e4b2a36f681025b60ae4779cc73c1535eb5f29560b10"},
+    {file = "greenlet-3.2.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:52206cd642670b0b320a1fd1cbfd95bca0e043179c1d8a045f2c6109dfe973be"},
     {file = "greenlet-3.2.4-cp39-cp39-win32.whl", hash = "sha256:65458b409c1ed459ea899e939f0e1cdb14f58dbc803f2f93c5eab5694d32671b"},
     {file = "greenlet-3.2.4-cp39-cp39-win_amd64.whl", hash = "sha256:d2e685ade4dafd447ede19c31277a224a239a0a1a4eca4e6390efedf20260cfb"},
     {file = "greenlet-3.2.4.tar.gz", hash = "sha256:0dca0d95ff849f9a364385f36ab49f50065d76964944638be9691e1832e9f86d"},
@@ -1865,7 +1867,7 @@ files = [
 
 [package.dependencies]
 grpcio = ">=1.66.2"
-protobuf = ">=5.26.1,<6.0dev"
+protobuf = ">=5.26.1,<6.0.dev0"
 setuptools = "*"
 
 [[package]]
@@ -2017,21 +2019,6 @@ http2 = ["h2 (>=3,<5)"]
 socks = ["socksio (==1.*)"]
 trio = ["trio (>=0.22.0,<1.0)"]
 
-[[package]]
-name = "httplib2"
-version = "0.31.0"
-description = "A comprehensive HTTP client library."
-optional = false
-python-versions = ">=3.6"
-groups = ["main"]
-files = [
-    {file = "httplib2-0.31.0-py3-none-any.whl", hash = "sha256:b9cd78abea9b4e43a7714c6e0f8b6b8561a6fc1e95d5dbd367f5bf0ef35f5d24"},
-    {file = "httplib2-0.31.0.tar.gz", hash = "sha256:ac7ab497c50975147d4f7b1ade44becc7df2f8954d42b38b3d69c515f531135c"},
-]
-
-[package.dependencies]
-pyparsing = ">=3.0.4,<4"
-
 [[package]]
 name = "httptools"
 version = "0.7.1"
@@ -2232,8 +2219,8 @@ files = [
 ]
 
 [package.dependencies]
-decorator = {version = "*", markers = "python_version >= \"3.11\""}
-ipython = {version = ">=7.31.1", markers = "python_version >= \"3.11\""}
+decorator = {version = "*", markers = "python_version > \"3.6\""}
+ipython = {version = ">=7.31.1", markers = "python_version > \"3.6\""}
 tomli = {version = "*", markers = "python_version > \"3.6\" and python_version < \"3.11\""}
 
 [[package]]
@@ -2531,7 +2518,7 @@ files = [
 
 [package.dependencies]
 attrs = ">=22.2.0"
-jsonschema-specifications = ">=2023.03.6"
+jsonschema-specifications = ">=2023.3.6"
 referencing = ">=0.28.4"
 rpds-py = ">=0.7.1"
 
@@ -2605,7 +2592,7 @@ description = "Mypyc runtime library"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "extra == \"dev\" and platform_python_implementation != \"PyPy\""
+markers = "platform_python_implementation != \"PyPy\" and extra == \"dev\""
 files = [
     {file = "librt-0.7.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b45306a1fc5f53c9330fbee134d8b3227fe5da2ab09813b892790400aa49352d"},
     {file = "librt-0.7.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:864c4b7083eeee250ed55135d2127b260d7eb4b5e953a9e5df09c852e327961b"},
@@ -2959,6 +2946,162 @@ files = [
 [package.dependencies]
 llama-cloud-services = ">=0.6.54"
 
+[[package]]
+name = "lxml"
+version = "6.0.2"
+description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "lxml-6.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e77dd455b9a16bbd2a5036a63ddbd479c19572af81b624e79ef422f929eef388"},
+    {file = "lxml-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d444858b9f07cefff6455b983aea9a67f7462ba1f6cbe4a21e8bf6791bf2153"},
+    {file = "lxml-6.0.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f952dacaa552f3bb8834908dddd500ba7d508e6ea6eb8c52eb2d28f48ca06a31"},
+    {file = "lxml-6.0.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:71695772df6acea9f3c0e59e44ba8ac50c4f125217e84aab21074a1a55e7e5c9"},
+    {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:17f68764f35fd78d7c4cc4ef209a184c38b65440378013d24b8aecd327c3e0c8"},
+    {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:058027e261afed589eddcfe530fcc6f3402d7fd7e89bfd0532df82ebc1563dba"},
+    {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8ffaeec5dfea5881d4c9d8913a32d10cfe3923495386106e4a24d45300ef79c"},
+    {file = "lxml-6.0.2-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:f2e3b1a6bb38de0bc713edd4d612969dd250ca8b724be8d460001a387507021c"},
+    {file = "lxml-6.0.2-cp310-cp310-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d6690ec5ec1cce0385cb20896b16be35247ac8c2046e493d03232f1c2414d321"},
+    {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f2a50c3c1d11cad0ebebbac357a97b26aa79d2bcaf46f256551152aa85d3a4d1"},
+    {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:3efe1b21c7801ffa29a1112fab3b0f643628c30472d507f39544fd48e9549e34"},
+    {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:59c45e125140b2c4b33920d21d83681940ca29f0b83f8629ea1a2196dc8cfe6a"},
+    {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:452b899faa64f1805943ec1c0c9ebeaece01a1af83e130b69cdefeda180bb42c"},
+    {file = "lxml-6.0.2-cp310-cp310-win32.whl", hash = "sha256:1e786a464c191ca43b133906c6903a7e4d56bef376b75d97ccbb8ec5cf1f0a4b"},
+    {file = "lxml-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:dacf3c64ef3f7440e3167aa4b49aa9e0fb99e0aa4f9ff03795640bf94531bcb0"},
+    {file = "lxml-6.0.2-cp310-cp310-win_arm64.whl", hash = "sha256:45f93e6f75123f88d7f0cfd90f2d05f441b808562bf0bc01070a00f53f5028b5"},
+    {file = "lxml-6.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:13e35cbc684aadf05d8711a5d1b5857c92e5e580efa9a0d2be197199c8def607"},
+    {file = "lxml-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b1675e096e17c6fe9c0e8c81434f5736c0739ff9ac6123c87c2d452f48fc938"},
+    {file = "lxml-6.0.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac6e5811ae2870953390452e3476694196f98d447573234592d30488147404d"},
+    {file = "lxml-6.0.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5aa0fc67ae19d7a64c3fe725dc9a1bb11f80e01f78289d05c6f62545affec438"},
+    {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de496365750cc472b4e7902a485d3f152ecf57bd3ba03ddd5578ed8ceb4c5964"},
+    {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:200069a593c5e40b8f6fc0d84d86d970ba43138c3e68619ffa234bc9bb806a4d"},
+    {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7d2de809c2ee3b888b59f995625385f74629707c9355e0ff856445cdcae682b7"},
+    {file = "lxml-6.0.2-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:b2c3da8d93cf5db60e8858c17684c47d01fee6405e554fb55018dd85fc23b178"},
+    {file = "lxml-6.0.2-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:442de7530296ef5e188373a1ea5789a46ce90c4847e597856570439621d9c553"},
+    {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2593c77efde7bfea7f6389f1ab249b15ed4aa5bc5cb5131faa3b843c429fbedb"},
+    {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:3e3cb08855967a20f553ff32d147e14329b3ae70ced6edc2f282b94afbc74b2a"},
+    {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2ed6c667fcbb8c19c6791bbf40b7268ef8ddf5a96940ba9404b9f9a304832f6c"},
+    {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b8f18914faec94132e5b91e69d76a5c1d7b0c73e2489ea8929c4aaa10b76bbf7"},
+    {file = "lxml-6.0.2-cp311-cp311-win32.whl", hash = "sha256:6605c604e6daa9e0d7f0a2137bdc47a2e93b59c60a65466353e37f8272f47c46"},
+    {file = "lxml-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e5867f2651016a3afd8dd2c8238baa66f1e2802f44bc17e236f547ace6647078"},
+    {file = "lxml-6.0.2-cp311-cp311-win_arm64.whl", hash = "sha256:4197fb2534ee05fd3e7afaab5d8bfd6c2e186f65ea7f9cd6a82809c887bd1285"},
+    {file = "lxml-6.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a59f5448ba2ceccd06995c95ea59a7674a10de0810f2ce90c9006f3cbc044456"},
+    {file = "lxml-6.0.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e8113639f3296706fbac34a30813929e29247718e88173ad849f57ca59754924"},
+    {file = "lxml-6.0.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a8bef9b9825fa8bc816a6e641bb67219489229ebc648be422af695f6e7a4fa7f"},
+    {file = "lxml-6.0.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:65ea18d710fd14e0186c2f973dc60bb52039a275f82d3c44a0e42b43440ea534"},
+    {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c371aa98126a0d4c739ca93ceffa0fd7a5d732e3ac66a46e74339acd4d334564"},
+    {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:700efd30c0fa1a3581d80a748157397559396090a51d306ea59a70020223d16f"},
+    {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c33e66d44fe60e72397b487ee92e01da0d09ba2d66df8eae42d77b6d06e5eba0"},
+    {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90a345bbeaf9d0587a3aaffb7006aa39ccb6ff0e96a57286c0cb2fd1520ea192"},
+    {file = "lxml-6.0.2-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:064fdadaf7a21af3ed1dcaa106b854077fbeada827c18f72aec9346847cd65d0"},
+    {file = "lxml-6.0.2-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fbc74f42c3525ac4ffa4b89cbdd00057b6196bcefe8bce794abd42d33a018092"},
+    {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6ddff43f702905a4e32bc24f3f2e2edfe0f8fde3277d481bffb709a4cced7a1f"},
+    {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6da5185951d72e6f5352166e3da7b0dc27aa70bd1090b0eb3f7f7212b53f1bb8"},
+    {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:57a86e1ebb4020a38d295c04fc79603c7899e0df71588043eb218722dabc087f"},
+    {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2047d8234fe735ab77802ce5f2297e410ff40f5238aec569ad7c8e163d7b19a6"},
+    {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f91fd2b2ea15a6800c8e24418c0775a1694eefc011392da73bc6cef2623b322"},
+    {file = "lxml-6.0.2-cp312-cp312-win32.whl", hash = "sha256:3ae2ce7d6fedfb3414a2b6c5e20b249c4c607f72cb8d2bb7cc9c6ec7c6f4e849"},
+    {file = "lxml-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:72c87e5ee4e58a8354fb9c7c84cbf95a1c8236c127a5d1b7683f04bed8361e1f"},
+    {file = "lxml-6.0.2-cp312-cp312-win_arm64.whl", hash = "sha256:61cb10eeb95570153e0c0e554f58df92ecf5109f75eacad4a95baa709e26c3d6"},
+    {file = "lxml-6.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77"},
+    {file = "lxml-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f"},
+    {file = "lxml-6.0.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452"},
+    {file = "lxml-6.0.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048"},
+    {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df"},
+    {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1"},
+    {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916"},
+    {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd"},
+    {file = "lxml-6.0.2-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6"},
+    {file = "lxml-6.0.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a"},
+    {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679"},
+    {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659"},
+    {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484"},
+    {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2"},
+    {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314"},
+    {file = "lxml-6.0.2-cp313-cp313-win32.whl", hash = "sha256:e8cd2415f372e7e5a789d743d133ae474290a90b9023197fd78f32e2dc6873e2"},
+    {file = "lxml-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:b30d46379644fbfc3ab81f8f82ae4de55179414651f110a1514f0b1f8f6cb2d7"},
+    {file = "lxml-6.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:13dcecc9946dca97b11b7c40d29fba63b55ab4170d3c0cf8c0c164343b9bfdcf"},
+    {file = "lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe"},
+    {file = "lxml-6.0.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d"},
+    {file = "lxml-6.0.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d"},
+    {file = "lxml-6.0.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5"},
+    {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0"},
+    {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba"},
+    {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0"},
+    {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d"},
+    {file = "lxml-6.0.2-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37"},
+    {file = "lxml-6.0.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9"},
+    {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917"},
+    {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f"},
+    {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8"},
+    {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a"},
+    {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c"},
+    {file = "lxml-6.0.2-cp314-cp314-win32.whl", hash = "sha256:be3aaa60da67e6153eb15715cc2e19091af5dc75faef8b8a585aea372507384b"},
+    {file = "lxml-6.0.2-cp314-cp314-win_amd64.whl", hash = "sha256:fa25afbadead523f7001caf0c2382afd272c315a033a7b06336da2637d92d6ed"},
+    {file = "lxml-6.0.2-cp314-cp314-win_arm64.whl", hash = "sha256:063eccf89df5b24e361b123e257e437f9e9878f425ee9aae3144c77faf6da6d8"},
+    {file = "lxml-6.0.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d"},
+    {file = "lxml-6.0.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba"},
+    {file = "lxml-6.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601"},
+    {file = "lxml-6.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed"},
+    {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37"},
+    {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338"},
+    {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9"},
+    {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd"},
+    {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d"},
+    {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9"},
+    {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e"},
+    {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d"},
+    {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec"},
+    {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272"},
+    {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f"},
+    {file = "lxml-6.0.2-cp314-cp314t-win32.whl", hash = "sha256:da08e7bb297b04e893d91087df19638dc7a6bb858a954b0cc2b9f5053c922312"},
+    {file = "lxml-6.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:252a22982dca42f6155125ac76d3432e548a7625d56f5a273ee78a5057216eca"},
+    {file = "lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c"},
+    {file = "lxml-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a656ca105115f6b766bba324f23a67914d9c728dafec57638e2b92a9dcd76c62"},
+    {file = "lxml-6.0.2-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c54d83a2188a10ebdba573f16bd97135d06c9ef60c3dc495315c7a28c80a263f"},
+    {file = "lxml-6.0.2-cp38-cp38-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:1ea99340b3c729beea786f78c38f60f4795622f36e305d9c9be402201efdc3b7"},
+    {file = "lxml-6.0.2-cp38-cp38-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:af85529ae8d2a453feee4c780d9406a5e3b17cee0dd75c18bd31adcd584debc3"},
+    {file = "lxml-6.0.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fe659f6b5d10fb5a17f00a50eb903eb277a71ee35df4615db573c069bcf967ac"},
+    {file = "lxml-6.0.2-cp38-cp38-win32.whl", hash = "sha256:5921d924aa5468c939d95c9814fa9f9b5935a6ff4e679e26aaf2951f74043512"},
+    {file = "lxml-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:0aa7070978f893954008ab73bb9e3c24a7c56c054e00566a21b553dc18105fca"},
+    {file = "lxml-6.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2c8458c2cdd29589a8367c09c8f030f1d202be673f0ca224ec18590b3b9fb694"},
+    {file = "lxml-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3fee0851639d06276e6b387f1c190eb9d7f06f7f53514e966b26bae46481ec90"},
+    {file = "lxml-6.0.2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b2142a376b40b6736dfc214fd2902409e9e3857eff554fed2d3c60f097e62a62"},
+    {file = "lxml-6.0.2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6b5b39cc7e2998f968f05309e666103b53e2edd01df8dc51b90d734c0825444"},
+    {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4aec24d6b72ee457ec665344a29acb2d35937d5192faebe429ea02633151aad"},
+    {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:b42f4d86b451c2f9d06ffb4f8bbc776e04df3ba070b9fe2657804b1b40277c48"},
+    {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6cdaefac66e8b8f30e37a9b4768a391e1f8a16a7526d5bc77a7928408ef68e93"},
+    {file = "lxml-6.0.2-cp39-cp39-manylinux_2_31_armv7l.whl", hash = "sha256:b738f7e648735714bbb82bdfd030203360cfeab7f6e8a34772b3c8c8b820568c"},
+    {file = "lxml-6.0.2-cp39-cp39-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:daf42de090d59db025af61ce6bdb2521f0f102ea0e6ea310f13c17610a97da4c"},
+    {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:66328dabea70b5ba7e53d94aa774b733cf66686535f3bc9250a7aab53a91caaf"},
+    {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:e237b807d68a61fc3b1e845407e27e5eb8ef69bc93fe8505337c1acb4ee300b6"},
+    {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:ac02dc29fd397608f8eb15ac1610ae2f2f0154b03f631e6d724d9e2ad4ee2c84"},
+    {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:817ef43a0c0b4a77bd166dc9a09a555394105ff3374777ad41f453526e37f9cb"},
+    {file = "lxml-6.0.2-cp39-cp39-win32.whl", hash = "sha256:bc532422ff26b304cfb62b328826bd995c96154ffd2bac4544f37dbb95ecaa8f"},
+    {file = "lxml-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:995e783eb0374c120f528f807443ad5a83a656a8624c467ea73781fc5f8a8304"},
+    {file = "lxml-6.0.2-cp39-cp39-win_arm64.whl", hash = "sha256:08b9d5e803c2e4725ae9e8559ee880e5328ed61aa0935244e0515d7d9dbec0aa"},
+    {file = "lxml-6.0.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e748d4cf8fef2526bb2a589a417eba0c8674e29ffcb570ce2ceca44f1e567bf6"},
+    {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4ddb1049fa0579d0cbd00503ad8c58b9ab34d1254c77bc6a5576d96ec7853dba"},
+    {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cb233f9c95f83707dae461b12b720c1af9c28c2d19208e1be03387222151daf5"},
+    {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc456d04db0515ce3320d714a1eac7a97774ff0849e7718b492d957da4631dd4"},
+    {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2613e67de13d619fd283d58bda40bff0ee07739f624ffee8b13b631abf33083d"},
+    {file = "lxml-6.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:24a8e756c982c001ca8d59e87c80c4d9dcd4d9b44a4cbeb8d9be4482c514d41d"},
+    {file = "lxml-6.0.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1c06035eafa8404b5cf475bb37a9f6088b0aca288d4ccc9d69389750d5543700"},
+    {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c7d13103045de1bdd6fe5d61802565f1a3537d70cd3abf596aa0af62761921ee"},
+    {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a3c150a95fbe5ac91de323aa756219ef9cf7fde5a3f00e2281e30f33fa5fa4f"},
+    {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:60fa43be34f78bebb27812ed90f1925ec99560b0fa1decdb7d12b84d857d31e9"},
+    {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:21c73b476d3cfe836be731225ec3421fa2f048d84f6df6a8e70433dff1376d5a"},
+    {file = "lxml-6.0.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:27220da5be049e936c3aca06f174e8827ca6445a4353a1995584311487fc4e3e"},
+    {file = "lxml-6.0.2.tar.gz", hash = "sha256:cd79f3367bd74b317dda655dc8fcfa304d9eb6e4fb06b7168c5cf27f96e0cd62"},
+]
+
+[package.extras]
+cssselect = ["cssselect (>=0.7)"]
+html-clean = ["lxml_html_clean"]
+html5 = ["html5lib"]
+htmlsoup = ["BeautifulSoup4"]
+
 [[package]]
 name = "markdown"
 version = "3.10"
@@ -3794,8 +3937,8 @@ files = [
 [package.dependencies]
 googleapis-common-protos = ">=1.57,<2.0"
 grpcio = [
-    {version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""},
     {version = ">=1.63.2,<2.0.0", markers = "python_version < \"3.13\""},
+    {version = ">=1.66.2,<2.0.0", markers = "python_version >= \"3.13\""},
 ]
 opentelemetry-api = ">=1.15,<2.0"
 opentelemetry-exporter-otlp-proto-common = "1.38.0"
@@ -3989,9 +4132,9 @@ files = [
 
 [package.dependencies]
 numpy = [
-    {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
     {version = ">=1.22.4", markers = "python_version < \"3.11\""},
     {version = ">=1.23.2", markers = "python_version == \"3.11\""},
+    {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
 ]
 python-dateutil = ">=2.8.2"
 pytz = ">=2020.1"
@@ -4393,24 +4536,6 @@ files = [
     {file = "propcache-0.4.1.tar.gz", hash = "sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d"},
 ]
 
-[[package]]
-name = "proto-plus"
-version = "1.26.1"
-description = "Beautiful, Pythonic protocol buffers"
-optional = false
-python-versions = ">=3.7"
-groups = ["main"]
-files = [
-    {file = "proto_plus-1.26.1-py3-none-any.whl", hash = "sha256:13285478c2dcf2abb829db158e1047e2f1e8d63a077d94263c2b88b043c75a66"},
-    {file = "proto_plus-1.26.1.tar.gz", hash = "sha256:21a515a4c4c0088a773899e23c7bbade3d18f9c66c73edd4c7ee3816bc96a012"},
-]
-
-[package.dependencies]
-protobuf = ">=3.19.0,<7.0.0"
-
-[package.extras]
-testing = ["google-api-core (>=1.31.5)"]
-
 [[package]]
 name = "protobuf"
 version = "5.29.5"
@@ -4864,21 +4989,6 @@ dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pyte
 docs = ["sphinx", "sphinx-rtd-theme", "zope.interface"]
 tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"]
 
-[[package]]
-name = "pyparsing"
-version = "3.2.5"
-description = "pyparsing - Classes and methods to define and execute parsing grammars"
-optional = false
-python-versions = ">=3.9"
-groups = ["main"]
-files = [
-    {file = "pyparsing-3.2.5-py3-none-any.whl", hash = "sha256:e38a4f02064cf41fe6593d328d0512495ad1f3d8a91c4f73fc401b3079a59a5e"},
-    {file = "pyparsing-3.2.5.tar.gz", hash = "sha256:2df8d5b7b2802ef88e8d016a2eb9c7aeaa923529cd251ed0fe4608275d4105b6"},
-]
-
-[package.extras]
-diagrams = ["jinja2", "railroad-diagrams"]
-
 [[package]]
 name = "pypdf"
 version = "6.3.0"
@@ -5973,69 +6083,75 @@ whisper-local = ["openai-whisper", "soundfile"]
 
 [[package]]
 name = "sqlalchemy"
-version = "2.0.44"
+version = "2.0.48"
 description = "Database Abstraction Library"
 optional = false
 python-versions = ">=3.7"
 groups = ["main"]
 files = [
-    {file = "SQLAlchemy-2.0.44-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:471733aabb2e4848d609141a9e9d56a427c0a038f4abf65dd19d7a21fd563632"},
-    {file = "SQLAlchemy-2.0.44-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48bf7d383a35e668b984c805470518b635d48b95a3c57cb03f37eaa3551b5f9f"},
-    {file = "SQLAlchemy-2.0.44-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bf4bb6b3d6228fcf3a71b50231199fb94d2dd2611b66d33be0578ea3e6c2726"},
-    {file = "SQLAlchemy-2.0.44-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:e998cf7c29473bd077704cea3577d23123094311f59bdc4af551923b168332b1"},
-    {file = "SQLAlchemy-2.0.44-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:ebac3f0b5732014a126b43c2b7567f2f0e0afea7d9119a3378bde46d3dcad88e"},
-    {file = "SQLAlchemy-2.0.44-cp37-cp37m-win32.whl", hash = "sha256:3255d821ee91bdf824795e936642bbf43a4c7cedf5d1aed8d24524e66843aa74"},
-    {file = "SQLAlchemy-2.0.44-cp37-cp37m-win_amd64.whl", hash = "sha256:78e6c137ba35476adb5432103ae1534f2f5295605201d946a4198a0dea4b38e7"},
-    {file = "sqlalchemy-2.0.44-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c77f3080674fc529b1bd99489378c7f63fcb4ba7f8322b79732e0258f0ea3ce"},
-    {file = "sqlalchemy-2.0.44-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4c26ef74ba842d61635b0152763d057c8d48215d5be9bb8b7604116a059e9985"},
-    {file = "sqlalchemy-2.0.44-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4a172b31785e2f00780eccab00bc240ccdbfdb8345f1e6063175b3ff12ad1b0"},
-    {file = "sqlalchemy-2.0.44-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9480c0740aabd8cb29c329b422fb65358049840b34aba0adf63162371d2a96e"},
-    {file = "sqlalchemy-2.0.44-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:17835885016b9e4d0135720160db3095dc78c583e7b902b6be799fb21035e749"},
-    {file = "sqlalchemy-2.0.44-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cbe4f85f50c656d753890f39468fcd8190c5f08282caf19219f684225bfd5fd2"},
-    {file = "sqlalchemy-2.0.44-cp310-cp310-win32.whl", hash = "sha256:2fcc4901a86ed81dc76703f3b93ff881e08761c63263c46991081fd7f034b165"},
-    {file = "sqlalchemy-2.0.44-cp310-cp310-win_amd64.whl", hash = "sha256:9919e77403a483ab81e3423151e8ffc9dd992c20d2603bf17e4a8161111e55f5"},
-    {file = "sqlalchemy-2.0.44-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fe3917059c7ab2ee3f35e77757062b1bea10a0b6ca633c58391e3f3c6c488dd"},
-    {file = "sqlalchemy-2.0.44-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:de4387a354ff230bc979b46b2207af841dc8bf29847b6c7dbe60af186d97aefa"},
-    {file = "sqlalchemy-2.0.44-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3678a0fb72c8a6a29422b2732fe423db3ce119c34421b5f9955873eb9b62c1e"},
-    {file = "sqlalchemy-2.0.44-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cf6872a23601672d61a68f390e44703442639a12ee9dd5a88bbce52a695e46e"},
-    {file = "sqlalchemy-2.0.44-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:329aa42d1be9929603f406186630135be1e7a42569540577ba2c69952b7cf399"},
-    {file = "sqlalchemy-2.0.44-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:70e03833faca7166e6a9927fbee7c27e6ecde436774cd0b24bbcc96353bce06b"},
-    {file = "sqlalchemy-2.0.44-cp311-cp311-win32.whl", hash = "sha256:253e2f29843fb303eca6b2fc645aca91fa7aa0aa70b38b6950da92d44ff267f3"},
-    {file = "sqlalchemy-2.0.44-cp311-cp311-win_amd64.whl", hash = "sha256:7a8694107eb4308a13b425ca8c0e67112f8134c846b6e1f722698708741215d5"},
-    {file = "sqlalchemy-2.0.44-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:72fea91746b5890f9e5e0997f16cbf3d53550580d76355ba2d998311b17b2250"},
-    {file = "sqlalchemy-2.0.44-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:585c0c852a891450edbb1eaca8648408a3cc125f18cf433941fa6babcc359e29"},
-    {file = "sqlalchemy-2.0.44-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b94843a102efa9ac68a7a30cd46df3ff1ed9c658100d30a725d10d9c60a2f44"},
-    {file = "sqlalchemy-2.0.44-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:119dc41e7a7defcefc57189cfa0e61b1bf9c228211aba432b53fb71ef367fda1"},
-    {file = "sqlalchemy-2.0.44-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0765e318ee9179b3718c4fd7ba35c434f4dd20332fbc6857a5e8df17719c24d7"},
-    {file = "sqlalchemy-2.0.44-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2e7b5b079055e02d06a4308d0481658e4f06bc7ef211567edc8f7d5dce52018d"},
-    {file = "sqlalchemy-2.0.44-cp312-cp312-win32.whl", hash = "sha256:846541e58b9a81cce7dee8329f352c318de25aa2f2bbe1e31587eb1f057448b4"},
-    {file = "sqlalchemy-2.0.44-cp312-cp312-win_amd64.whl", hash = "sha256:7cbcb47fd66ab294703e1644f78971f6f2f1126424d2b300678f419aa73c7b6e"},
-    {file = "sqlalchemy-2.0.44-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ff486e183d151e51b1d694c7aa1695747599bb00b9f5f604092b54b74c64a8e1"},
-    {file = "sqlalchemy-2.0.44-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0b1af8392eb27b372ddb783b317dea0f650241cea5bd29199b22235299ca2e45"},
-    {file = "sqlalchemy-2.0.44-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b61188657e3a2b9ac4e8f04d6cf8e51046e28175f79464c67f2fd35bceb0976"},
-    {file = "sqlalchemy-2.0.44-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b87e7b91a5d5973dda5f00cd61ef72ad75a1db73a386b62877d4875a8840959c"},
-    {file = "sqlalchemy-2.0.44-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:15f3326f7f0b2bfe406ee562e17f43f36e16167af99c4c0df61db668de20002d"},
-    {file = "sqlalchemy-2.0.44-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1e77faf6ff919aa8cd63f1c4e561cac1d9a454a191bb864d5dd5e545935e5a40"},
-    {file = "sqlalchemy-2.0.44-cp313-cp313-win32.whl", hash = "sha256:ee51625c2d51f8baadf2829fae817ad0b66b140573939dd69284d2ba3553ae73"},
-    {file = "sqlalchemy-2.0.44-cp313-cp313-win_amd64.whl", hash = "sha256:c1c80faaee1a6c3428cecf40d16a2365bcf56c424c92c2b6f0f9ad204b899e9e"},
-    {file = "sqlalchemy-2.0.44-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2fc44e5965ea46909a416fff0af48a219faefd5773ab79e5f8a5fcd5d62b2667"},
-    {file = "sqlalchemy-2.0.44-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:dc8b3850d2a601ca2320d081874033684e246d28e1c5e89db0864077cfc8f5a9"},
-    {file = "sqlalchemy-2.0.44-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d733dec0614bb8f4bcb7c8af88172b974f685a31dc3a65cca0527e3120de5606"},
-    {file = "sqlalchemy-2.0.44-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22be14009339b8bc16d6b9dc8780bacaba3402aa7581658e246114abbd2236e3"},
-    {file = "sqlalchemy-2.0.44-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:357bade0e46064f88f2c3a99808233e67b0051cdddf82992379559322dfeb183"},
-    {file = "sqlalchemy-2.0.44-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:4848395d932e93c1595e59a8672aa7400e8922c39bb9b0668ed99ac6fa867822"},
-    {file = "sqlalchemy-2.0.44-cp38-cp38-win32.whl", hash = "sha256:2f19644f27c76f07e10603580a47278abb2a70311136a7f8fd27dc2e096b9013"},
-    {file = "sqlalchemy-2.0.44-cp38-cp38-win_amd64.whl", hash = "sha256:1df4763760d1de0dfc8192cc96d8aa293eb1a44f8f7a5fbe74caf1b551905c5e"},
-    {file = "sqlalchemy-2.0.44-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f7027414f2b88992877573ab780c19ecb54d3a536bef3397933573d6b5068be4"},
-    {file = "sqlalchemy-2.0.44-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3fe166c7d00912e8c10d3a9a0ce105569a31a3d0db1a6e82c4e0f4bf16d5eca9"},
-    {file = "sqlalchemy-2.0.44-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3caef1ff89b1caefc28f0368b3bde21a7e3e630c2eddac16abd9e47bd27cc36a"},
-    {file = "sqlalchemy-2.0.44-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc2856d24afa44295735e72f3c75d6ee7fdd4336d8d3a8f3d44de7aa6b766df2"},
-    {file = "sqlalchemy-2.0.44-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:11bac86b0deada30b6b5f93382712ff0e911fe8d31cb9bf46e6b149ae175eff0"},
-    {file = "sqlalchemy-2.0.44-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:4d18cd0e9a0f37c9f4088e50e3839fcb69a380a0ec957408e0b57cff08ee0a26"},
-    {file = "sqlalchemy-2.0.44-cp39-cp39-win32.whl", hash = "sha256:9e9018544ab07614d591a26c1bd4293ddf40752cc435caf69196740516af7100"},
-    {file = "sqlalchemy-2.0.44-cp39-cp39-win_amd64.whl", hash = "sha256:8e0e4e66fd80f277a8c3de016a81a554e76ccf6b8d881ee0b53200305a8433f6"},
-    {file = "sqlalchemy-2.0.44-py3-none-any.whl", hash = "sha256:19de7ca1246fbef9f9d1bff8f1ab25641569df226364a0e40457dc5457c54b05"},
-    {file = "sqlalchemy-2.0.44.tar.gz", hash = "sha256:0ae7454e1ab1d780aee69fd2aae7d6b8670a581d8847f2d1e0f7ddfbf47e5a22"},
+    {file = "sqlalchemy-2.0.48-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7001dc9d5f6bb4deb756d5928eaefe1930f6f4179da3924cbd95ee0e9f4dce89"},
+    {file = "sqlalchemy-2.0.48-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1a89ce07ad2d4b8cfc30bd5889ec40613e028ed80ef47da7d9dd2ce969ad30e0"},
+    {file = "sqlalchemy-2.0.48-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10853a53a4a00417a00913d270dddda75815fcb80675874285f41051c094d7dd"},
+    {file = "sqlalchemy-2.0.48-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:fac0fa4e4f55f118fd87177dacb1c6522fe39c28d498d259014020fec9164c29"},
+    {file = "sqlalchemy-2.0.48-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3713e21ea67bca727eecd4a24bf68bcd414c403faae4989442be60994301ded0"},
+    {file = "sqlalchemy-2.0.48-cp310-cp310-win32.whl", hash = "sha256:d404dc897ce10e565d647795861762aa2d06ca3f4a728c5e9a835096c7059018"},
+    {file = "sqlalchemy-2.0.48-cp310-cp310-win_amd64.whl", hash = "sha256:841a94c66577661c1f088ac958cd767d7c9bf507698f45afffe7a4017049de76"},
+    {file = "sqlalchemy-2.0.48-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b4c575df7368b3b13e0cebf01d4679f9a28ed2ae6c1cd0b1d5beffb6b2007dc"},
+    {file = "sqlalchemy-2.0.48-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e83e3f959aaa1c9df95c22c528096d94848a1bc819f5d0ebf7ee3df0ca63db6c"},
+    {file = "sqlalchemy-2.0.48-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f7b7243850edd0b8b97043f04748f31de50cf426e939def5c16bedb540698f7"},
+    {file = "sqlalchemy-2.0.48-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:82745b03b4043e04600a6b665cb98697c4339b24e34d74b0a2ac0a2488b6f94d"},
+    {file = "sqlalchemy-2.0.48-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e5e088bf43f6ee6fec7dbf1ef7ff7774a616c236b5c0cb3e00662dd71a56b571"},
+    {file = "sqlalchemy-2.0.48-cp311-cp311-win32.whl", hash = "sha256:9c7d0a77e36b5f4b01ca398482230ab792061d243d715299b44a0b55c89fe617"},
+    {file = "sqlalchemy-2.0.48-cp311-cp311-win_amd64.whl", hash = "sha256:583849c743e0e3c9bb7446f5b5addeacedc168d657a69b418063dfdb2d90081c"},
+    {file = "sqlalchemy-2.0.48-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:348174f228b99f33ca1f773e85510e08927620caa59ffe7803b37170df30332b"},
+    {file = "sqlalchemy-2.0.48-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53667b5f668991e279d21f94ccfa6e45b4e3f4500e7591ae59a8012d0f010dcb"},
+    {file = "sqlalchemy-2.0.48-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34634e196f620c7a61d18d5cf7dc841ca6daa7961aed75d532b7e58b309ac894"},
+    {file = "sqlalchemy-2.0.48-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:546572a1793cc35857a2ffa1fe0e58571af1779bcc1ffa7c9fb0839885ed69a9"},
+    {file = "sqlalchemy-2.0.48-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:07edba08061bc277bfdc772dd2a1a43978f5a45994dd3ede26391b405c15221e"},
+    {file = "sqlalchemy-2.0.48-cp312-cp312-win32.whl", hash = "sha256:908a3fa6908716f803b86896a09a2c4dde5f5ce2bb07aacc71ffebb57986ce99"},
+    {file = "sqlalchemy-2.0.48-cp312-cp312-win_amd64.whl", hash = "sha256:68549c403f79a8e25984376480959975212a670405e3913830614432b5daa07a"},
+    {file = "sqlalchemy-2.0.48-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e3070c03701037aa418b55d36532ecb8f8446ed0135acb71c678dbdf12f5b6e4"},
+    {file = "sqlalchemy-2.0.48-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2645b7d8a738763b664a12a1542c89c940daa55196e8d73e55b169cc5c99f65f"},
+    {file = "sqlalchemy-2.0.48-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b19151e76620a412c2ac1c6f977ab1b9fa7ad43140178345136456d5265b32ed"},
+    {file = "sqlalchemy-2.0.48-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5b193a7e29fd9fa56e502920dca47dffe60f97c863494946bd698c6058a55658"},
+    {file = "sqlalchemy-2.0.48-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:36ac4ddc3d33e852da9cb00ffb08cea62ca05c39711dc67062ca2bb1fae35fd8"},
+    {file = "sqlalchemy-2.0.48-cp313-cp313-win32.whl", hash = "sha256:389b984139278f97757ea9b08993e7b9d1142912e046ab7d82b3fbaeb0209131"},
+    {file = "sqlalchemy-2.0.48-cp313-cp313-win_amd64.whl", hash = "sha256:d612c976cbc2d17edfcc4c006874b764e85e990c29ce9bd411f926bbfb02b9a2"},
+    {file = "sqlalchemy-2.0.48-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69f5bc24904d3bc3640961cddd2523e361257ef68585d6e364166dfbe8c78fae"},
+    {file = "sqlalchemy-2.0.48-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fd08b90d211c086181caed76931ecfa2bdfc83eea3cfccdb0f82abc6c4b876cb"},
+    {file = "sqlalchemy-2.0.48-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:1ccd42229aaac2df431562117ac7e667d702e8e44afdb6cf0e50fa3f18160f0b"},
+    {file = "sqlalchemy-2.0.48-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f0dcbc588cd5b725162c076eb9119342f6579c7f7f55057bb7e3c6ff27e13121"},
+    {file = "sqlalchemy-2.0.48-cp313-cp313t-win32.whl", hash = "sha256:9764014ef5e58aab76220c5664abb5d47d5bc858d9debf821e55cfdd0f128485"},
+    {file = "sqlalchemy-2.0.48-cp313-cp313t-win_amd64.whl", hash = "sha256:e2f35b4cccd9ed286ad62e0a3c3ac21e06c02abc60e20aa51a3e305a30f5fa79"},
+    {file = "sqlalchemy-2.0.48-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e2d0d88686e3d35a76f3e15a34e8c12d73fc94c1dea1cd55782e695cc14086dd"},
+    {file = "sqlalchemy-2.0.48-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49b7bddc1eebf011ea5ab722fdbe67a401caa34a350d278cc7733c0e88fecb1f"},
+    {file = "sqlalchemy-2.0.48-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:426c5ca86415d9b8945c7073597e10de9644802e2ff502b8e1f11a7a2642856b"},
+    {file = "sqlalchemy-2.0.48-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:288937433bd44e3990e7da2402fabc44a3c6c25d3704da066b85b89a85474ae0"},
+    {file = "sqlalchemy-2.0.48-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8183dc57ae7d9edc1346e007e840a9f3d6aa7b7f165203a99e16f447150140d2"},
+    {file = "sqlalchemy-2.0.48-cp314-cp314-win32.whl", hash = "sha256:1182437cb2d97988cfea04cf6cdc0b0bb9c74f4d56ec3d08b81e23d621a28cc6"},
+    {file = "sqlalchemy-2.0.48-cp314-cp314-win_amd64.whl", hash = "sha256:144921da96c08feb9e2b052c5c5c1d0d151a292c6135623c6b2c041f2a45f9e0"},
+    {file = "sqlalchemy-2.0.48-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5aee45fd2c6c0f2b9cdddf48c48535e7471e42d6fb81adfde801da0bd5b93241"},
+    {file = "sqlalchemy-2.0.48-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7cddca31edf8b0653090cbb54562ca027c421c58ddde2c0685f49ff56a1690e0"},
+    {file = "sqlalchemy-2.0.48-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7a936f1bb23d370b7c8cc079d5fce4c7d18da87a33c6744e51a93b0f9e97e9b3"},
+    {file = "sqlalchemy-2.0.48-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e004aa9248e8cb0a5f9b96d003ca7c1c0a5da8decd1066e7b53f59eb8ce7c62b"},
+    {file = "sqlalchemy-2.0.48-cp314-cp314t-win32.whl", hash = "sha256:b8438ec5594980d405251451c5b7ea9aa58dda38eb7ac35fb7e4c696712ee24f"},
+    {file = "sqlalchemy-2.0.48-cp314-cp314t-win_amd64.whl", hash = "sha256:d854b3970067297f3a7fbd7a4683587134aa9b3877ee15aa29eea478dc68f933"},
+    {file = "sqlalchemy-2.0.48-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f8649a14caa5f8a243628b1d61cf530ad9ae4578814ba726816adb1121fc493e"},
+    {file = "sqlalchemy-2.0.48-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6bb85c546591569558571aa1b06aba711b26ae62f111e15e56136d69920e1616"},
+    {file = "sqlalchemy-2.0.48-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a6b764fb312bd35e47797ad2e63f0d323792837a6ac785a4ca967019357d2bc7"},
+    {file = "sqlalchemy-2.0.48-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:7c998f2ace8bf76b453b75dbcca500d4f4b9dd3908c13e89b86289b37784848b"},
+    {file = "sqlalchemy-2.0.48-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:d64177f443594c8697369c10e4bbcac70ef558e0f7921a1de7e4a3d1734bcf67"},
+    {file = "sqlalchemy-2.0.48-cp38-cp38-win32.whl", hash = "sha256:01f6bbd4308b23240cf7d3ef117557c8fd097ec9549d5d8a52977544e35b40ad"},
+    {file = "sqlalchemy-2.0.48-cp38-cp38-win_amd64.whl", hash = "sha256:858e433f12b0e5b3ed2f8da917433b634f4937d0e8793e5cb33c54a1a01df565"},
+    {file = "sqlalchemy-2.0.48-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4599a95f9430ae0de82b52ff0d27304fe898c17cb5f4099f7438a51b9998ac77"},
+    {file = "sqlalchemy-2.0.48-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f27f9da0a7d22b9f981108fd4b62f8b5743423388915a563e651c20d06c1f457"},
+    {file = "sqlalchemy-2.0.48-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d8fcccbbc0c13c13702c471da398b8cd72ba740dca5859f148ae8e0e8e0d3e7e"},
+    {file = "sqlalchemy-2.0.48-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a5b429eb84339f9f05e06083f119ad814e6d85e27ecbdf9c551dfdbb128eaf8a"},
+    {file = "sqlalchemy-2.0.48-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:bcb8ebbf2e2c36cfe01a94f2438012c6a9d494cf80f129d9753bcdf33bfc35a6"},
+    {file = "sqlalchemy-2.0.48-cp39-cp39-win32.whl", hash = "sha256:e214d546c8ecb5fc22d6e6011746082abf13a9cf46eefb45769c7b31407c97b5"},
+    {file = "sqlalchemy-2.0.48-cp39-cp39-win_amd64.whl", hash = "sha256:b8fc3454b4f3bd0a368001d0e968852dad45a873f8b4babd41bc302ec851a099"},
+    {file = "sqlalchemy-2.0.48-py3-none-any.whl", hash = "sha256:a66fe406437dd65cacd96a72689a3aaaecaebbcd62d81c5ac1c0fdbeac835096"},
+    {file = "sqlalchemy-2.0.48.tar.gz", hash = "sha256:5ca74f37f3369b45e1f6b7b06afb182af1fd5dde009e4ffd831830d98cbe5fe7"},
 ]
 
 [package.dependencies]
@@ -6360,6 +6476,17 @@ files = [
     {file = "tomli-2.3.0.tar.gz", hash = "sha256:64be704a875d2a59753d80ee8a533c3fe183e3f06807ff7dc2232938ccb01549"},
 ]
 
+[[package]]
+name = "tonyg-rfc3339"
+version = "0.1"
+description = "Python implementation of RFC 3339"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "tonyg-rfc3339-0.1.tar.gz", hash = "sha256:e424e7b4ddf2a2f5c70d7317faecf9b69b7da099c9fc08d046c3ac679dd30d3d"},
+]
+
 [[package]]
 name = "tqdm"
 version = "4.67.1"
@@ -6453,18 +6580,6 @@ files = [
     {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"},
 ]
 
-[[package]]
-name = "uritemplate"
-version = "4.2.0"
-description = "Implementation of RFC 6570 URI Templates"
-optional = false
-python-versions = ">=3.9"
-groups = ["main"]
-files = [
-    {file = "uritemplate-4.2.0-py3-none-any.whl", hash = "sha256:962201ba1c4edcab02e60f9a0d3821e82dfc5d2d6662a21abd533879bdb8a686"},
-    {file = "uritemplate-4.2.0.tar.gz", hash = "sha256:480c2ed180878955863323eea31b0ede668795de182617fef9c6ca09e6ec9d0e"},
-]
-
 [[package]]
 name = "urllib3"
 version = "2.5.0"
@@ -6517,7 +6632,7 @@ description = "Fast implementation of asyncio event loop on top of libuv"
 optional = false
 python-versions = ">=3.8.1"
 groups = ["main"]
-markers = "sys_platform != \"win32\" and sys_platform != \"cygwin\" and platform_python_implementation != \"PyPy\""
+markers = "platform_python_implementation != \"PyPy\" and sys_platform != \"win32\" and sys_platform != \"cygwin\""
 files = [
     {file = "uvloop-0.22.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ef6f0d4cc8a9fa1f6a910230cd53545d9a14479311e87e3cb225495952eb672c"},
     {file = "uvloop-0.22.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7cd375a12b71d33d46af85a3343b35d98e8116134ba404bd657b3b1d15988792"},
@@ -7069,4 +7184,4 @@ voice = ["SpeechRecognition", "pydub"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<4.0"
-content-hash = "0faeb470f574a981326b51a4a67708ca5d5ca98a163a0015993b0e1adc425f5e"
+content-hash = "072f05ab94767259065f32c18ac0758fb52a8d36de1fbea7f76b15ef90f454aa"
diff --git a/pyproject.toml b/pyproject.toml
index 2477ff926..11cd54c97 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,7 @@ dependencies = [
     "demjson3",
     "pathvalidate",
     "docstring_parser",
-    "sqlalchemy",
+    "sqlalchemy[asyncio] (>=2.0.48,<3.0.0)",
     "asyncpg",
     "aiosqlite",
     "httpx",
@@ -127,7 +127,7 @@ exclude = ["tests*", "scripts*", "frontend*", "public_evaluations*", "mirix_env*
 [tool.setuptools.package-data]
 mirix = [
     "*.yaml",
-    "*.yml", 
+    "*.yml",
     "*.txt",
     "configs/*.yaml",
     "configs/*.yml",
@@ -214,7 +214,7 @@ python_classes = ["Test*"]
 python_functions = ["test_*"]
 addopts = "-v --tb=short"
 asyncio_mode = "auto"
-asyncio_default_fixture_loop_scope = "session"
+asyncio_default_fixture_loop_scope = "module"
 
 
 [tool.poetry.group.dev.dependencies]
diff --git a/scripts/migrations/001_add_message_set_retention_count.sql b/scripts/migrations/001_add_message_set_retention_count.sql
new file mode 100644
index 000000000..07f552d95
--- /dev/null
+++ b/scripts/migrations/001_add_message_set_retention_count.sql
@@ -0,0 +1,12 @@
+-- Migration 001: Additive schema changes (run BEFORE merging the code)
+-- Safe to run on a live database — all changes are backward-compatible.
+-- After running this script, merge the code PR.
+
+-- 1. Add message_set_retention_count to clients table
+ALTER TABLE clients
+    ADD COLUMN IF NOT EXISTS message_set_retention_count INTEGER DEFAULT 0;
+
+-- 2. Add composite index for efficient retention queries on messages
+--    Supports: ORDER BY created_at DESC, id DESC WHERE agent_id=? AND user_id=?
+CREATE INDEX IF NOT EXISTS ix_messages_agent_user_created_at
+    ON messages (agent_id, user_id, created_at, id);
diff --git a/scripts/migrations/002_cleanup_message_ids.sql b/scripts/migrations/002_cleanup_message_ids.sql
new file mode 100644
index 000000000..841197a20
--- /dev/null
+++ b/scripts/migrations/002_cleanup_message_ids.sql
@@ -0,0 +1,10 @@
+-- Migration 002: Cleanup (run AFTER the code has been deployed and verified)
+-- WARNING: Destructive — drops the message_ids column and deletes legacy system messages.
+-- Ensure the new code is running correctly before executing this script.
+
+-- 1. Delete legacy system messages stored as Message rows
+--    (system prompt now lives exclusively in agent_state.system)
+DELETE FROM messages WHERE role = 'system';
+
+-- 2. Drop the message_ids column from agents table
+ALTER TABLE agents DROP COLUMN IF EXISTS message_ids;
diff --git a/tests/conftest.py b/tests/conftest.py
index 0e5d2428e..b547e3123 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,8 +1,9 @@
 """
 Shared test fixtures for Mirix.
 
-Provides a session-scoped API key tied to a test client, so integration tests can
-authenticate against the REST API without passing X-Client-ID.
+Provides:
+- Module-scoped engine reset (NullPool) so each test module gets fresh DB connections
+- Session-scoped API key tied to a test client for integration tests
 """
 
 import asyncio
@@ -10,12 +11,57 @@
 from typing import Optional
 
 import pytest
+import pytest_asyncio
 
 from mirix.schemas.client import Client as PydanticClient
 from mirix.schemas.organization import Organization as PydanticOrganization
 from mirix.security.api_keys import generate_api_key
 from mirix.services.client_manager import ClientManager
 from mirix.services.organization_manager import OrganizationManager
+from mirix.settings import settings
+
+@pytest_asyncio.fixture(scope="module", autouse=True)
+async def _reset_engine_per_module():
+    """Dispose and recreate the async engine with NullPool at the start of
+    each test module so every module's event loop gets fresh DB connections.
+
+    NullPool creates a new connection per session and closes it immediately,
+    preventing stale connections from a previous module's (now-closed) loop.
+    """
+    import mirix.server.server as server_module
+
+    if (
+        hasattr(server_module, "engine")
+        and server_module.engine is not None
+        and "asyncpg" in str(server_module.engine.url)
+    ):
+        from sqlalchemy.ext.asyncio import (
+            AsyncSession,
+            async_sessionmaker,
+            create_async_engine,
+        )
+        from sqlalchemy.pool import NullPool
+
+        await server_module.engine.dispose()
+        _pg_uri = (
+            settings.mirix_pg_uri.replace(
+                "postgresql+pg8000://", "postgresql+asyncpg://"
+            ).replace("postgresql://", "postgresql+asyncpg://")
+        )
+        server_module.engine = create_async_engine(
+            _pg_uri, poolclass=NullPool, echo=settings.pg_echo
+        )
+        server_module.AsyncSessionLocal = async_sessionmaker(
+            bind=server_module.engine,
+            class_=AsyncSession,
+            autocommit=False,
+            autoflush=False,
+            expire_on_commit=False,
+        )
+
+    await server_module.ensure_tables_created()
+    yield
+
 
 TEST_ORG_ID = "demo-org"
 TEST_CLIENT_ID = "demo-client-id"
diff --git a/tests/test_agent_prompt_update.py b/tests/test_agent_prompt_update.py
index ffa135e23..4612a7c4c 100644
--- a/tests/test_agent_prompt_update.py
+++ b/tests/test_agent_prompt_update.py
@@ -4,7 +4,7 @@
 Tests system prompt updates for all memory agent types:
 - Episodic, Semantic, Core, Procedural, Resource, Knowledge Vault, Reflexion, Meta Memory
 - Verifies updates in: running agents, PostgreSQL database, Redis cache
-- Verifies system message (message_ids[0]) is updated
+- Verifies agent.system field is updated in DB and cache
 
 Prerequisites:
     export GEMINI_API_KEY=your_api_key_here
@@ -224,23 +224,6 @@ async def get_agent_direct_from_api(client: MirixClient, agent_name: str):
     return None
 
 
-def get_system_message_id(agent) -> str:
-    """
-    Get the system message ID from agent's message_ids.
-
-    The system message is always the first message (message_ids[0]).
-
-    Args:
-        agent: AgentState object
-
-    Returns:
-        str: System message ID, or empty string if no messages
-    """
-    if agent.message_ids and len(agent.message_ids) > 0:
-        return agent.message_ids[0]
-    return ""
-
-
 # Agent names to test (short names)
 AGENT_NAMES = [
     "episodic",
@@ -261,7 +244,7 @@ async def test_update_agent_system_prompt(client, agent_name):
 
     Verifies:
     1. System prompt is updated in the agent state
-    2. System message (message_ids[0]) is updated
+    2. agent.system field is updated
     3. Changes are persisted in the database
     4. Changes are reflected in Redis cache (via subsequent reads)
 
@@ -283,11 +266,6 @@ async def test_update_agent_system_prompt(client, agent_name):
     print(f"[OK] Found agent: {original_agent.name} (ID: {original_agent.id})")
     print(f"     Original system prompt: {original_agent.system[:80]}...")
 
-    # Get original system message ID
-    original_message_id = get_system_message_id(original_agent)
-    if original_message_id:
-        print(f"     Original system message ID: {original_message_id}")
-
     # Step 2: Update system prompt
     print(f"\n[Step 2] Updating system prompt for '{agent_name}' agent...")
 
@@ -316,12 +294,6 @@ async def test_update_agent_system_prompt(client, agent_name):
     assert updated_agent.system == new_system_prompt, "System prompt in returned agent should match the new prompt"
     print(f"[OK] System prompt matches in returned state")
 
-    # Verify system message ID changed
-    new_message_id = updated_agent.message_ids[0] if updated_agent.message_ids else None
-    if original_message_id and new_message_id:
-        assert new_message_id != original_message_id, "System message ID (message_ids[0]) should have changed"
-        print(f"[OK] System message ID changed: {original_message_id} → {new_message_id}")
-
     # Step 4: Wait for cache and database to sync
     print(f"\n[Step 4] Waiting 2 seconds for cache/database sync...")
     time.sleep(2)
@@ -336,11 +308,6 @@ async def test_update_agent_system_prompt(client, agent_name):
     print(f"[OK] System prompt persisted in cache")
     print(f"     Cached prompt: {refetched_agent.system[:80]}...")
 
-    # Verify message_ids[0] is still the new one
-    cached_message_id = refetched_agent.message_ids[0] if refetched_agent.message_ids else None
-    assert cached_message_id == new_message_id, "System message ID should persist in cache"
-    print(f"[OK] System message ID persisted: {cached_message_id}")
-
     # Step 6: Verify system prompt in agent state
     print(f"\n[Step 6] Verifying system prompt is stored correctly...")
 
@@ -426,7 +393,7 @@ async def test_update_same_agent_multiple_times(client):
     Verifies:
     1. Multiple updates to the same agent work correctly
     2. Each update creates a new system message
-    3. message_ids[0] is updated each time
+    3. agent.system is updated each time
     """
     print("\n" + "=" * 70)
     print("TEST: Multiple Updates to Same Agent")
@@ -436,7 +403,6 @@ async def test_update_same_agent_multiple_times(client):
 
     print(f"\n[Test] Updating '{agent_name}' agent 3 times in succession...")
 
-    previous_message_id = None
     previous_prompt = None
 
     for i in range(1, 4):
@@ -451,18 +417,11 @@ async def test_update_same_agent_multiple_times(client):
         assert updated.system == new_prompt, f"Update {i} should apply new prompt"
         print(f"    ✓ Prompt updated")
 
-        # Verify message_ids[0] changed
-        current_message_id = updated.message_ids[0] if updated.message_ids else None
-        if previous_message_id:
-            assert current_message_id != previous_message_id, f"Update {i} should create new system message"
-            print(f"    ✓ Message ID changed: {previous_message_id[:20]}... → {current_message_id[:20]}...")
-
         # Verify prompt is different from previous
         if previous_prompt:
             assert updated.system != previous_prompt, f"Update {i} should change prompt from previous"
             print(f"    ✓ Prompt changed from previous")
 
-        previous_message_id = current_message_id
         previous_prompt = new_prompt
 
         # Small delay between updates
diff --git a/tests/test_block_filter_tag_updates.py b/tests/test_block_filter_tag_updates.py
index e8d22436b..1742f7603 100644
--- a/tests/test_block_filter_tag_updates.py
+++ b/tests/test_block_filter_tag_updates.py
@@ -30,14 +30,6 @@
 project_root = Path(__file__).parent.parent
 sys.path.insert(0, str(project_root))
 
-# One event loop per module for integration tests (avoids "Future attached to
-# a different loop" / "another operation is in progress").
-@pytest_asyncio.fixture(scope="module")
-def event_loop():
-    loop = asyncio.new_event_loop()
-    yield loop
-    loop.close()
-
 from mirix.queue.queue_util import put_messages
 from mirix.schemas.client import Client
 from mirix.schemas.enums import MessageRole
diff --git a/tests/test_block_filter_tags_update_mode.py b/tests/test_block_filter_tags_update_mode.py
index 981ecdb2d..be875c1bc 100644
--- a/tests/test_block_filter_tags_update_mode.py
+++ b/tests/test_block_filter_tags_update_mode.py
@@ -30,14 +30,6 @@
 project_root = Path(__file__).parent.parent
 sys.path.insert(0, str(project_root))
 
-# One event loop per module for integration tests (avoids "Future attached to
-# a different loop" / "another operation is in progress").
-@pytest_asyncio.fixture(scope="module")
-def event_loop():
-    loop = asyncio.new_event_loop()
-    yield loop
-    loop.close()
-
 from mirix.queue.queue_util import put_messages
 from mirix.schemas.client import Client
 from mirix.schemas.enums import MessageRole
diff --git a/tests/test_filter_tags_db.py b/tests/test_filter_tags_db.py
index 25f399af0..5271705c8 100644
--- a/tests/test_filter_tags_db.py
+++ b/tests/test_filter_tags_db.py
@@ -26,13 +26,6 @@
 project_root = Path(__file__).parent.parent
 sys.path.insert(0, str(project_root))
 
-# One event loop per module to avoid "another operation is in progress".
-@pytest_asyncio.fixture(scope="module")
-def event_loop():
-    loop = asyncio.new_event_loop()
-    yield loop
-    loop.close()
-
 from mirix.schemas.client import Client as PydanticClient
 from mirix.schemas.raw_memory import RawMemoryItemCreate
 from mirix.schemas.user import User as PydanticUser
diff --git a/tests/test_memory_integration.py b/tests/test_memory_integration.py
index b8586df57..ff207fb91 100644
--- a/tests/test_memory_integration.py
+++ b/tests/test_memory_integration.py
@@ -276,5 +276,360 @@ async def test_search(client):
     print("[OK] All search tests completed")
 
 
+# =================================================================
+# MESSAGE LIFECYCLE INTEGRATION TESTS
+#
+# Verify the system's message persistence contracts:
+# - System prompts live on the agent, not as message rows
+# - Retention=0 clients leave no message rows after processing
+# - Retention=N clients keep exactly N message-sets, pruning older ones
+# - Failed processing (e.g. context overflow) leaves no partial state
+# =================================================================
+
+MSG_TEST_USER_ID = "msg-lifecycle-user"
+MSG_TEST_CLIENT_ID = "msg-lifecycle-client"
+
+
+@pytest_asyncio.fixture(scope="module", loop_scope="module")
+async def msg_api_auth(server_process):
+    """Provision a dedicated client for message lifecycle tests."""
+    from conftest import _create_client_and_key
+
+    auth = await _create_client_and_key(
+        MSG_TEST_CLIENT_ID, TEST_ORG_ID, org_name="Demo Org"
+    )
+    return auth
+
+
+@pytest_asyncio.fixture(scope="module", loop_scope="module")
+async def msg_client(server_process, msg_api_auth):
+    """MirixClient for message lifecycle tests, initialized once per module."""
+    c = await MirixClient.create(
+        api_key=msg_api_auth["api_key"],
+        base_url="http://localhost:8000",
+        debug=False,
+    )
+    config_path = project_root / "mirix" / "configs" / "examples" / "mirix_gemini.yaml"
+    await c.initialize_meta_agent(
+        config_path=str(config_path), update_agents=False
+    )
+    await c.create_or_get_user(
+        user_id=MSG_TEST_USER_ID, user_name="Message Lifecycle User"
+    )
+    return c
+
+
+def _get_server():
+    """Import and return the singleton AsyncServer."""
+    from mirix.server.rest_api import get_server
+
+    return get_server()
+
+
+async def _get_message_rows(agent_id: str, user_id: str, org_id: str):
+    """Query the messages table for a given (agent, user) pair.
+
+    Returns all non-deleted message rows in chronological order.
+    """
+    from mirix.services.message_manager import MessageManager
+    from mirix.schemas.client import Client
+
+    mm = MessageManager()
+    actor = Client(
+        id="query-actor",
+        organization_id=org_id,
+        name="query",
+        status="active",
+        write_scope="test",
+        read_scopes=["test"],
+    )
+    return await mm.get_messages_for_agent_user(
+        agent_id=agent_id,
+        user_id=user_id,
+        actor=actor,
+        limit=10000,
+    )
+
+
+async def _get_sub_agent_ids(client: MirixClient):
+    """Return a dict mapping short agent name -> agent_id."""
+    top_level = await client.list_agents()
+    meta = next((a for a in top_level if a.name == "meta_memory_agent"), None)
+    if not meta:
+        return {}
+
+    from mirix.schemas.agent import AgentState
+
+    resp = await client._request(
+        "GET", f"/agents?parent_id={meta.id}&limit=1000"
+    )
+    sub_agents = resp if isinstance(resp, list) else resp.get("agents", [])
+    result = {"meta_memory_agent": meta.id}
+    for data in sub_agents:
+        agent = AgentState(**data)
+        short = agent.name
+        if "meta_memory_agent_" in short:
+            short = (
+                short.replace("meta_memory_agent_", "")
+                .replace("_memory_agent", "")
+                .replace("_agent", "")
+            )
+        result[short] = agent.id
+    return result
+
+
+# -----------------------------------------------------------------
+# System prompt is stored on the agent, not as a message row
+# -----------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_system_prompt_stored_on_agent_not_as_message(msg_client):
+    """The system prompt lives in agent_state.system. Updating it should
+    never create a message row with role='system' in the messages table.
+    """
+    client = msg_client
+    agent_map = await _get_sub_agent_ids(client)
+
+    agent_name = "episodic"
+    if agent_name not in agent_map:
+        pytest.skip(f"Agent '{agent_name}' not found")
+
+    agent_id = agent_map[agent_name]
+
+    new_prompt = (
+        "You are an episodic memory agent for integration testing. "
+        "Extract episodic events from conversations."
+    )
+    updated = await client.update_system_prompt(
+        agent_name=agent_name, system_prompt=new_prompt
+    )
+
+    assert updated.system == new_prompt
+
+    await asyncio.sleep(1)
+
+    messages = await _get_message_rows(
+        agent_id=agent_id,
+        user_id=MSG_TEST_USER_ID,
+        org_id=TEST_ORG_ID,
+    )
+    system_msgs = [m for m in messages if m.role == "system"]
+    assert len(system_msgs) == 0, (
+        f"System prompt should not be stored as a message row; "
+        f"found {len(system_msgs)} system message(s)"
+    )
+
+
+# -----------------------------------------------------------------
+# Retention=0: no message rows persist after processing
+# -----------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_no_messages_persisted_with_zero_retention(msg_client):
+    """When a client has message_set_retention_count=0, processing a
+    conversation should leave zero message rows in the DB for every
+    agent in the pipeline.
+    """
+    client = msg_client
+    agent_map = await _get_sub_agent_ids(client)
+
+    server = _get_server()
+    db_client = await server.client_manager.get_client_by_id(
+        MSG_TEST_CLIENT_ID
+    )
+    assert (db_client.message_set_retention_count or 0) == 0, (
+        "Test client should default to retention=0"
+    )
+
+    result = await client.add(
+        user_id=MSG_TEST_USER_ID,
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "I had lunch with Alex at the Italian place on 5th Ave.",
+                    }
+                ],
+            },
+            {
+                "role": "assistant",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": "Got it, I'll remember your lunch with Alex.",
+                    }
+                ],
+            },
+        ],
+    )
+    assert result.get("success") is True
+
+    print("  Waiting for queue processing (15s)...")
+    await asyncio.sleep(15)
+
+    for name, aid in agent_map.items():
+        messages = await _get_message_rows(
+            agent_id=aid,
+            user_id=MSG_TEST_USER_ID,
+            org_id=TEST_ORG_ID,
+        )
+        assert len(messages) == 0, (
+            f"Agent '{name}' should have 0 message rows with "
+            f"retention=0, found {len(messages)}"
+        )
+
+
+# -----------------------------------------------------------------
+# Retention=N: keeps at most N message-sets, prunes older ones
+# -----------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_message_retention_prunes_to_limit(msg_client):
+    """With message_set_retention_count=2, sending 3 conversations should
+    leave at most 2 retained message rows for the meta agent. The oldest
+    message-set is pruned after each save.
+    """
+    client = msg_client
+    agent_map = await _get_sub_agent_ids(client)
+
+    if "meta_memory_agent" not in agent_map:
+        pytest.skip("Meta agent not found")
+
+    meta_agent_id = agent_map["meta_memory_agent"]
+
+    server = _get_server()
+    from mirix.schemas.client import ClientUpdate
+
+    await server.client_manager.update_client(
+        ClientUpdate(id=MSG_TEST_CLIENT_ID, message_set_retention_count=2)
+    )
+
+    db_client = await server.client_manager.get_client_by_id(
+        MSG_TEST_CLIENT_ID
+    )
+    assert db_client.message_set_retention_count == 2
+
+    try:
+        conversations = [
+            "I went hiking at Mount Tamalpais this morning.",
+            "I finished reading The Great Gatsby last night.",
+            "I started learning to play guitar today.",
+        ]
+
+        for i, text in enumerate(conversations):
+            result = await client.add(
+                user_id=MSG_TEST_USER_ID,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [{"type": "text", "text": text}],
+                    },
+                    {
+                        "role": "assistant",
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": f"Noted. (conversation {i+1})",
+                            }
+                        ],
+                    },
+                ],
+            )
+            assert result.get("success") is True
+
+            print(f"  Sent conversation {i+1}/3, waiting 15s...")
+            await asyncio.sleep(15)
+
+        messages = await _get_message_rows(
+            agent_id=meta_agent_id,
+            user_id=MSG_TEST_USER_ID,
+            org_id=TEST_ORG_ID,
+        )
+
+        assert len(messages) <= 2, (
+            f"Expected at most 2 retained message rows with "
+            f"retention=2, found {len(messages)}"
+        )
+
+    finally:
+        await server.client_manager.update_client(
+            ClientUpdate(id=MSG_TEST_CLIENT_ID, message_set_retention_count=0)
+        )
+
+        from mirix.services.message_manager import MessageManager
+
+        mm = MessageManager()
+        await mm.hard_delete_user_messages_for_agent(
+            agent_id=meta_agent_id,
+            user_id=MSG_TEST_USER_ID,
+            actor=db_client,
+            keep_newest_n=0,
+        )
+
+
+# -----------------------------------------------------------------
+# Failed processing leaves no partial message state
+# -----------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_failed_processing_leaves_no_messages(msg_client):
+    """When processing fails (e.g. input exceeds the context window),
+    no partial message rows should remain in the DB.
+    """
+    client = msg_client
+    agent_map = await _get_sub_agent_ids(client)
+
+    if "meta_memory_agent" not in agent_map:
+        pytest.skip("Meta agent not found")
+
+    server = _get_server()
+    db_client = await server.client_manager.get_client_by_id(
+        MSG_TEST_CLIENT_ID
+    )
+    assert (db_client.message_set_retention_count or 0) == 0
+
+    # ~2M chars / ~500k tokens — well beyond any model's context window
+    huge_text = "overflow " * 200_000
+
+    try:
+        await client.add(
+            user_id=MSG_TEST_USER_ID,
+            messages=[
+                {
+                    "role": "user",
+                    "content": [{"type": "text", "text": huge_text}],
+                },
+                {
+                    "role": "assistant",
+                    "content": [
+                        {"type": "text", "text": "Acknowledged."}
+                    ],
+                },
+            ],
+        )
+    except Exception:
+        pass
+
+    print("  Waiting for processing attempt (20s)...")
+    await asyncio.sleep(20)
+
+    for name, aid in agent_map.items():
+        messages = await _get_message_rows(
+            agent_id=aid,
+            user_id=MSG_TEST_USER_ID,
+            org_id=TEST_ORG_ID,
+        )
+        assert len(messages) == 0, (
+            f"Agent '{name}' should have 0 message rows after a "
+            f"failed processing attempt, found {len(messages)}"
+        )
+
+
 if __name__ == "__main__":
     pytest.main([__file__, "-v", "-s", "-m", "integration"])
diff --git a/tests/test_message_handling.py b/tests/test_message_handling.py
index eb8a9c0b3..77ef95548 100644
--- a/tests/test_message_handling.py
+++ b/tests/test_message_handling.py
@@ -1,27 +1,27 @@
 """
-Tests for message handling, particularly the race condition fix.
+Tests for message handling after the message_ids refactor.
 
 Tests cover:
-1. get_messages_by_ids gracefully handles missing message IDs
-2. get_in_context_messages filters by user when user is provided
-3. get_in_context_messages returns all messages when user is not provided
+1. get_messages_for_agent_user returns messages in chronological order
+2. hard_delete_user_messages_for_agent deletes correct rows and keeps newest N
+3. Retention=0 path: no DB persistence after step
+4. Retention=N path: persists input messages and prunes to N newest
 """
 
+import asyncio
 from contextlib import asynccontextmanager
 from datetime import datetime
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
-from mirix.schemas.agent import AgentState
 from mirix.schemas.client import Client
 from mirix.schemas.message import Message
 from mirix.schemas.user import User
-from mirix.services.agent_manager import AgentManager
 from mirix.services.message_manager import MessageManager
 
 
-def make_client(id="client-1", org_id="org-1"):
+def make_client(id="client-1", org_id="org-1", retention=0):
     """Create a real Client object for tests."""
     return Client(
         id=id,
@@ -30,6 +30,7 @@ def make_client(id="client-1", org_id="org-1"):
         status="active",
         write_scope="test",
         read_scopes=["test"],
+        message_set_retention_count=retention,
         created_at=datetime.now(),
         updated_at=datetime.now(),
         is_deleted=False,
@@ -50,196 +51,151 @@ def make_user(id="user-1", org_id="org-1"):
     )
 
 
-def make_agent_state(message_ids=None):
-    """Create a mock AgentState with spec for type checking."""
-    agent_state = MagicMock(spec=AgentState)
-    agent_state.message_ids = message_ids or []
-    return agent_state
+def make_pydantic_message(id: str, role: str = "user", user_id: str = "user-1") -> MagicMock:
+    msg = MagicMock(spec=Message)
+    msg.id = id
+    msg.role = role
+    msg.user_id = user_id
+    return msg
 
 
-@pytest.mark.asyncio
-class TestGetMessagesByIds:
-    """Tests for MessageManager.get_messages_by_ids() - race condition fix"""
+class TestGetMessagesForAgentUser:
+    """Tests for MessageManager.get_messages_for_agent_user()"""
 
-    async def test_returns_existing_messages_skips_missing(self):
-        """
-        Test that get_messages_by_ids returns existing messages and skips missing ones.
-
-        This is the key fix for the race condition - when concurrent workers
-        delete messages via summarization, other workers should not crash.
-        """
+    def test_returns_messages_in_chronological_order(self):
+        """DB returns newest-first; method should reverse to chronological."""
         manager = MessageManager()
 
-        # Mock the session and MessageModel.list to return only 2 of 3 requested messages
-        mock_session = MagicMock()
-        mock_msg1 = MagicMock()
-        mock_msg1.id = "msg-1"
-        mock_msg1.to_pydantic.return_value = MagicMock(id="msg-1")
+        # Simulate DB returning newest-first (DESC order)
+        msg_old = MagicMock()
+        msg_old.to_pydantic.return_value = make_pydantic_message("msg-1")
+        msg_new = MagicMock()
+        msg_new.to_pydantic.return_value = make_pydantic_message("msg-2")
 
-        mock_msg2 = MagicMock()
-        mock_msg2.id = "msg-2"
-        mock_msg2.to_pydantic.return_value = MagicMock(id="msg-2")
+        mock_result = MagicMock()
+        mock_result.scalars.return_value.all.return_value = [msg_new, msg_old]  # newest first from DB
 
-        # msg-3 is "missing" (simulates deletion by another worker)
+        mock_session = AsyncMock()
+        mock_session.execute = AsyncMock(return_value=mock_result)
 
         @asynccontextmanager
         async def _async_cm():
             yield mock_session
 
-        with patch.object(manager, "session_maker") as mock_session_maker:
-            mock_session_maker.return_value = _async_cm()
-
-            with patch("mirix.services.message_manager.MessageModel") as MockMessageModel:
-                MockMessageModel.list = AsyncMock(return_value=[mock_msg1, mock_msg2])
-
+        async def run():
+            with patch.object(manager, "session_maker", return_value=_async_cm()):
                 actor = make_client()
-
-                # Request 3 messages, but only 2 exist
-                result = await manager.get_messages_by_ids(
-                    message_ids=["msg-1", "msg-2", "msg-3"], actor=actor
+                return await manager.get_messages_for_agent_user(
+                    agent_id="agent-1", user_id="user-1", actor=actor, limit=10
                 )
 
-                # Should return only the 2 that exist, not crash
-                assert len(result) == 2
-                assert result[0].id == "msg-1"
-                assert result[1].id == "msg-2"
-
-    async def test_preserves_order_of_existing_messages(self):
-        """Test that returned messages maintain the requested order."""
-        manager = MessageManager()
-
-        mock_msg2 = MagicMock()
-        mock_msg2.id = "msg-2"
-        mock_msg2.to_pydantic.return_value = MagicMock(id="msg-2")
-
-        mock_msg1 = MagicMock()
-        mock_msg1.id = "msg-1"
-        mock_msg1.to_pydantic.return_value = MagicMock(id="msg-1")
+        result = asyncio.run(run())
 
-        @asynccontextmanager
-        async def _async_cm():
-            yield MagicMock()
+        # Should be reversed to chronological order
+        assert len(result) == 2
+        assert result[0].id == "msg-1"  # oldest first
+        assert result[1].id == "msg-2"
 
-        with patch.object(manager, "session_maker") as mock_session_maker:
-            mock_session_maker.return_value = _async_cm()
-
-            with patch("mirix.services.message_manager.MessageModel") as MockMessageModel:
-                # DB returns in different order
-                MockMessageModel.list = AsyncMock(return_value=[mock_msg2, mock_msg1])
-
-                actor = make_client()
-
-                result = await manager.get_messages_by_ids(
-                    message_ids=["msg-1", "msg-2"], actor=actor
-                )
+    def test_returns_empty_when_no_messages(self):
+        """Returns empty list when no messages exist."""
+        manager = MessageManager()
 
-                # Should be in requested order, not DB order
-                assert result[0].id == "msg-1"
-                assert result[1].id == "msg-2"
+        mock_result = MagicMock()
+        mock_result.scalars.return_value.all.return_value = []
 
-    async def test_returns_empty_list_when_all_missing(self):
-        """Test that an empty list is returned when all messages are missing."""
-        manager = MessageManager()
+        mock_session = AsyncMock()
+        mock_session.execute = AsyncMock(return_value=mock_result)
 
         @asynccontextmanager
         async def _async_cm():
-            yield MagicMock()
-
-        with patch.object(manager, "session_maker") as mock_session_maker:
-            mock_session_maker.return_value = _async_cm()
-
-            with patch("mirix.services.message_manager.MessageModel") as MockMessageModel:
-                MockMessageModel.list = AsyncMock(return_value=[])
+            yield mock_session
 
+        async def run():
+            with patch.object(manager, "session_maker", return_value=_async_cm()):
                 actor = make_client()
-
-                result = await manager.get_messages_by_ids(
-                    message_ids=["msg-1", "msg-2"], actor=actor
+                return await manager.get_messages_for_agent_user(
+                    agent_id="agent-1", user_id="user-1", actor=actor, limit=10
                 )
 
-                assert result == []
-
-
-@pytest.mark.asyncio
-class TestGetInContextMessages:
-    """Tests for AgentManager.get_in_context_messages() - user filtering fix"""
-
-    async def test_filters_by_user_id_when_user_provided(self):
-        """
-        Test that messages are filtered by user.id when user parameter is provided.
+        result = asyncio.run(run())
+        assert result == []
 
-        This fixes the bug where actor.id (client ID) was used instead of user.id.
-        """
-        manager = AgentManager()
 
-        # Create mock messages
-        system_msg = MagicMock()
-        system_msg.user_id = "system"
+class TestHardDeleteUserMessagesForAgent:
+    """Tests for MessageManager.hard_delete_user_messages_for_agent()"""
 
-        user_a_msg = MagicMock()
-        user_a_msg.user_id = "user-a"
-
-        user_b_msg = MagicMock()
-        user_b_msg.user_id = "user-b"
-
-        with patch.object(manager, "message_manager") as mock_msg_manager:
-            mock_msg_manager.get_messages_by_ids = AsyncMock(
-                return_value=[system_msg, user_a_msg, user_b_msg]
-            )
-
-            agent_state = make_agent_state(message_ids=["sys-1", "msg-a", "msg-b"])
-            actor = make_client(id="client-123")
-            user = make_user(id="user-a")  # Should filter to this user's messages
+    def test_deletes_all_when_keep_newest_n_is_zero(self):
+        """keep_newest_n=0 means delete everything."""
+        manager = MessageManager()
 
-            result = await manager.get_in_context_messages(
-                agent_state=agent_state, actor=actor, user=user
-            )
+        delete_ids_result = MagicMock()
+        delete_ids_result.all.return_value = [("msg-1",), ("msg-2",), ("msg-3",)]
 
-            # Should have system message + only user-a's message
-            assert len(result) == 2
-            assert result[0] == system_msg
-            assert result[1] == user_a_msg
+        execute_results = [
+            delete_ids_result,  # select IDs to delete
+            MagicMock(),        # DELETE statement
+        ]
 
-    async def test_no_filtering_when_user_not_provided(self):
-        """
-        Test that all messages are returned when user parameter is not provided.
+        mock_session = AsyncMock()
+        mock_session.execute = AsyncMock(side_effect=execute_results)
+        mock_session.commit = AsyncMock()
 
-        This maintains backward compatibility.
-        """
-        manager = AgentManager()
+        @asynccontextmanager
+        async def _async_cm():
+            yield mock_session
 
-        system_msg = MagicMock()
-        user_a_msg = MagicMock()
-        user_b_msg = MagicMock()
+        async def run():
+            with patch.object(manager, "session_maker", return_value=_async_cm()):
+                with patch("mirix.database.redis_client.get_redis_client", return_value=None):
+                    actor = make_client()
+                    return await manager.hard_delete_user_messages_for_agent(
+                        agent_id="agent-1",
+                        user_id="user-1",
+                        actor=actor,
+                        keep_newest_n=0,
+                    )
+
+        count = asyncio.run(run())
+        assert count == 3
+
+    def test_returns_zero_when_no_messages_exist(self):
+        """Returns 0 when there are no messages to delete."""
+        manager = MessageManager()
 
-        with patch.object(manager, "message_manager") as mock_msg_manager:
-            mock_msg_manager.get_messages_by_ids = AsyncMock(
-                return_value=[system_msg, user_a_msg, user_b_msg]
-            )
+        delete_ids_result = MagicMock()
+        delete_ids_result.all.return_value = []  # nothing to delete
 
-            agent_state = make_agent_state(message_ids=["sys-1", "msg-a", "msg-b"])
-            actor = make_client()
+        mock_session = AsyncMock()
+        mock_session.execute = AsyncMock(return_value=delete_ids_result)
+        mock_session.commit = AsyncMock()
 
-            # No user parameter
-            result = await manager.get_in_context_messages(
-                agent_state=agent_state, actor=actor
-            )
+        @asynccontextmanager
+        async def _async_cm():
+            yield mock_session
 
-            # Should return all messages (no filtering)
-            assert len(result) == 3
+        async def run():
+            with patch.object(manager, "session_maker", return_value=_async_cm()):
+                actor = make_client()
+                return await manager.hard_delete_user_messages_for_agent(
+                    agent_id="agent-1",
+                    user_id="user-1",
+                    actor=actor,
+                    keep_newest_n=0,
+                )
 
-    async def test_returns_empty_when_no_messages(self):
-        """Test that empty list is returned when agent has no messages."""
-        manager = AgentManager()
+        count = asyncio.run(run())
+        assert count == 0
 
-        with patch.object(manager, "message_manager") as mock_msg_manager:
-            mock_msg_manager.get_messages_by_ids = AsyncMock(return_value=[])
 
-            agent_state = make_agent_state(message_ids=[])
-            actor = make_client()
+class TestRetentionBehavior:
+    """Tests that retention=0 vs retention>0 produces correct persistence behavior."""
 
-            result = await manager.get_in_context_messages(
-                agent_state=agent_state, actor=actor
-            )
+    def test_client_default_retention_is_zero(self):
+        """Clients default to message_set_retention_count=0."""
+        client = make_client()
+        assert (client.message_set_retention_count or 0) == 0
 
-            assert result == []
+    def test_client_with_retention_has_correct_value(self):
+        """Clients configured with retention=5 expose that value."""
+        client = make_client(retention=5)
+        assert client.message_set_retention_count == 5
diff --git a/tests/test_queue.py b/tests/test_queue.py
index f2f4413ec..8d8fe5648 100644
--- a/tests/test_queue.py
+++ b/tests/test_queue.py
@@ -43,14 +43,6 @@
 # ============================================================================
 
 
-@pytest_asyncio.fixture
-def event_loop():
-    """Single event loop for the module so DB managers and tests share one loop."""
-    loop = asyncio.new_event_loop()
-    yield loop
-    loop.close()
-
-
 @pytest_asyncio.fixture(scope="module", autouse=True)
 async def _init_db():
     """Create all DB tables before any test in this module touches the database."""
diff --git a/tests/test_raw_memory.py b/tests/test_raw_memory.py
index b07abded3..1ef1f72a4 100644
--- a/tests/test_raw_memory.py
+++ b/tests/test_raw_memory.py
@@ -519,7 +519,7 @@ async def test_raw_memory_create_with_redis(raw_memory_manager, test_actor, test
     await raw_memory_manager.delete_raw_memory(created.id, test_actor)
 
 
-async def test_raw_memory_cache_hit_performance(raw_memory_manager, test_actor, test_user):
+async def test_raw_memory_cache_hit_performance(raw_memory_manager, test_actor, test_user, redis_client):
     """Test cache hit performance for raw memory reads."""
     memory_data = RawMemoryItemCreate(
         context="Redis test: Performance testing context",
diff --git a/tests/test_redis_integration.py b/tests/test_redis_integration.py
index 4e81fe450..8e64a48dd 100644
--- a/tests/test_redis_integration.py
+++ b/tests/test_redis_integration.py
@@ -40,13 +40,6 @@
 from mirix.database.redis_client import RedisMemoryClient, get_redis_client, initialize_redis_client
 
 
-@pytest.fixture(scope="module")
-def event_loop():
-    """Single event loop for the whole module so engine and Redis stay on one loop."""
-    loop = asyncio.new_event_loop()
-    yield loop
-    loop.close()
-
 from mirix.log import get_logger
 from mirix.schemas.agent import AgentType, CreateAgent, UpdateAgent
 from mirix.schemas.block import Block as PydanticBlock
@@ -91,39 +84,16 @@ def generate_test_id(prefix: str) -> str:
 
 
 @pytest_asyncio.fixture(scope="module", autouse=True)
-async def _ensure_server_and_redis_in_loop():
-    """Import server in the module event loop and use NullPool to avoid connection reuse issues."""
-    import mirix.server.server as server_module  # noqa: F401
-
-    # Use NullPool so each session gets a fresh connection (avoids 'another operation is in progress')
-    if (
-        hasattr(server_module, "engine")
-        and server_module.engine is not None
-        and "asyncpg" in str(server_module.engine.url)
-    ):
-        from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
-        from sqlalchemy.pool import NullPool
-
-        await server_module.engine.dispose()
-        _pg_uri = settings.mirix_pg_uri.replace("postgresql+pg8000://", "postgresql+asyncpg://").replace(
-            "postgresql://", "postgresql+asyncpg://"
-        )
-        server_module.engine = create_async_engine(_pg_uri, poolclass=NullPool, echo=settings.pg_echo)
-        server_module.AsyncSessionLocal = async_sessionmaker(
-            bind=server_module.engine,
-            class_=AsyncSession,
-            autocommit=False,
-            autoflush=False,
-            expire_on_commit=False,
-        )
+async def _ensure_tables_and_redis():
+    """Ensure DB tables exist (engine reset handled by conftest._reset_engine_per_module)."""
+    import mirix.server.server as server_module
 
     await server_module.ensure_tables_created()
-
     yield
 
 
 @pytest_asyncio.fixture
-async def redis_client(_ensure_server_and_redis_in_loop):
+async def redis_client(_ensure_tables_and_redis):
     """Redis client for tests: use a fresh client in this loop to avoid 'Future attached to different loop'."""
     if not settings.redis_enabled:
         pytest.skip("Redis not enabled - set MIRIX_REDIS_ENABLED=true")
@@ -213,61 +183,61 @@ async def test_client(test_organization):
 
 
 @pytest_asyncio.fixture
-async def block_manager(_ensure_server_and_redis_in_loop):
+async def block_manager(_ensure_tables_and_redis):
     """Create block manager instance (after server in loop)."""
     return BlockManager()
 
 
 @pytest_asyncio.fixture
-async def message_manager(_ensure_server_and_redis_in_loop):
+async def message_manager(_ensure_tables_and_redis):
     """Create message manager instance (after server in loop)."""
     return MessageManager()
 
 
 @pytest_asyncio.fixture
-async def episodic_manager(_ensure_server_and_redis_in_loop):
+async def episodic_manager(_ensure_tables_and_redis):
     """Create episodic memory manager instance (after server in loop)."""
     return EpisodicMemoryManager()
 
 
 @pytest_asyncio.fixture
-async def semantic_manager(_ensure_server_and_redis_in_loop):
+async def semantic_manager(_ensure_tables_and_redis):
     """Create semantic memory manager instance (after server in loop)."""
     return SemanticMemoryManager()
 
 
 @pytest_asyncio.fixture
-async def procedural_manager(_ensure_server_and_redis_in_loop):
+async def procedural_manager(_ensure_tables_and_redis):
     """Create procedural memory manager instance (after server in loop)."""
     return ProceduralMemoryManager()
 
 
 @pytest_asyncio.fixture
-async def resource_manager(_ensure_server_and_redis_in_loop):
+async def resource_manager(_ensure_tables_and_redis):
     """Create resource memory manager instance (after server in loop)."""
     return ResourceMemoryManager()
 
 
 @pytest_asyncio.fixture
-async def knowledge_manager(_ensure_server_and_redis_in_loop):
+async def knowledge_manager(_ensure_tables_and_redis):
     """Create knowledge vault manager instance (after server in loop)."""
     return KnowledgeVaultManager()
 
 
 @pytest_asyncio.fixture
-async def organization_manager(_ensure_server_and_redis_in_loop):
+async def organization_manager(_ensure_tables_and_redis):
     """Create organization manager (after server is imported in loop)."""
     return OrganizationManager()
 
 
 @pytest_asyncio.fixture
-async def user_manager(_ensure_server_and_redis_in_loop):
+async def user_manager(_ensure_tables_and_redis):
     """Create user manager instance (after server in loop)."""
     return UserManager()
 
 
 @pytest_asyncio.fixture(scope="module")
-async def ensure_admin_user(_ensure_server_and_redis_in_loop):
+async def ensure_admin_user(_ensure_tables_and_redis):
     """Ensure the admin user exists in the database.
 
     This is needed because agent creation creates messages that default
diff --git a/tests/test_scoped_blocks.py b/tests/test_scoped_blocks.py
index 4843a950e..f68f1a9b3 100644
--- a/tests/test_scoped_blocks.py
+++ b/tests/test_scoped_blocks.py
@@ -31,42 +31,6 @@
 pytestmark = pytest.mark.asyncio(loop_scope="module")
 
 
-@pytest.fixture(scope="module")
-def event_loop():
-    """Single event loop for the module so shared fixtures and DB use one loop."""
-    loop = asyncio.new_event_loop()
-    yield loop
-    loop.close()
-
-
-@pytest_asyncio.fixture(scope="module", autouse=True)
-async def _ensure_server_in_loop():
-    """Import server in the module event loop and use NullPool to avoid connection reuse issues."""
-    import mirix.server.server as server_module  # noqa: F401
-
-    if (
-        hasattr(server_module, "engine")
-        and server_module.engine is not None
-        and "asyncpg" in str(server_module.engine.url)
-    ):
-        from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
-        from sqlalchemy.pool import NullPool
-
-        await server_module.engine.dispose()
-        _pg_uri = settings.mirix_pg_uri.replace("postgresql+pg8000://", "postgresql+asyncpg://").replace(
-            "postgresql://", "postgresql+asyncpg://"
-        )
-        server_module.engine = create_async_engine(_pg_uri, poolclass=NullPool, echo=settings.pg_echo)
-        server_module.AsyncSessionLocal = async_sessionmaker(
-            bind=server_module.engine,
-            class_=AsyncSession,
-            autocommit=False,
-            autoflush=False,
-            expire_on_commit=False,
-        )
-    yield
-
-
 # =============================================================================
 # Helpers
 # =============================================================================
@@ -82,7 +46,7 @@ def _test_id(prefix: str) -> str:
 
 
 @pytest_asyncio.fixture(scope="module")
-async def test_org(_ensure_server_in_loop):
+async def test_org():
     org_mgr = OrganizationManager()
     org_id = _test_id("scoped-blk-org")
     try:
diff --git a/tests/test_search_all_users.py b/tests/test_search_all_users.py
index ef74a7d74..6362e34b2 100644
--- a/tests/test_search_all_users.py
+++ b/tests/test_search_all_users.py
@@ -197,13 +197,6 @@ class TestSearchAllUsers:
 
     pytestmark = [pytest.mark.asyncio(loop_scope="class")]
 
-    @pytest.fixture(scope="class")
-    def event_loop(self):
-        """Single event loop for the test class so all clients and tests share one loop."""
-        loop = asyncio.new_event_loop()
-        yield loop
-        loop.close()
-
     @pytest.fixture(scope="class")
     def client_scope_value(self):
         """Client scope value used for testing."""
diff --git a/tests/test_user.py b/tests/test_user.py
index 1156dd2a2..f25543741 100644
--- a/tests/test_user.py
+++ b/tests/test_user.py
@@ -65,14 +65,6 @@ def server_check():
     )
 
 
-@pytest.fixture(scope="module")
-def event_loop():
-    """Single event loop for the module so client and tests share one loop."""
-    loop = asyncio.new_event_loop()
-    yield loop
-    loop.close()
-
-
 @pytest_asyncio.fixture
 async def client(server_check, api_auth):
     """Create a new MirixClient per test in the current loop (avoids closed-loop httpx)."""
diff --git a/tests/test_user_manager.py b/tests/test_user_manager.py
index b1d9e8d03..3d89e8977 100644
--- a/tests/test_user_manager.py
+++ b/tests/test_user_manager.py
@@ -64,46 +64,8 @@ def client_manager():
     return ClientManager()
 
 
-@pytest_asyncio.fixture(scope="module")
-def event_loop():
-    """Single event loop for the module so global DB engine stays on one loop."""
-    loop = asyncio.new_event_loop()
-    yield loop
-    loop.close()
-
-
-@pytest_asyncio.fixture(scope="module", autouse=True)
-async def _ensure_server_in_loop():
-    """Run server engine in the module event loop; use NullPool to avoid connection reuse."""
-    import mirix.server.server as server_module
-
-    if (
-        hasattr(server_module, "engine")
-        and server_module.engine is not None
-        and "asyncpg" in str(server_module.engine.url)
-    ):
-        from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
-        from sqlalchemy.pool import NullPool
-
-        await server_module.engine.dispose()
-        _pg_uri = settings.mirix_pg_uri.replace(
-            "postgresql+pg8000://", "postgresql+asyncpg://"
-        ).replace("postgresql://", "postgresql+asyncpg://")
-        server_module.engine = create_async_engine(
-            _pg_uri, poolclass=NullPool, echo=settings.pg_echo
-        )
-        server_module.AsyncSessionLocal = async_sessionmaker(
-            bind=server_module.engine,
-            class_=AsyncSession,
-            autocommit=False,
-            autoflush=False,
-            expire_on_commit=False,
-        )
-    yield
-
-
 @pytest_asyncio.fixture
-async def test_org1(_ensure_server_in_loop, organization_manager):
+async def test_org1(organization_manager):
     """Create test organization 1."""
     org = PydanticOrganization(id=generate_test_id("org"), name="Test Organization 1")
     created_org = await organization_manager.create_organization(org)
@@ -115,7 +77,7 @@ async def test_org1(_ensure_server_in_loop, organization_manager):
 
 
 @pytest_asyncio.fixture
-async def test_org2(_ensure_server_in_loop, organization_manager):
+async def test_org2(organization_manager):
     """Create test organization 2."""
     org = PydanticOrganization(id=generate_test_id("org"), name="Test Organization 2")
     created_org = await organization_manager.create_organization(org)

From de11b146fc51bd6bcf22bd42be1980f5d3ba2ef1 Mon Sep 17 00:00:00 2001
From: Lucas Parzych <lucas_parzych@intuit.com>
Date: Fri, 20 Mar 2026 19:26:02 -0700
Subject: [PATCH 05/13] fix: test fixes

---
 mirix/agent/agent.py                         | 14 +---
 scripts/run_tests_with_docker.sh             | 18 ++++-
 tests/conftest.py                            | 11 +++
 tests/test_memory_integration.py             | 48 +++++++-----
 tests/test_raw_memory.py                     |  8 +-
 tests/test_search_all_users.py               | 81 ++++++++++++++------
 tests/test_search_single_user_core_memory.py |  1 +
 7 files changed, 122 insertions(+), 59 deletions(-)

diff --git a/mirix/agent/agent.py b/mirix/agent/agent.py
index 6646c6ad4..d882db6e0 100644
--- a/mirix/agent/agent.py
+++ b/mirix/agent/agent.py
@@ -7,9 +7,9 @@
 from datetime import datetime
 from typing import Callable, List, Optional, Tuple, Union
 
+import httpx
 import numpy as np
 import pytz
-import httpx
 
 from mirix.agent.tool_validators import validate_tool_args
 from mirix.constants import (
@@ -67,12 +67,7 @@
 from mirix.services.step_manager import StepManager
 from mirix.services.tool_execution_sandbox import ToolExecutionSandbox
 from mirix.settings import settings, summarizer_settings
-from mirix.system import (
-    get_contine_chaining,
-    get_token_limit_warning,
-    package_function_response,
-    package_user_message,
-)
+from mirix.system import get_contine_chaining, get_token_limit_warning, package_function_response, package_user_message
 from mirix.tracing import trace_method
 from mirix.utils import (
     convert_timezone_to_utc,
@@ -274,11 +269,6 @@ def __init__(
         # Logger that the Agent specifically can use, will also report the agent_state ID with the logs
         # Note: Logger is already initialized earlier in constructor
 
-    async def load_last_function_response(self):
-        """Load the last function response from message history."""
-        # message_ids no longer persisted — last_function_response is tracked in-memory only
-        return self.last_function_response
-
     async def update_memory_if_changed(self, new_memory: Memory) -> bool:
         """
         Update internal memory object and system prompt if there have been modifications.
diff --git a/scripts/run_tests_with_docker.sh b/scripts/run_tests_with_docker.sh
index a28f2f48f..6d4bb3230 100755
--- a/scripts/run_tests_with_docker.sh
+++ b/scripts/run_tests_with_docker.sh
@@ -227,12 +227,26 @@ else
     PYTEST_CMD="pytest"
 fi
 
+# Detect whether user already provided a pytest marker expression.
+# If not, --integration should imply '-m integration'.
+HAS_MARKER_EXPR=false
+for arg in "${PYTEST_ARGS[@]}"; do
+    if [[ "$arg" == "-m" ]] || [[ "$arg" == --markexpr=* ]]; then
+        HAS_MARKER_EXPR=true
+        break
+    fi
+done
+
 if [ ${#PYTEST_ARGS[@]} -eq 0 ]; then
     if [ "$START_SERVER" = true ]; then
-        $PYTEST_CMD tests/ -v
+        $PYTEST_CMD tests/ -v -m integration
     else
         $PYTEST_CMD tests/ -v -m "not integration"
     fi
 else
-    $PYTEST_CMD "${PYTEST_ARGS[@]}"
+    if [ "$START_SERVER" = true ] && [ "$HAS_MARKER_EXPR" = false ]; then
+        $PYTEST_CMD -m integration "${PYTEST_ARGS[@]}"
+    else
+        $PYTEST_CMD "${PYTEST_ARGS[@]}"
+    fi
 fi
diff --git a/tests/conftest.py b/tests/conftest.py
index b547e3123..f452646c6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -128,3 +128,14 @@ def _create(client_id: str = TEST_CLIENT_ID, org_id: str = TEST_ORG_ID):
 def api_auth(api_key_factory):
     """Default API auth (single client) for tests that need only one key."""
     return api_key_factory()
+
+
+@pytest.fixture(scope="module")
+def isolate_api_key_env():
+    """Temporarily clear MIRIX_API_KEY for header-based client tests."""
+    previous_api_key = os.environ.pop("MIRIX_API_KEY", None)
+    try:
+        yield
+    finally:
+        if previous_api_key is not None:
+            os.environ["MIRIX_API_KEY"] = previous_api_key
diff --git a/tests/test_memory_integration.py b/tests/test_memory_integration.py
index ff207fb91..03eb8072d 100644
--- a/tests/test_memory_integration.py
+++ b/tests/test_memory_integration.py
@@ -82,8 +82,15 @@ async def api_auth(server_process):
 
     auth = await _create_client_and_key(TEST_CLIENT_ID, TEST_ORG_ID, org_name="Demo Org")
     os.environ.setdefault("MIRIX_API_URL", "http://localhost:8000")
+    previous_api_key = os.environ.get("MIRIX_API_KEY")
     os.environ["MIRIX_API_KEY"] = auth["api_key"]
-    return auth
+    try:
+        yield auth
+    finally:
+        if previous_api_key is None:
+            os.environ.pop("MIRIX_API_KEY", None)
+        else:
+            os.environ["MIRIX_API_KEY"] = previous_api_key
 
 
 @pytest_asyncio.fixture
@@ -98,7 +105,10 @@ async def client(server_process, api_auth):
     await c.initialize_meta_agent(config_path=str(config_path), update_agents=False)
     if c._meta_agent:
         print(f"[OK] Meta agent ready: {c._meta_agent.id}")
-    return c
+    try:
+        yield c
+    finally:
+        await c.close()
 
 
 # =================================================================
@@ -106,7 +116,7 @@ async def client(server_process, api_auth):
 # =================================================================
 
 
-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="module")
 async def test_add(client):
     """Test adding memories using client.add()."""
     print("\n[TEST] Adding memory via client.add()...")
@@ -140,7 +150,7 @@ async def test_add(client):
     print(f"[OK] Memory added successfully")
 
 
-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="module")
 async def test_retrieve_with_conversation(client):
     """Test retrieving memories with conversation context."""
     print("\n[TEST] Retrieving memories with conversation...")
@@ -180,7 +190,7 @@ async def test_retrieve_with_conversation(client):
                 print(f"  - {memory_type}: {items['total_count']} items")
 
 
-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="module")
 async def test_retrieve_with_topic(client):
     """Test retrieving memories by topic."""
     print("\n[TEST] Retrieving memories by topic...")
@@ -216,7 +226,7 @@ async def test_retrieve_with_topic(client):
                 print(f"  - {memory_type}: {items['total_count']} items")
 
 
-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="module")
 async def test_search(client):
     """Test searching memories."""
     print("\n[TEST] Searching memories...")
@@ -316,7 +326,10 @@ async def msg_client(server_process, msg_api_auth):
     await c.create_or_get_user(
         user_id=MSG_TEST_USER_ID, user_name="Message Lifecycle User"
     )
-    return c
+    try:
+        yield c
+    finally:
+        await c.close()
 
 
 def _get_server():
@@ -383,7 +396,7 @@ async def _get_sub_agent_ids(client: MirixClient):
 # -----------------------------------------------------------------
 
 
-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="module")
 async def test_system_prompt_stored_on_agent_not_as_message(msg_client):
     """The system prompt lives in agent_state.system. Updating it should
     never create a message row with role='system' in the messages table.
@@ -426,7 +439,7 @@ async def test_system_prompt_stored_on_agent_not_as_message(msg_client):
 # -----------------------------------------------------------------
 
 
-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="module")
 async def test_no_messages_persisted_with_zero_retention(msg_client):
     """When a client has message_set_retention_count=0, processing a
     conversation should leave zero message rows in the DB for every
@@ -488,7 +501,7 @@ async def test_no_messages_persisted_with_zero_retention(msg_client):
 # -----------------------------------------------------------------
 
 
-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="module")
 async def test_message_retention_prunes_to_limit(msg_client):
     """With message_set_retention_count=2, sending 3 conversations should
     leave at most 2 retained message rows for the meta agent. The oldest
@@ -505,14 +518,10 @@ async def test_message_retention_prunes_to_limit(msg_client):
     server = _get_server()
     from mirix.schemas.client import ClientUpdate
 
-    await server.client_manager.update_client(
+    updated_client = await server.client_manager.update_client(
         ClientUpdate(id=MSG_TEST_CLIENT_ID, message_set_retention_count=2)
     )
-
-    db_client = await server.client_manager.get_client_by_id(
-        MSG_TEST_CLIENT_ID
-    )
-    assert db_client.message_set_retention_count == 2
+    assert updated_client.message_set_retention_count == 2
 
     try:
         conversations = [
@@ -557,9 +566,10 @@ async def test_message_retention_prunes_to_limit(msg_client):
         )
 
     finally:
-        await server.client_manager.update_client(
+        reset_client = await server.client_manager.update_client(
             ClientUpdate(id=MSG_TEST_CLIENT_ID, message_set_retention_count=0)
         )
+        assert (reset_client.message_set_retention_count or 0) == 0
 
         from mirix.services.message_manager import MessageManager
 
@@ -567,7 +577,7 @@ async def test_message_retention_prunes_to_limit(msg_client):
         await mm.hard_delete_user_messages_for_agent(
             agent_id=meta_agent_id,
             user_id=MSG_TEST_USER_ID,
-            actor=db_client,
+            actor=reset_client,
             keep_newest_n=0,
         )
 
@@ -577,7 +587,7 @@ async def test_message_retention_prunes_to_limit(msg_client):
 # -----------------------------------------------------------------
 
 
-@pytest.mark.asyncio
+@pytest.mark.asyncio(loop_scope="module")
 async def test_failed_processing_leaves_no_messages(msg_client):
     """When processing fails (e.g. input exceeds the context window),
     no partial message rows should remain in the DB.
diff --git a/tests/test_raw_memory.py b/tests/test_raw_memory.py
index 1ef1f72a4..7d76bacf2 100644
--- a/tests/test_raw_memory.py
+++ b/tests/test_raw_memory.py
@@ -818,7 +818,9 @@ async def test_api_create_and_get_raw_memory(api_client, raw_memory_manager, tes
 
 
 @pytest.mark.integration
-async def test_api_update_raw_memory_replace(api_client, raw_memory_manager, test_actor, test_user, mock_embedding_model):
+async def test_api_update_raw_memory_replace(
+    api_client, raw_memory_manager, test_actor, test_user, test_agent, mock_embedding_model
+):
     """Test PATCH /memory/raw/{memory_id} endpoint with replace mode."""
     import os
 
@@ -872,7 +874,7 @@ async def test_api_update_raw_memory_replace(api_client, raw_memory_manager, tes
 
 @pytest.mark.integration
 async def test_api_update_raw_memory_append_and_merge(
-    api_client, raw_memory_manager, test_actor, test_user, mock_embedding_model
+    api_client, raw_memory_manager, test_actor, test_user, test_agent, mock_embedding_model
 ):
     """Test PATCH /memory/raw/{memory_id} endpoint with append and merge modes."""
     import os
@@ -929,7 +931,7 @@ async def test_api_update_raw_memory_append_and_merge(
 
 
 @pytest.mark.integration
-async def test_api_delete_raw_memory(api_client, test_actor, test_user):
+async def test_api_delete_raw_memory(api_client, test_actor, test_user, test_agent):
     """Test DELETE /memory/raw/{memory_id} endpoint.
 
     Create via POST so create/delete/get all go through the same server (same DB and cache).
diff --git a/tests/test_search_all_users.py b/tests/test_search_all_users.py
index 6362e34b2..c8bf4d4ef 100644
--- a/tests/test_search_all_users.py
+++ b/tests/test_search_all_users.py
@@ -18,7 +18,7 @@
 import os
 import time
 from pathlib import Path
-from typing import Optional
+from typing import Any, Awaitable, Callable, Optional
 
 import pytest
 import pytest_asyncio
@@ -28,6 +28,7 @@
 # Mark all tests as integration tests (require a running server)
 pytestmark = [
     pytest.mark.integration,
+    pytest.mark.usefixtures("isolate_api_key_env"),
 ]
 
 # Configure logging
@@ -41,6 +42,24 @@
 CONFIG_PATH = Path(__file__).parent.parent / "mirix" / "configs" / "examples" / "mirix_gemini.yaml"
 
 
+async def poll_until(
+    fetch_results: Callable[[], Awaitable[dict[str, Any]]],
+    is_ready: Callable[[dict[str, Any]], bool],
+    wait_log: str,
+    max_wait_s: int = 90,
+    interval_s: int = 15,
+) -> dict[str, Any]:
+    """Poll an async search until condition is met or timeout expires."""
+    results = await fetch_results()
+    elapsed = 0
+    while not is_ready(results) and elapsed < max_wait_s:
+        logger.info(wait_log, interval_s, elapsed)
+        await asyncio.sleep(interval_s)
+        elapsed += interval_s
+        results = await fetch_results()
+    return results
+
+
 async def add_all_memories(
     client: MirixClient,
     user_id: str,
@@ -559,8 +578,18 @@ async def test_search_different_org_no_cross_contamination(
         logger.info("TEST 8: Organization isolation - same scope, different org")
         logger.info("=" * 80)
 
-        # Search with client2 (in org2)
-        results = await client2.search_all_users(query="", memory_type="all", client_id=client2.client_id, limit=100)
+        # Search with client2 (in org2). Poll briefly because async memory
+        # extraction can lag under heavier CI/local runs.
+        async def _search_client2_bm25():
+            return await client2.search_all_users(
+                query="", memory_type="all", client_id=client2.client_id, limit=100
+            )
+
+        results = await poll_until(
+            fetch_results=_search_client2_bm25,
+            is_ready=lambda r: user4_id in set(result["user_id"] for result in r["results"]),
+            wait_log="Org2 search missing user4; waiting %ss before retry (elapsed=%ss)...",
+        )
 
         user_ids_in_results = set(result["user_id"] for result in results["results"])
         logger.info(f"Client 2 search - User IDs in results: {user_ids_in_results}")
@@ -583,12 +612,21 @@ async def test_search_different_org_no_cross_contamination_embedding(
         logger.info("=" * 80)
 
         # Search with client2 (in org2)
-        results = await client2.search_all_users(
-            query="database information",  # Semantic query
-            memory_type="all",
-            search_method="embedding",
-            client_id=client2.client_id,
-            limit=100,
+        async def _search_client2_embedding():
+            return await client2.search_all_users(
+                query="database information",
+                memory_type="all",
+                search_method="embedding",
+                client_id=client2.client_id,
+                limit=100,
+            )
+
+        results = await poll_until(
+            fetch_results=_search_client2_embedding,
+            is_ready=lambda r: user4_id in set(result["user_id"] for result in r["results"]),
+            wait_log=(
+                "Org2 embedding search missing user4; waiting %ss before retry (elapsed=%ss)..."
+            ),
         )
 
         user_ids_in_results = set(result["user_id"] for result in results["results"])
@@ -663,18 +701,8 @@ async def test_search_specific_memory_type_embedding(self, client1, user1_id, us
             limit=20,
         )
 
-        max_wait_s = 90
-        interval_s = 15
-        elapsed = 0
-        while results["count"] == 0 and elapsed < max_wait_s:
-            logger.info(
-                "Semantic embedding search returned 0; waiting %ss before retry (elapsed=%ds)...",
-                interval_s,
-                elapsed,
-            )
-            await asyncio.sleep(interval_s)
-            elapsed += interval_s
-            results = await client1.search_all_users(
+        async def _search_semantic_embedding():
+            return await client1.search_all_users(
                 query="programming language concepts",
                 memory_type="semantic",
                 search_method="embedding",
@@ -682,14 +710,21 @@ async def test_search_specific_memory_type_embedding(self, client1, user1_id, us
                 limit=20,
             )
 
+        results = await poll_until(
+            fetch_results=_search_semantic_embedding,
+            is_ready=lambda r: r["count"] > 0,
+            wait_log=(
+                "Semantic embedding search returned 0; waiting %ss before retry (elapsed=%ss)..."
+            ),
+        )
+
         logger.info(f"Results: {results['count']} semantic memories found")
         logger.info(f"Search Method: {results.get('search_method')}")
 
         assert results["success"] is True
         assert results["search_method"] == "embedding"
         assert results["count"] > 0, (
-            "Semantic embedding search still 0 results after waiting %ds (index may not be ready)."
-            % max_wait_s
+            "Semantic embedding search still 0 results after waiting for retries (index may not be ready)."
         )
 
         # All results should be semantic type
diff --git a/tests/test_search_single_user_core_memory.py b/tests/test_search_single_user_core_memory.py
index 2ce0752dd..ca3691138 100644
--- a/tests/test_search_single_user_core_memory.py
+++ b/tests/test_search_single_user_core_memory.py
@@ -26,6 +26,7 @@
 
 pytestmark = [
     pytest.mark.integration,
+    pytest.mark.usefixtures("isolate_api_key_env"),
 ]
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

From 102bf2c9da09f573da25f5fb31e3f227f37421dc Mon Sep 17 00:00:00 2001
From: Lucas Parzych <lucas_parzych@intuit.com>
Date: Fri, 20 Mar 2026 19:49:55 -0700
Subject: [PATCH 06/13] fix: remove _generate_initial_message_sequence and
 append_initial_message_sequence_to_in_context_messages

These were accidentally restored from re-org during merge conflict resolution.
No callers remain (sdk.py no longer passes add_default_initial_messages).
Also removes the dead if add_default_initial_messages block from reset_messages
and the now-unused MessageCreate/PydanticMessage imports.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 mirix/services/agent_manager.py | 67 +--------------------------------
 1 file changed, 1 insertion(+), 66 deletions(-)

diff --git a/mirix/services/agent_manager.py b/mirix/services/agent_manager.py
index 78bde2c09..392e01c6a 100644
--- a/mirix/services/agent_manager.py
+++ b/mirix/services/agent_manager.py
@@ -33,8 +33,6 @@
 from mirix.schemas.agent import AgentState as PydanticAgentState
 from mirix.schemas.agent import AgentType, CreateAgent, CreateMetaAgent, UpdateAgent, UpdateMetaAgent
 from mirix.schemas.client import Client as PydanticClient
-from mirix.schemas.message import Message as PydanticMessage
-from mirix.schemas.message import MessageCreate
 from mirix.schemas.embedding_config import EmbeddingConfig
 from mirix.schemas.enums import ToolType
 from mirix.schemas.llm_config import LLMConfig
@@ -626,63 +624,6 @@ async def update_agent_tools_and_system_prompts(
                 actor=actor,
             )
 
-    @enforce_types
-    def _generate_initial_message_sequence(
-        self,
-        actor: PydanticClient,
-        agent_state: PydanticAgentState,
-        supplied_initial_message_sequence: Optional[List[MessageCreate]] = None,
-        user_id: Optional[str] = None,
-    ) -> List[PydanticMessage]:
-        init_messages = initialize_message_sequence(
-            agent_state=agent_state,
-            memory_edit_timestamp=get_utc_time(),
-            include_initial_boot_message=True,
-        )
-        if supplied_initial_message_sequence is not None:
-            # We always need the system prompt up front
-            system_message_obj = PydanticMessage.dict_to_message(
-                agent_id=agent_state.id,
-                model=agent_state.llm_config.model,
-                openai_message_dict=init_messages[0],
-            )
-            # Don't use anything else in the pregen sequence, instead use the provided sequence
-            init_messages = [system_message_obj]
-            init_messages.extend(
-                package_initial_message_sequence(
-                    agent_state.id,
-                    supplied_initial_message_sequence,
-                    agent_state.llm_config.model,
-                    actor,
-                    user_id=user_id,
-                )
-            )
-        else:
-            init_messages = [
-                PydanticMessage.dict_to_message(
-                    agent_id=agent_state.id,
-                    model=agent_state.llm_config.model,
-                    openai_message_dict=msg,
-                )
-                for msg in init_messages
-            ]
-
-        return init_messages
-
-    @enforce_types
-    async def append_initial_message_sequence_to_in_context_messages(
-        self,
-        actor: PydanticClient,
-        agent_state: PydanticAgentState,
-        initial_message_sequence: Optional[List[MessageCreate]] = None,
-        user_id: Optional[str] = None,
-    ) -> PydanticAgentState:
-        init_messages = self._generate_initial_message_sequence(
-            actor, agent_state, initial_message_sequence, user_id=user_id
-        )
-        return await self.append_to_in_context_messages(
-            init_messages, agent_id=agent_state.id, actor=actor, user_id=user_id
-        )
 
     @enforce_types
     async def _create_agent(
@@ -1543,13 +1484,7 @@ async def reset_messages(
                 )
                 await session.commit()
 
-        agent_state = await self.get_agent_by_id(agent_id=agent_id, actor=actor)
-
-        if add_default_initial_messages:
-            return await self.append_initial_message_sequence_to_in_context_messages(
-                actor, agent_state, user_id=user_id
-            )
-        return agent_state
+        return await self.get_agent_by_id(agent_id=agent_id, actor=actor)
 
     # ======================================================================================================================
     # Tool Management

From 7db3df5cef567c1ba2f9f84bc4c0c5ff5de2eb32 Mon Sep 17 00:00:00 2001
From: Lucas Parzych <lucas_parzych@intuit.com>
Date: Sat, 21 Mar 2026 08:40:33 -0700
Subject: [PATCH 07/13] chore: fix tests

---
 scripts/run_tests_with_docker.sh   |  7 +--
 tests/conftest.py                  | 10 ++++
 tests/test_memory_server.py        |  1 -
 tests/test_orm_to_pydantic_safe.py | 41 ++++++++---------
 tests/test_search_all_users.py     | 74 ++++++++++++++++++++++--------
 5 files changed, 90 insertions(+), 43 deletions(-)

diff --git a/scripts/run_tests_with_docker.sh b/scripts/run_tests_with_docker.sh
index 6d4bb3230..2b0e5f2a9 100755
--- a/scripts/run_tests_with_docker.sh
+++ b/scripts/run_tests_with_docker.sh
@@ -228,7 +228,6 @@ else
 fi
 
 # Detect whether user already provided a pytest marker expression.
-# If not, --integration should imply '-m integration'.
 HAS_MARKER_EXPR=false
 for arg in "${PYTEST_ARGS[@]}"; do
     if [[ "$arg" == "-m" ]] || [[ "$arg" == --markexpr=* ]]; then
@@ -239,13 +238,15 @@ done
 
 if [ ${#PYTEST_ARGS[@]} -eq 0 ]; then
     if [ "$START_SERVER" = true ]; then
-        $PYTEST_CMD tests/ -v -m integration
+        # Override pytest.ini default '-m "not integration"' so all tests run.
+        $PYTEST_CMD tests/ -v -m "integration or not integration"
     else
         $PYTEST_CMD tests/ -v -m "not integration"
     fi
 else
     if [ "$START_SERVER" = true ] && [ "$HAS_MARKER_EXPR" = false ]; then
-        $PYTEST_CMD -m integration "${PYTEST_ARGS[@]}"
+        # No explicit marker provided; include both integration and non-integration.
+        $PYTEST_CMD -m "integration or not integration" "${PYTEST_ARGS[@]}"
     else
         $PYTEST_CMD "${PYTEST_ARGS[@]}"
     fi
diff --git a/tests/conftest.py b/tests/conftest.py
index f452646c6..41169f572 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -139,3 +139,13 @@ def isolate_api_key_env():
     finally:
         if previous_api_key is not None:
             os.environ["MIRIX_API_KEY"] = previous_api_key
+
+
+@pytest_asyncio.fixture(scope="module")
+async def server():
+    """Shared AsyncServer fixture for tests requiring direct server access."""
+    from mirix.server.server import AsyncServer
+
+    srv = AsyncServer()
+    await srv.ensure_defaults()
+    return srv
diff --git a/tests/test_memory_server.py b/tests/test_memory_server.py
index c5819bc96..79af281a8 100644
--- a/tests/test_memory_server.py
+++ b/tests/test_memory_server.py
@@ -735,7 +735,6 @@ async def step(self, input_messages, chaining, actor=None, user=None, topics=Non
 
         monkeypatch.setattr("mirix.agent.EpisodicMemoryAgent", MockMemoryAgent)
         monkeypatch.setattr("mirix.agent.ProceduralMemoryAgent", MockMemoryAgent)
-        monkeypatch.setattr("mirix.functions.function_sets.memory_tools.os.cpu_count", lambda: 8)
 
         class StubAgentManager:
             async def list_agents(self, *, parent_id, actor, limit=None, **kwargs):
diff --git a/tests/test_orm_to_pydantic_safe.py b/tests/test_orm_to_pydantic_safe.py
index 8a2e6f28a..1a4fc8235 100644
--- a/tests/test_orm_to_pydantic_safe.py
+++ b/tests/test_orm_to_pydantic_safe.py
@@ -127,8 +127,7 @@ async def test_episodic_memory_to_pydantic(server):
     }
     
     event = await server.episodic_memory_manager.create_episodic_memory(
-        agent_state=None,
-        event=EpisodicEventCreate(**event_data),
+        episodic_memory=EpisodicEventCreate(**event_data),
         actor=actor,
     )
     
@@ -168,9 +167,8 @@ async def test_memory_models_to_pydantic(server):
     
     # Test semantic memory
     from mirix.schemas.semantic_memory import SemanticMemoryItem as SemanticCreate
-    semantic = await server.semantic_memory_manager.create_semantic_item(
-        agent_state=None,
-        item=SemanticCreate(
+    semantic = await server.semantic_memory_manager.create_item(
+        item_data=SemanticCreate(
             name="test_concept",
             summary="Test summary",
             details="Test details",
@@ -179,6 +177,7 @@ async def test_memory_models_to_pydantic(server):
             organization_id=user.organization_id,
         ),
         actor=actor,
+        user_id=user.id,
     )
     
     async with db_context() as session:
@@ -194,17 +193,16 @@ async def test_memory_models_to_pydantic(server):
     
     # Test procedural memory
     from mirix.schemas.procedural_memory import ProceduralMemoryItem as ProceduralCreate
-    procedural = await server.procedural_memory_manager.create_procedure(
-        agent_state=None,
-        item=ProceduralCreate(
+    procedural = await server.procedural_memory_manager.create_item(
+        item_data=ProceduralCreate(
+            entry_type="workflow",
             summary="test_procedure",
-            description="Test description",
-            steps="Step 1\nStep 2",
-            tags=["test"],
+            steps=["Step 1", "Step 2"],
             user_id=user.id,
             organization_id=user.organization_id,
         ),
         actor=actor,
+        user_id=user.id,
     )
     
     async with db_context() as session:
@@ -219,17 +217,17 @@ async def test_memory_models_to_pydantic(server):
     
     # Test resource memory
     from mirix.schemas.resource_memory import ResourceMemoryItem as ResourceCreate
-    resource = await server.resource_memory_manager.create_resource(
-        agent_state=None,
-        item=ResourceCreate(
+    resource = await server.resource_memory_manager.create_item(
+        item_data=ResourceCreate(
+            title="test_resource_title",
             summary="test_resource",
             content="Resource content",
             resource_type="document",
-            source="test",
             user_id=user.id,
             organization_id=user.organization_id,
         ),
         actor=actor,
+        user_id=user.id,
     )
     
     async with db_context() as session:
@@ -244,16 +242,18 @@ async def test_memory_models_to_pydantic(server):
     
     # Test knowledge vault
     from mirix.schemas.knowledge_vault import KnowledgeVaultItem as KnowledgeCreate
-    knowledge = await server.knowledge_vault_manager.create_knowledge(
-        agent_state=None,
-        item=KnowledgeCreate(
+    knowledge = await server.knowledge_vault_manager.create_item(
+        knowledge_vault_item=KnowledgeCreate(
+            entry_type="credential",
+            source="test",
+            sensitivity="low",
             caption="test_knowledge",
             secret_value="Secret data",
-            category="test",
             user_id=user.id,
             organization_id=user.organization_id,
         ),
         actor=actor,
+        user_id=user.id,
     )
     
     async with db_context() as session:
@@ -321,8 +321,7 @@ async def test_memory_manager_list_conversion(server):
     
     from mirix.schemas.episodic_memory import EpisodicEvent as EpisodicCreate
     await server.episodic_memory_manager.create_episodic_memory(
-        agent_state=agent,
-        event=EpisodicCreate(
+        episodic_memory=EpisodicCreate(
             event_type="test",
             actor="system",
             summary="Test event",
diff --git a/tests/test_search_all_users.py b/tests/test_search_all_users.py
index c8bf4d4ef..c1e296637 100644
--- a/tests/test_search_all_users.py
+++ b/tests/test_search_all_users.py
@@ -386,11 +386,20 @@ async def test_search_all_users_with_client_id_retrieves_both_users(
         logger.info("TEST 3: Search with client_id retrieves both users with matching scope")
         logger.info("=" * 80)
 
-        results = await client1.search_all_users(
-            query="Python",  # Search for "Python" which should appear in semantic memories for both users
-            memory_type="all",
-            client_id=client1.client_id,
-            limit=50,
+        async def _search_client1_bm25():
+            return await client1.search_all_users(
+                query="Python",
+                memory_type="all",
+                client_id=client1.client_id,
+                limit=50,
+            )
+
+        results = await poll_until(
+            fetch_results=_search_client1_bm25,
+            is_ready=lambda r: r["count"] > 0,
+            wait_log=(
+                "Client1 bm25 search returned 0; waiting %ss before retry (elapsed=%ss)..."
+            ),
         )
 
         logger.info(f"Results: {results['count']} memories found")
@@ -433,12 +442,21 @@ async def test_search_all_users_with_client_id_retrieves_both_users_embedding(
         logger.info("TEST 3b: Embedding search with client_id retrieves both users with matching scope")
         logger.info("=" * 80)
 
-        results = await client1.search_all_users(
-            query="group discussion",  # Semantic query for "team meeting"
-            memory_type="all",
-            search_method="embedding",
-            client_id=client1.client_id,
-            limit=50,
+        async def _search_client1_embedding():
+            return await client1.search_all_users(
+                query="group discussion",
+                memory_type="all",
+                search_method="embedding",
+                client_id=client1.client_id,
+                limit=50,
+            )
+
+        results = await poll_until(
+            fetch_results=_search_client1_embedding,
+            is_ready=lambda r: r["count"] > 0,
+            wait_log=(
+                "Client1 embedding search returned 0; waiting %ss before retry (elapsed=%ss)..."
+            ),
         )
 
         logger.info(f"Results: {results['count']} memories found")
@@ -524,7 +542,18 @@ async def test_search_with_client3_retrieves_only_user3(self, client3, user3_id,
         logger.info("=" * 80)
 
         # Search with client3 which has write_scope='read_only'
-        results = await client3.search_all_users(query="", memory_type="all", client_id=client3.client_id, limit=100)
+        async def _search_client3_bm25():
+            return await client3.search_all_users(
+                query="", memory_type="all", client_id=client3.client_id, limit=100
+            )
+
+        results = await poll_until(
+            fetch_results=_search_client3_bm25,
+            is_ready=lambda r: user3_id in set(result["user_id"] for result in r["results"]),
+            wait_log=(
+                "Client3 bm25 search missing user3; waiting %ss before retry (elapsed=%ss)..."
+            ),
+        )
 
         logger.info(f"Results: {results['count']} memories found")
         logger.info(f"Filter Tags: {results.get('filter_tags')}")
@@ -547,12 +576,21 @@ async def test_search_with_client3_retrieves_only_user3_embedding(self, client3,
         logger.info("=" * 80)
 
         # Search with client3 which has write_scope='read_only'
-        results = await client3.search_all_users(
-            query="software development",  # Semantic query
-            memory_type="all",
-            search_method="embedding",
-            client_id=client3.client_id,
-            limit=100,
+        async def _search_client3_embedding():
+            return await client3.search_all_users(
+                query="software development",
+                memory_type="all",
+                search_method="embedding",
+                client_id=client3.client_id,
+                limit=100,
+            )
+
+        results = await poll_until(
+            fetch_results=_search_client3_embedding,
+            is_ready=lambda r: user3_id in set(result["user_id"] for result in r["results"]),
+            wait_log=(
+                "Client3 embedding search missing user3; waiting %ss before retry (elapsed=%ss)..."
+            ),
         )
 
         logger.info(f"Results: {results['count']} memories found")

From 7ef174a3cf7a2a50754298e83997dccb2b0ea66c Mon Sep 17 00:00:00 2001
From: Lucas Parzych <lucas_parzych@intuit.com>
Date: Sat, 21 Mar 2026 08:42:12 -0700
Subject: [PATCH 08/13] chore: update cleanup script

---
 scripts/migrations/002_cleanup_message_ids.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/migrations/002_cleanup_message_ids.sql b/scripts/migrations/002_cleanup_message_ids.sql
index 841197a20..ba6c32871 100644
--- a/scripts/migrations/002_cleanup_message_ids.sql
+++ b/scripts/migrations/002_cleanup_message_ids.sql
@@ -4,7 +4,7 @@
 
 -- 1. Delete legacy system messages stored as Message rows
 --    (system prompt now lives exclusively in agent_state.system)
-DELETE FROM messages WHERE role = 'system';
+DELETE FROM messages;
 
 -- 2. Drop the message_ids column from agents table
 ALTER TABLE agents DROP COLUMN IF EXISTS message_ids;

From db1e242beeb6727974ea79a29ceaa054c689b321 Mon Sep 17 00:00:00 2001
From: Lucas Parzych <lucas_parzych@intuit.com>
Date: Sat, 21 Mar 2026 19:20:45 -0700
Subject: [PATCH 09/13] chore: fix bugs and refactor

---
 mirix/agent/agent.py     | 369 +++++++++++++++------------------------
 mirix/schemas/agent.py   |   4 -
 mirix/server/rest_api.py |   2 +
 mirix/server/server.py   |  13 +-
 mirix/utils.py           |  42 +++--
 5 files changed, 187 insertions(+), 243 deletions(-)

diff --git a/mirix/agent/agent.py b/mirix/agent/agent.py
index 88270b049..58f026f6d 100644
--- a/mirix/agent/agent.py
+++ b/mirix/agent/agent.py
@@ -1233,12 +1233,13 @@ async def step(
         input_messages: Union[Message, MessageCreate, List[Union[Message, MessageCreate]]],
         chaining: bool = True,
         max_chaining_steps: Optional[int] = None,
-        extra_messages: Optional[List[dict]] = None,
-        actor: Optional["Client"] = None,  # Client for write operations (audit trail)
-        user: Optional[User] = None,  # User for read operations (data scope)
+        actor: Optional["Client"] = None,  # Client
+        user: Optional[User] = None,
         **kwargs,
     ) -> MirixUsageStatistics:
-        """Run Agent.step in a loop, handling chaining via continue_chaining requests and function failures
+        """A "step" is one full invocation of an agent.
+
+        Run Agent.inner_step in a loop, handling chaining via continue_chaining requests and function failures
 
         Args:
             actor: Client object for write operations (updating messages, agent state) - audit trail
@@ -1250,124 +1251,125 @@ async def step(
         # chat_agent is deprecated - raise immediately
         if self.agent_state.is_type(AgentType.chat_agent):
             raise NotImplementedError(
-                "AgentType.chat_agent is deprecated and no longer supported. "
-                "Use a memory agent type instead."
+                "AgentType.chat_agent is deprecated and no longer supported. " "Use a memory agent type instead."
             )
 
-        # Store actor for write operations
-        if actor:
-            self.actor = actor
+        if actor is None or user is None:
+            raise ValueError("Agent.step requires non-null actor and user.")
 
-        # Store user and load user's memory blocks
-        if user:
-            self.user = user
-
-            # Only load blocks for core_memory_agent (other agent types don't use blocks)
-            if self.agent_state.is_type(AgentType.core_memory_agent):
-                # Load existing blocks for this user, scoped by the client's write_scope.
-                # auto_create_from_default=True will create blocks from template if they don't exist for this scope.
-                # filter_tags_set_on_create is applied only when new blocks are created (e.g. from default template).
-                existing_blocks = await self.block_manager.get_blocks(
-                    user=self.user,
-                    any_scopes=self._block_scopes,
-                    filter_tags_set_on_create=self.block_filter_tags,
-                )
+        # Store actor/user context for this step invocation.
+        self.actor = actor
+        self.user = user
+
+        # Special case for Core Memory Agent: load blocks to use later in the step
+        if self.agent_state.is_type(AgentType.core_memory_agent):
+            # Load existing blocks for this user, scoped by the client's write_scope.
+            # auto_create_from_default=True will create blocks from template if they don't exist for this scope.
+            # filter_tags_set_on_create is applied only when new blocks are created (e.g. from default template).
+            existing_blocks = await self.block_manager.get_blocks(
+                user=self.user,
+                any_scopes=self._block_scopes,
+                filter_tags_set_on_create=self.block_filter_tags,
+            )
 
-                # Apply block_filter_tags to existing blocks (merge or replace).
-                # Skips blocks whose filter_tags already match the desired state
-                # (e.g. blocks just created from template with the same tags).
-                if self.block_filter_tags and existing_blocks:
-                    existing_blocks = await self._apply_block_filter_tags(existing_blocks)
+            # Apply block_filter_tags to existing blocks (merge or replace).
+            # Skips blocks whose filter_tags already match the desired state
+            # (e.g. blocks just created from template with the same tags).
+            if self.block_filter_tags and existing_blocks:
+                existing_blocks = await self._apply_block_filter_tags(existing_blocks)
 
-                # Load blocks into memory for core_memory_agent
-                self.blocks_in_memory = Memory(blocks=existing_blocks)
+            # Load blocks into memory for core_memory_agent
+            self.blocks_in_memory = Memory(blocks=existing_blocks)
 
-        # Reset last function response for this step (no longer loaded from DB history)
+        # Reset last function response for this step
         self.last_function_response = None
 
         max_chaining_steps = max_chaining_steps or MAX_CHAINING_STEPS
 
-        first_input_message = input_messages[0] if isinstance(input_messages, list) else input_messages
-
-        # Convert MessageCreate objects to Message objects
-        if not isinstance(input_messages, list):
-            input_messages = [input_messages]
-        message_objects = [
-            (
-                m
-                if isinstance(m, Message)
-                else prepare_input_message_create(
-                    m,
-                    self.agent_state.id,
-                    wrap_user_message=False,
-                    wrap_system_message=True,
-                )
-            )
-            for m in input_messages
-        ]
-
-        extra_message_objects = (
-            [
-                prepare_input_message_create(
-                    m,
-                    self.agent_state.id,
-                    wrap_user_message=False,
-                    wrap_system_message=True,
+        # Normalize to runtime Message objects for downstream prompt assembly.
+        raw_input_messages = input_messages
+        if not isinstance(raw_input_messages, list):
+            raw_input_messages = [raw_input_messages]
+
+        # At the end of this normalization step we will end up with a list containing only one Message object
+        # (multiple messages are packed into a single Message object in the upstream caller)
+        # The step also converts it from a MessageCreate to a Message object
+        # to match compatability with the downstream prompt assembly.
+        normalized_input_messages: List[Message] = []
+        for m in raw_input_messages:
+            if isinstance(m, Message):
+                normalized_input_messages.append(m)
+            elif isinstance(m, MessageCreate):
+                normalized_input_messages.append(
+                    prepare_input_message_create(
+                        m,
+                        self.agent_state.id,
+                        wrap_user_message=False,
+                        wrap_system_message=True,
+                    )
                 )
-                for m in extra_messages
-            ]
-            if extra_messages is not None
-            else None
-        )
-        next_input_message = message_objects
-        counter = 0
-        total_usage = UsageStatistics()
-        step_count = 0
+            else:
+                raise ValueError("input_messages items must be Message or MessageCreate, " f"got {type(m)}")
 
-        # Load retained messages from DB if client has retention configured
+        # Read retained history from the parent scope (for sub-agents) or from this
+        # agent's scope (for top-level agents/meta). This keeps sub-agent inputs as a
+        # single packed message while still providing parent retained context.
         retention = (self.actor.message_set_retention_count or 0) if self.actor else 0
-        retained_messages: List[Message] = []
-        if retention > 0 and self.actor and self.user_id:
-            retained_messages = await self.message_manager.get_messages_for_agent_user(
-                agent_id=self.agent_state.id,
+        retention_agent_id = (
+            self.agent_state.parent_id or self.agent_state.id
+        )  # Retained messages in the DB are associated with the meta agent
+        should_read_retention = retention > 0 and self.actor and self.user_id
+        is_meta_agent = self.agent_state.is_type(AgentType.meta_memory_agent)
+        should_write_retention = retention > 0 and is_meta_agent and self.actor and self.user_id
+        retained_input_sets: List[Message] = []
+        if should_read_retention:
+            retained_input_sets = await self.message_manager.get_messages_for_agent_user(
+                agent_id=retention_agent_id,
                 user_id=self.user_id,
                 actor=self.actor,
                 limit=retention,
             )
 
-        # In-memory accumulator: starts with retained messages, grows with each chaining step
-        accumulated: List[Message] = list(retained_messages)
-        # Track the initial input messages for retention write-back
-        input_messages_for_persistence = message_objects
-
-        # initial_message_count = system_msg (1) + retained messages
-        initial_message_count = 1 + len(retained_messages)
+        # Chaining accumulator for the active agent loop only.
+        accumulated: List[Message] = list(retained_input_sets)
+        # Persist only the original input payload, never synthetic helper messages
+        # appended to iteration messages during meta-agent processing.
+        input_messages_for_persistence: List[Message] = list(normalized_input_messages)
 
         # Initialize the LLM client once per step to reuse across retries.
         llm_client = LLMClient.create(
             llm_config=self.agent_state.llm_config,
         )
 
+        if self.agent_state.is_type(AgentType.meta_memory_agent):
+            # Extract topics from retained context + current input messages.
+            try:
+                # make sure to include both retained context and current input messages in the search topic extraction
+                topics = await self._extract_topics_from_messages(retained_input_sets + normalized_input_messages)
+
+                if topics is not None:
+                    kwargs["topics"] = topics
+                else:
+                    printv(f"[Mirix.Agent.{self.agent_state.name}] WARNING: No topics extracted from input")
+
+            except Exception as e:
+                printv(f"[Mirix.Agent.{self.agent_state.name}] INFO: Error in extracting the topic from the input: {e}")
+                pass
+
+        # Main loop:ing
+        # Each iteration calls inner_step and then makes a decision about whether to continue chaining
+        # or to terminate the step. When chaining, the curren_input_messages are updated to reference
+        # a heartbeat message (e.g. "function failed", "continue chaining", etc.) and the previous input messages
+        # are added to the in-memory accumulator.
+        counter = 0
+        total_usage = UsageStatistics()
+        step_count = 0
+        loop_input_messages: List[Message] = list(normalized_input_messages)
         while True:
             kwargs["first_message"] = False
             kwargs["step_count"] = step_count
 
-            if self.agent_state.is_type(AgentType.meta_memory_agent, AgentType.chat_agent) and step_count == 0:
-                # When the agent first gets the screenshots, we need to extract the topic to search the query.
-                try:
-                    topics = await self._extract_topics_from_messages(next_input_message)
-
-                    if topics is not None:
-                        kwargs["topics"] = topics
-                    else:
-                        printv(f"[Mirix.Agent.{self.agent_state.name}] WARNING: No topics extracted from screenshots")
-
-                except Exception as e:
-                    printv(
-                        f"[Mirix.Agent.{self.agent_state.name}] INFO: Error in extracting the topic from the screenshots: {e}"
-                    )
-                    pass
-
+            loop_iteration_messages = list(loop_input_messages)
             if self.agent_state.is_type(AgentType.meta_memory_agent) and step_count == 0:
                 meta_message = prepare_input_message_create(
                     MessageCreate(
@@ -1379,14 +1381,11 @@ async def step(
                     wrap_user_message=False,
                     wrap_system_message=True,
                 )
-                next_input_message.append(meta_message)
+                loop_iteration_messages.append(meta_message)
 
             step_response = await self.inner_step(
-                first_input_messge=first_input_message,
-                messages=next_input_message,
+                messages=loop_iteration_messages,
                 accumulated=accumulated,
-                extra_messages=extra_message_objects,
-                initial_message_count=initial_message_count,
                 chaining=chaining,
                 llm_client=llm_client,
                 **kwargs,
@@ -1394,7 +1393,6 @@ async def step(
 
             continue_chaining = step_response.continue_chaining
             function_failed = step_response.function_failed
-            token_warning = step_response.in_context_memory_warning
             usage = step_response.usage
 
             # Accumulate step messages for next chaining iteration
@@ -1415,60 +1413,54 @@ async def step(
                     warning_content = "[System Message] You have reached the maximum chaining steps. Please call 'send_message' to send your response to the user."
                 else:
                     warning_content = "[System Message] You have reached the maximum chaining steps. Please call 'finish_memory_update' to end the chaining."
-                next_input_message = Message.dict_to_message(
-                    agent_id=self.agent_state.id,
-                    model=self.model,
-                    openai_message_dict={
-                        "role": "user",
-                        "content": warning_content,
-                    },
-                )
+                loop_input_messages = [
+                    Message.dict_to_message(
+                        agent_id=self.agent_state.id,
+                        model=self.model,
+                        openai_message_dict={
+                            "role": "user",
+                            "content": warning_content,
+                        },
+                    )
+                ]
                 continue  # give agent one more chance to respond
             elif max_chaining_steps is not None and counter > max_chaining_steps:
                 printv(
                     f"[Mirix.Agent.{self.agent_state.name}] INFO: Hit max chaining steps, stopping after {counter} steps"
                 )
                 break
-            # Chain handlers
-            elif token_warning and summarizer_settings.send_memory_warning_message:
-                assert self.agent_state.created_by_id is not None
-                next_input_message = Message.dict_to_message(
-                    agent_id=self.agent_state.id,
-                    model=self.model,
-                    openai_message_dict={
-                        "role": "user",  # TODO: change to system?
-                        "content": get_token_limit_warning(),
-                    },
-                )
-                continue  # always chain
             elif function_failed:
                 assert self.agent_state.created_by_id is not None
-                next_input_message = Message.dict_to_message(
-                    agent_id=self.agent_state.id,
-                    model=self.model,
-                    openai_message_dict={
-                        "role": "user",  # TODO: change to system?
-                        "content": get_contine_chaining(FUNC_FAILED_HEARTBEAT_MESSAGE),
-                    },
-                )
+                loop_input_messages = [
+                    Message.dict_to_message(
+                        agent_id=self.agent_state.id,
+                        model=self.model,
+                        openai_message_dict={
+                            "role": "user",  # TODO: change to system?
+                            "content": get_contine_chaining(FUNC_FAILED_HEARTBEAT_MESSAGE),
+                        },
+                    )
+                ]
                 continue  # always chain
             elif continue_chaining:
                 assert self.agent_state.created_by_id is not None
-                next_input_message = Message.dict_to_message(
-                    agent_id=self.agent_state.id,
-                    model=self.model,
-                    openai_message_dict={
-                        "role": "user",  # TODO: change to system?
-                        "content": get_contine_chaining(REQ_HEARTBEAT_MESSAGE),
-                    },
-                )
+                loop_input_messages = [
+                    Message.dict_to_message(
+                        agent_id=self.agent_state.id,
+                        model=self.model,
+                        openai_message_dict={
+                            "role": "user",  # TODO: change to system?
+                            "content": get_contine_chaining(REQ_HEARTBEAT_MESSAGE),
+                        },
+                    )
+                ]
                 continue  # always chain
             # Mirix no-op / yield
             else:
                 break
 
         # Retention write-back: persist input messages and prune old ones if configured
-        if retention > 0 and self.actor and self.user_id and input_messages_for_persistence:
+        if should_write_retention and input_messages_for_persistence:
             await self.message_manager.create_many_messages(
                 input_messages_for_persistence,
                 actor=self.actor,
@@ -2000,23 +1992,16 @@ async def construct_system_message(self, message: str) -> str:
 
     async def inner_step(
         self,
-        first_input_messge: Message,
         messages: Union[Message, List[Message]],
         accumulated: Optional[List[Message]] = None,
-        first_message: bool = False,
-        first_message_retry_limit: int = FIRST_MESSAGE_ATTEMPTS,
-        skip_verify: bool = False,
         stream: bool = False,  # TODO move to config?
         step_count: Optional[int] = None,
-        metadata: Optional[dict] = None,
         force_response: bool = False,
         topics: Optional[str] = None,
         retrieved_memories: Optional[dict] = None,
         display_intermediate_message: any = None,
         request_user_confirmation: Optional[Callable] = None,
         existing_file_uris: Optional[List[str]] = None,
-        extra_messages: Optional[List[dict]] = None,
-        initial_message_count: Optional[int] = None,
         return_memory_types_without_update: bool = False,
         message_queue: Optional[any] = None,
         chaining: bool = True,
@@ -2057,18 +2042,15 @@ async def inner_step(
                 messages = [messages]
 
             if not all(isinstance(m, Message) for m in messages):
-                raise ValueError(f"messages should be a Message or a list of Message, got {type(messages)}")
+                message_types = [type(m).__name__ for m in messages]
+                raise ValueError(
+                    "messages should be a Message or a list of Message, "
+                    f"got container={type(messages)}, elements={message_types}"
+                )
 
             # Build sequence: [system] + accumulated (prior chaining steps) + current messages
             input_message_sequence = [system_msg] + accumulated + messages
 
-            if extra_messages is not None:
-                input_message_sequence = (
-                    input_message_sequence[:initial_message_count]
-                    + extra_messages
-                    + input_message_sequence[initial_message_count:]
-                )
-
             if len(input_message_sequence) > 1 and input_message_sequence[-1].role != "user":
                 printv(
                     f"[Mirix.Agent.{self.agent_state.name}] WARNING: {CLI_WARNING_PREFIX}Attempting to run ChatCompletion without user as the last message in the queue"
@@ -2077,7 +2059,6 @@ async def inner_step(
             # Step 2: send the conversation and available functions to the LLM
             response = await self._get_ai_reply(
                 message_sequence=input_message_sequence,
-                first_message=first_message,
                 stream=stream,
                 step_count=step_count,
                 existing_file_uris=existing_file_uris,
@@ -2109,7 +2090,7 @@ async def inner_step(
             for response_choice in response.choices:
                 response_message = response_choice.message
                 tmp_response_messages, continue_chaining, function_failed = await self._handle_ai_response(
-                    first_input_messge,  # give the last message to the function so that other agents can see this message through funciton_calls
+                    messages[0],  # Input messages are always packed into a single MessageCreate object
                     response_message,
                     existing_file_uris=existing_file_uris,
                     # TODO this is kind of hacky, find a better way to handle this
@@ -2149,61 +2130,12 @@ async def inner_step(
                         f"[Mirix.Agent.{self.agent_state.name}] ERROR: Function execution encountered errors (see logs above for details)"
                     )
 
-            # if function_failed:
-
-            #     inputs = self._get_ai_reply(
-            #         message_sequence=input_message_sequence,
-            #         first_message=first_message,
-            #         stream=stream,
-            #         step_count=step_count,
-            #         # extra_messages=extra_messages,
-            #         get_input_data_for_debugging=True
-            #     )
-
-            #     try:
-            #         error = json.loads(all_response_messages[-1].content[0].text)
-            #     except:
-            #         error = 'Not Known'
-
-            #     response_json = response.model_dump()
-            #     response_json.pop('created', None)
-            #     results_to_log = {
-            #         'input': inputs,
-            #         'output': response_json,
-            #         'error': error
-            #     }
-
-            #     if not os.path.exists("debug"):
-            #         os.makedirs("debug")
-            #     count = 0
-            #     while os.path.exists(f"debug/debug_{count}.json"):
-            #         count += 1
-            #     with open(f"debug/debug_{count}.json", "w") as f:
-            #         json.dump(results_to_log, f, indent=2)
-
             # Step 6: extend the message history
             if len(messages) > 0:
                 all_new_messages = messages + all_response_messages
             else:
                 all_new_messages = all_response_messages
 
-            # Check the memory pressure and potentially issue a memory pressure warning
-            current_total_tokens = response.usage.total_tokens
-            active_memory_warning = False
-
-            # We can't do summarize logic properly if context_window is undefined
-            if self.agent_state.llm_config.context_window is None:
-                # Fallback if for some reason context_window is missing, just set to the default
-                printv(
-                    f"[Mirix.Agent.{self.agent_state.name}] WARNING: Could not find context_window in config, setting to default {LLM_MAX_TOKENS['DEFAULT']}"
-                )
-                printv(f"[Mirix.Agent.{self.agent_state.name}] DEBUG: Agent state: {self.agent_state}")
-                self.agent_state.llm_config.context_window = (
-                    LLM_MAX_TOKENS[self.model]
-                    if (self.model is not None and self.model in LLM_MAX_TOKENS)
-                    else LLM_MAX_TOKENS["DEFAULT"]
-                )
-
             # Log step
             step = await self.step_manager.log_step(
                 actor=self.actor,
@@ -2215,24 +2147,6 @@ async def inner_step(
             for message in all_new_messages:
                 message.step_id = step.id
 
-            # Check memory pressure (warning only — no in-loop summarization)
-            if current_total_tokens > summarizer_settings.memory_warning_threshold * int(
-                self.agent_state.llm_config.context_window
-            ):
-                printv(
-                    f"[Mirix.Agent.{self.agent_state.name}] INFO: Memory pressure detected: last response total_tokens ({current_total_tokens}) > {summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window)}"
-                )
-
-                # Only deliver the alert if we haven't already (this period)
-                if not self.agent_alerted_about_memory_pressure:
-                    active_memory_warning = True
-                    self.agent_alerted_about_memory_pressure = True  # it's up to the outer loop to handle this
-
-            else:
-                printv(
-                    f"[Mirix.Agent.{self.agent_state.name}] DEBUG: Memory usage acceptable: last response total_tokens ({current_total_tokens}) < {summarizer_settings.memory_warning_threshold * int(self.agent_state.llm_config.context_window)}"
-                )
-
             # Log step completion and results
             printv(
                 f"[Mirix.Agent.{self.agent_state.name}] INFO: Agent step completed - continue_chaining: {continue_chaining}, function_failed: {function_failed}, messages_generated: {len(all_new_messages)}"
@@ -2242,12 +2156,13 @@ async def inner_step(
                 messages=all_new_messages,
                 continue_chaining=continue_chaining,
                 function_failed=function_failed,
-                in_context_memory_warning=active_memory_warning,
                 usage=response.usage,
             )
 
         except Exception as e:
-            printv(f"[Mirix.Agent.{self.agent_state.name}] ERROR: inner_step() failed\nmessages = {messages}\nerror = {e}")
+            printv(
+                f"[Mirix.Agent.{self.agent_state.name}] ERROR: inner_step() failed\nmessages = {messages}\nerror = {e}"
+            )
             if is_context_overflow_error(e):
                 num_accumulated = len(accumulated) + len(messages)
                 err_msg = f"Context window exceeded for agent id={self.agent_state.id} with {num_accumulated} in-context messages."
@@ -2316,16 +2231,20 @@ def migrate_embedding(self, embedding_config: EmbeddingConfig):
 
     async def get_context_window(self) -> ContextWindowOverview:
         """Get the context window of the agent"""
+        from mirix.schemas.agent import AgentType
 
         system_prompt = self.agent_state.system  # TODO is this the current system or the initial system?
         num_tokens_system = count_tokens(system_prompt)
         core_memory = self.blocks_in_memory.compile() if self.blocks_in_memory else ""
         num_tokens_core_memory = count_tokens(core_memory)
 
-        # Grab recent messages from DB for token counting purposes
+        # Grab recent retained messages from DB for token counting purposes.
+        # Retention is only applied to meta agent history.
         retention = (self.actor.message_set_retention_count or 0) if self.actor else 0
+        is_meta_agent = self.agent_state.is_type(AgentType.meta_memory_agent)
+        should_use_retention = retention > 0 and is_meta_agent and self.actor and self.user_id
         db_messages: List[Message] = []
-        if retention > 0 and self.actor and self.user_id:
+        if should_use_retention:
             db_messages = await self.message_manager.get_messages_for_agent_user(
                 agent_id=self.agent_state.id,
                 user_id=self.user_id,
diff --git a/mirix/schemas/agent.py b/mirix/schemas/agent.py
index f04e37954..6bc304e1a 100755
--- a/mirix/schemas/agent.py
+++ b/mirix/schemas/agent.py
@@ -302,10 +302,6 @@ class AgentStepResponse(BaseModel):
         description="Whether the agent requested a contine_chaining (i.e. follow-up execution).",
     )
     function_failed: bool = Field(..., description="Whether the agent step ended because a function call failed.")
-    in_context_memory_warning: bool = Field(
-        ...,
-        description="Whether the agent step ended because the in-context memory is near its limit.",
-    )
     usage: UsageStatistics = Field(..., description="Usage statistics of the LLM call during the agent's step.")
     traj: Optional[dict] = Field(None, description="Action, Observation, State at the current step")
 
diff --git a/mirix/server/rest_api.py b/mirix/server/rest_api.py
index 1d5fac470..92bec53a3 100644
--- a/mirix/server/rest_api.py
+++ b/mirix/server/rest_api.py
@@ -2035,6 +2035,8 @@ async def add_memory(
                 raise ValueError(f"Invalid content type: {type(content)}")
         message = new_message
 
+    # N.b. This function converts to Mirix format and also packs all messages into a single MessageCreate object
+    # so, there will be only one MessageCreate object in the list
     input_messages = convert_message_to_mirix_message(message)
 
     # Add client scope to filter_tags (create if not provided)
diff --git a/mirix/server/server.py b/mirix/server/server.py
index ce80a2411..145c19b66 100644
--- a/mirix/server/server.py
+++ b/mirix/server/server.py
@@ -700,6 +700,8 @@ async def _step(
     ) -> MirixUsageStatistics:
         """Send the input message through the agent"""
         logger.debug("Got input messages: %s", input_messages)
+        if user is None:
+            raise ValueError("AsyncServer._step requires a non-null user.")
         mirix_agent = None
         try:
             mirix_agent = await self.load_agent(
@@ -859,11 +861,11 @@ async def _command(self, user_id: str, agent_id: str, command: str) -> MirixUsag
 
         elif command.lower() == "contine_chaining":
             input_message = system.get_contine_chaining()
-            usage = await self._step(actor=actor, agent_id=agent_id, input_messages=input_message)
+            usage = await self._step(actor=actor, agent_id=agent_id, input_messages=input_message, user=actor)
 
         elif command.lower() == "memorywarning":
             input_message = system.get_token_limit_warning()
-            usage = await self._step(actor=actor, agent_id=agent_id, input_messages=input_message)
+            usage = await self._step(actor=actor, agent_id=agent_id, input_messages=input_message, user=actor)
 
         if not usage:
             usage = MirixUsageStatistics()
@@ -919,7 +921,7 @@ async def user_message(
                 )
 
         # Run the agent state forward
-        usage = await self._step(actor=actor, agent_id=agent_id, input_messages=message)
+        usage = await self._step(actor=actor, agent_id=agent_id, input_messages=message, user=actor)
         return usage
 
     async def system_message(
@@ -984,7 +986,7 @@ async def system_message(
             message.created_at = timestamp
 
         # Run the agent state forward
-        return await self._step(actor=actor, agent_id=agent_id, input_messages=message)
+        return await self._step(actor=actor, agent_id=agent_id, input_messages=message, user=actor)
 
     async def construct_system_message(self, agent_id: str, message: str, actor: Client) -> str:
         """
@@ -1047,6 +1049,9 @@ async def send_messages(
 
             set_verbose(verbose)
 
+        if user is None:
+            user = await self.user_manager.get_admin_user()
+
         try:
             # Run the agent state forward
             return await self._step(
diff --git a/mirix/utils.py b/mirix/utils.py
index 4d7006f97..b95d82fe0 100755
--- a/mirix/utils.py
+++ b/mirix/utils.py
@@ -22,16 +22,7 @@
 from functools import wraps
 from logging import Logger
 from pathlib import Path
-from typing import (
-    TYPE_CHECKING,
-    List,
-    Optional,
-    Union,
-    _GenericAlias,
-    get_args,
-    get_origin,
-    get_type_hints,
-)
+from typing import TYPE_CHECKING, List, Optional, Union, _GenericAlias, get_args, get_origin, get_type_hints
 from urllib.parse import urljoin, urlparse
 
 import demjson3 as demjson
@@ -1797,6 +1788,37 @@ def convert_message_to_mirix_message(
     file_manager: Optional["FileManager"] = None,
     images_dir: Optional[Path] = None,
 ) -> List[MessageCreate]:
+    """Convert raw API-style payloads into MIRIX ``MessageCreate`` objects.
+
+    This helper always returns ``List[MessageCreate]``.
+
+    Key behavior for the ``/memory/add`` save flow:
+    - The API first flattens multi-turn input (e.g. user/assistant turns) into a
+      single content list with ``[USER]``/``[ASSISTANT]`` text markers.
+    - This function then wraps that flattened list into exactly ONE
+      ``MessageCreate`` (usually with role ``user``).
+    - So a multi-turn save payload becomes ``[single MessageCreate]``.
+
+    Examples:
+        String input:
+            >>> convert_message_to_mirix_message("hello")
+            [MessageCreate(role="user", content=[TextContent(text="hello")])]
+
+       packed conversation:
+            >>> convert_message_to_mirix_message(
+            ...     [
+            ...         {"type": "text", "text": "[USER]"},
+            ...         {"type": "text", "text": "hi"},
+            ...         {"type": "text", "text": "[ASSISTANT]"},
+            ...         {"type": "text", "text": "hello there"},
+            ...     ]
+            ... )
+            [MessageCreate(role="user", content=[TextContent(...), ...])]
+
+        The caller can override role:
+            >>> convert_message_to_mirix_message("system note", role="system")
+            [MessageCreate(role="system", content=[TextContent(text="system note")])]
+    """
     if isinstance(message, str):
         content = [TextContent(text=message)]
         input_messages = [MessageCreate(role=MessageRole(role), content=content)]

From 9a4c1fdea3988d1db42828bef9cb2dca338367f8 Mon Sep 17 00:00:00 2001
From: Lucas Parzych <lucas_parzych@intuit.com>
Date: Sun, 22 Mar 2026 08:01:38 -0700
Subject: [PATCH 10/13] chore: remove unneeded env var

---
 mirix/constants.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/mirix/constants.py b/mirix/constants.py
index da61be898..1534416dc 100644
--- a/mirix/constants.py
+++ b/mirix/constants.py
@@ -229,11 +229,6 @@
 INNER_THOUGHTS_CLI_SYMBOL = "💭"
 ASSISTANT_MESSAGE_CLI_SYMBOL = "🤖"
 
-CLEAR_HISTORY_AFTER_MEMORY_UPDATE = os.getenv("CLEAR_HISTORY_AFTER_MEMORY_UPDATE", "true").lower() in (
-    "true",
-    "1",
-    "yes",
-)
 CALL_MEMORY_AGENT_IN_PARALLEL = os.getenv("CALL_MEMORY_AGENT_IN_PARALLEL", "false").lower() in ("true", "1", "yes")
 CHAINING_FOR_MEMORY_UPDATE = os.getenv("CHAINING_FOR_MEMORY_UPDATE", "false").lower() in ("true", "1", "yes")
 CHAINING_FOR_META_AGENT = os.getenv("CHAINING_FOR_META_AGENT", "true").lower() in (

From 9b94bf536c099376482579b48d803ea19c60ca73 Mon Sep 17 00:00:00 2001
From: Lucas Parzych <lucas_parzych@intuit.com>
Date: Sun, 22 Mar 2026 08:47:26 -0700
Subject: [PATCH 11/13] test: add tests

---
 tests/test_memory_integration.py |  52 +++++++++
 tests/test_memory_server.py      |   8 +-
 tests/test_message_handling.py   | 180 +++++++++++++++++++++++++++++++
 3 files changed, 239 insertions(+), 1 deletion(-)

diff --git a/tests/test_memory_integration.py b/tests/test_memory_integration.py
index 03eb8072d..db685dda1 100644
--- a/tests/test_memory_integration.py
+++ b/tests/test_memory_integration.py
@@ -484,12 +484,31 @@ async def test_no_messages_persisted_with_zero_retention(msg_client):
     print("  Waiting for queue processing (15s)...")
     await asyncio.sleep(15)
 
+    synthetic_markers = [
+        "[System Message] As the meta memory manager",
+        "continue chaining",
+        "function failed",
+        "finish_memory_update",
+    ]
+
     for name, aid in agent_map.items():
         messages = await _get_message_rows(
             agent_id=aid,
             user_id=MSG_TEST_USER_ID,
             org_id=TEST_ORG_ID,
         )
+        for message in messages:
+            content_text = " ".join(
+                [
+                    c.text
+                    for c in (message.content or [])
+                    if hasattr(c, "text") and c.text
+                ]
+            )
+            assert not any(marker in content_text for marker in synthetic_markers), (
+                f"Agent '{name}' persisted synthetic helper content with retention=0: "
+                f"{content_text}"
+            )
         assert len(messages) == 0, (
             f"Agent '{name}' should have 0 message rows with "
             f"retention=0, found {len(messages)}"
@@ -565,6 +584,39 @@ async def test_message_retention_prunes_to_limit(msg_client):
             f"retention=2, found {len(messages)}"
         )
 
+        synthetic_markers = [
+            "[System Message] As the meta memory manager",
+            "continue chaining",
+            "function failed",
+            "finish_memory_update",
+        ]
+        for message in messages:
+            content_text = " ".join(
+                [
+                    c.text
+                    for c in (message.content or [])
+                    if hasattr(c, "text") and c.text
+                ]
+            )
+            assert not any(marker in content_text for marker in synthetic_markers), (
+                "Retention rows should contain only persisted input message sets, "
+                f"found synthetic helper content: {content_text}"
+            )
+
+        # Sub-agents should not persist retained input message sets.
+        for name, aid in agent_map.items():
+            if name == "meta_memory_agent":
+                continue
+            sub_messages = await _get_message_rows(
+                agent_id=aid,
+                user_id=MSG_TEST_USER_ID,
+                org_id=TEST_ORG_ID,
+            )
+            assert len(sub_messages) == 0, (
+                f"Sub-agent '{name}' should not persist retained message rows; "
+                f"found {len(sub_messages)}"
+            )
+
     finally:
         reset_client = await server.client_manager.update_client(
             ClientUpdate(id=MSG_TEST_CLIENT_ID, message_set_retention_count=0)
diff --git a/tests/test_memory_server.py b/tests/test_memory_server.py
index 79af281a8..5174734b3 100644
--- a/tests/test_memory_server.py
+++ b/tests/test_memory_server.py
@@ -719,7 +719,7 @@ async def test_trigger_memory_update_runs_in_parallel(self, server, client, user
         relevant_agents = [agent for agent in child_agents if agent.agent_type in target_types]
         assert len(relevant_agents) == len(target_types), "Expected episodic and procedural memory agents to be present"
 
-        tracker = {"start_times": {}, "thread_ids": {}}
+        tracker = {"start_times": {}, "thread_ids": {}, "input_lengths": {}}
         lock = threading.Lock()
 
         class MockMemoryAgent:
@@ -731,6 +731,9 @@ async def step(self, input_messages, chaining, actor=None, user=None, topics=Non
                 with lock:
                     tracker["start_times"][memory_type] = time.perf_counter()
                     tracker["thread_ids"][memory_type] = threading.get_ident()
+                    tracker["input_lengths"][memory_type] = (
+                        len(input_messages) if isinstance(input_messages, list) else 1
+                    )
                 await asyncio.sleep(1)
 
         monkeypatch.setattr("mirix.agent.EpisodicMemoryAgent", MockMemoryAgent)
@@ -767,6 +770,9 @@ async def list_agents(self, *, parent_id, actor, limit=None, **kwargs):
         assert set(start_times.keys()) == {"episodic", "procedural"}
         gap = abs(start_times["episodic"] - start_times["procedural"])
 
+        # Child agents should receive a single packed input message.
+        assert tracker["input_lengths"] == {"episodic": 1, "procedural": 1}
+
         # Both updates ran; gap < 0.45s indicates concurrent execution (asyncio uses one thread).
         assert gap < 0.45, f"Expected parallel execution, gap was {gap:.3f}s"
 
diff --git a/tests/test_message_handling.py b/tests/test_message_handling.py
index 77ef95548..fc3322ff8 100644
--- a/tests/test_message_handling.py
+++ b/tests/test_message_handling.py
@@ -11,12 +11,18 @@
 import asyncio
 from contextlib import asynccontextmanager
 from datetime import datetime
+from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
+from mirix.agent.agent import Agent
+from mirix.schemas.agent import AgentState, AgentStepResponse, AgentType
 from mirix.schemas.client import Client
+from mirix.schemas.embedding_config import EmbeddingConfig
+from mirix.schemas.llm_config import LLMConfig
 from mirix.schemas.message import Message
+from mirix.schemas.openai.chat_completion_response import UsageStatistics
 from mirix.schemas.user import User
 from mirix.services.message_manager import MessageManager
 
@@ -199,3 +205,177 @@ def test_client_with_retention_has_correct_value(self):
         """Clients configured with retention=5 expose that value."""
         client = make_client(retention=5)
         assert client.message_set_retention_count == 5
+
+
+def make_agent_state(
+    agent_id: str,
+    agent_type: AgentType,
+    parent_id: str | None = None,
+) -> AgentState:
+    """Create a minimal AgentState for unit-testing Agent.step."""
+    return AgentState(
+        id=agent_id,
+        name=agent_type.value,
+        system="System prompt",
+        agent_type=agent_type,
+        llm_config=LLMConfig.default_config("gpt-4o-mini"),
+        embedding_config=EmbeddingConfig.default_config(provider="openai"),
+        tools=[],
+        parent_id=parent_id,
+    )
+
+
+def make_runtime_message(agent_id: str, text: str) -> Message:
+    """Create a runtime Message object used by Agent.step."""
+    return Message.dict_to_message(
+        agent_id=agent_id,
+        model="gpt-4o-mini",
+        openai_message_dict={"role": "user", "content": text},
+    )
+
+
+def build_step_test_agent(agent_state: AgentState, user: User) -> Agent:
+    """Build an Agent instance with only fields required by step()."""
+    agent = Agent.__new__(Agent)
+    agent.agent_state = agent_state
+    agent.user = user
+    agent.user_id = user.id
+    agent.client_id = "client-1"
+    agent.model = "gpt-4o-mini"
+    agent.filter_tags = None
+    agent.block_filter_tags = None
+    agent._block_scopes = None
+    agent.blocks_in_memory = None
+    agent.last_function_response = None
+    agent.block_manager = SimpleNamespace(get_blocks=AsyncMock(return_value=[]))
+    agent.message_manager = SimpleNamespace(
+        get_messages_for_agent_user=AsyncMock(return_value=[]),
+        create_many_messages=AsyncMock(return_value=[]),
+        hard_delete_user_messages_for_agent=AsyncMock(return_value=0),
+    )
+    agent._extract_topics_from_messages = AsyncMock(return_value="topic-a;topic-b")
+    agent.inner_step = AsyncMock(
+        return_value=AgentStepResponse(
+            messages=[],
+            continue_chaining=False,
+            function_failed=False,
+            usage=UsageStatistics(),
+            traj={},
+        )
+    )
+    agent.interface = SimpleNamespace(step_complete=lambda: None)
+    return agent
+
+
+class TestAgentStepRetentionAndTopics:
+    @pytest.mark.asyncio
+    async def test_step_reads_retention_from_parent_scope_for_sub_agent(self):
+        user = make_user()
+        client = make_client(retention=2)
+        agent_state = make_agent_state(
+            agent_id="agent-child",
+            agent_type=AgentType.episodic_memory_agent,
+            parent_id="agent-meta",
+        )
+        agent = build_step_test_agent(agent_state, user)
+        agent.message_manager.get_messages_for_agent_user = AsyncMock(return_value=[make_runtime_message("agent-meta", "r1")])
+
+        with patch("mirix.agent.agent.LLMClient.create", return_value=object()):
+            await agent.step(
+                input_messages=make_runtime_message("agent-child", "current"),
+                chaining=False,
+                actor=client,
+                user=user,
+            )
+
+        agent.message_manager.get_messages_for_agent_user.assert_awaited_once()
+        read_kwargs = agent.message_manager.get_messages_for_agent_user.await_args.kwargs
+        assert read_kwargs["agent_id"] == "agent-meta"
+        assert read_kwargs["limit"] == 2
+        agent.message_manager.create_many_messages.assert_not_awaited()
+        agent.message_manager.hard_delete_user_messages_for_agent.assert_not_awaited()
+        agent._extract_topics_from_messages.assert_not_awaited()
+
+    @pytest.mark.asyncio
+    async def test_step_meta_persists_only_original_input_and_prunes(self):
+        user = make_user()
+        client = make_client(retention=2)
+        agent_state = make_agent_state(
+            agent_id="agent-meta",
+            agent_type=AgentType.meta_memory_agent,
+        )
+        agent = build_step_test_agent(agent_state, user)
+        original_input = make_runtime_message("agent-meta", "persist-me")
+        heartbeat_like_message = make_runtime_message("agent-meta", "heartbeat-ish follow-up")
+        agent.inner_step = AsyncMock(
+            return_value=AgentStepResponse(
+                messages=[heartbeat_like_message],
+                continue_chaining=False,
+                function_failed=False,
+                usage=UsageStatistics(),
+                traj={},
+            )
+        )
+
+        with patch("mirix.agent.agent.LLMClient.create", return_value=object()):
+            await agent.step(
+                input_messages=original_input,
+                chaining=False,
+                actor=client,
+                user=user,
+            )
+
+        persisted_messages = agent.message_manager.create_many_messages.await_args.args[0]
+        assert len(persisted_messages) == 1
+        assert persisted_messages[0].id == original_input.id
+
+        prune_kwargs = agent.message_manager.hard_delete_user_messages_for_agent.await_args.kwargs
+        assert prune_kwargs["agent_id"] == "agent-meta"
+        assert prune_kwargs["keep_newest_n"] == 2
+
+    @pytest.mark.asyncio
+    async def test_step_meta_extracts_topics_from_retained_plus_current(self):
+        user = make_user()
+        client = make_client(retention=2)
+        agent_state = make_agent_state(
+            agent_id="agent-meta",
+            agent_type=AgentType.meta_memory_agent,
+        )
+        agent = build_step_test_agent(agent_state, user)
+        retained_1 = make_runtime_message("agent-meta", "retained-one")
+        retained_2 = make_runtime_message("agent-meta", "retained-two")
+        current = make_runtime_message("agent-meta", "current-input")
+        agent.message_manager.get_messages_for_agent_user = AsyncMock(return_value=[retained_1, retained_2])
+
+        with patch("mirix.agent.agent.LLMClient.create", return_value=object()):
+            await agent.step(
+                input_messages=current,
+                chaining=False,
+                actor=client,
+                user=user,
+            )
+
+        extract_arg = agent._extract_topics_from_messages.await_args.args[0]
+        assert [m.id for m in extract_arg] == [retained_1.id, retained_2.id, current.id]
+
+    @pytest.mark.asyncio
+    async def test_step_retention_zero_skips_read_write_persistence(self):
+        user = make_user()
+        client = make_client(retention=0)
+        agent_state = make_agent_state(
+            agent_id="agent-meta",
+            agent_type=AgentType.meta_memory_agent,
+        )
+        agent = build_step_test_agent(agent_state, user)
+
+        with patch("mirix.agent.agent.LLMClient.create", return_value=object()):
+            await agent.step(
+                input_messages=make_runtime_message("agent-meta", "current-input"),
+                chaining=False,
+                actor=client,
+                user=user,
+            )
+
+        agent.message_manager.get_messages_for_agent_user.assert_not_awaited()
+        agent.message_manager.create_many_messages.assert_not_awaited()
+        agent.message_manager.hard_delete_user_messages_for_agent.assert_not_awaited()

From 038b2b19167e0afc1a393667b5779f9b5904cf19 Mon Sep 17 00:00:00 2001
From: Lucas Parzych <lucas_parzych@intuit.com>
Date: Sun, 22 Mar 2026 09:16:57 -0700
Subject: [PATCH 12/13] chore: dead code removal

---
 mirix/agent/agent.py    | 129 +---------------------------------------
 mirix/schemas/memory.py |  46 --------------
 mirix/server/server.py  |   5 +-
 3 files changed, 3 insertions(+), 177 deletions(-)

diff --git a/mirix/agent/agent.py b/mirix/agent/agent.py
index 58f026f6d..ca64bd9fb 100644
--- a/mirix/agent/agent.py
+++ b/mirix/agent/agent.py
@@ -44,10 +44,9 @@
 from mirix.schemas.client import Client
 from mirix.schemas.embedding_config import EmbeddingConfig
 from mirix.schemas.enums import MessageRole, ToolType
-from mirix.schemas.memory import ContextWindowOverview, Memory
+from mirix.schemas.memory import Memory
 from mirix.schemas.message import Message, MessageCreate
 from mirix.schemas.mirix_message_content import CloudFileContent, FileContent, ImageContent, TextContent
-from mirix.schemas.openai.chat_completion_request import Tool as ChatCompletionRequestTool
 from mirix.schemas.openai.chat_completion_response import ChatCompletionResponse
 from mirix.schemas.openai.chat_completion_response import Message as ChatCompletionMessage
 from mirix.schemas.openai.chat_completion_response import UsageStatistics
@@ -58,7 +57,7 @@
 from mirix.services.agent_manager import AgentManager
 from mirix.services.block_manager import BlockManager
 from mirix.services.episodic_memory_manager import EpisodicMemoryManager
-from mirix.services.helpers.agent_manager_helper import check_supports_structured_output, compile_memory_metadata_block
+from mirix.services.helpers.agent_manager_helper import check_supports_structured_output
 from mirix.services.knowledge_vault_manager import KnowledgeVaultManager
 from mirix.services.message_manager import MessageManager
 from mirix.services.procedural_memory_manager import ProceduralMemoryManager
@@ -72,15 +71,12 @@
 from mirix.tracing import trace_method
 from mirix.utils import (
     convert_timezone_to_utc,
-    count_tokens,
     get_friendly_error_msg,
     get_tool_call_id,
     get_utc_time,
     json_dumps,
     json_loads,
     log_telemetry,
-    num_tokens_from_functions,
-    num_tokens_from_messages,
     parse_json,
     printv,
     validate_function_response,
@@ -2229,127 +2225,6 @@ def migrate_embedding(self, embedding_config: EmbeddingConfig):
         # TODO: recall memory
         raise NotImplementedError()
 
-    async def get_context_window(self) -> ContextWindowOverview:
-        """Get the context window of the agent"""
-        from mirix.schemas.agent import AgentType
-
-        system_prompt = self.agent_state.system  # TODO is this the current system or the initial system?
-        num_tokens_system = count_tokens(system_prompt)
-        core_memory = self.blocks_in_memory.compile() if self.blocks_in_memory else ""
-        num_tokens_core_memory = count_tokens(core_memory)
-
-        # Grab recent retained messages from DB for token counting purposes.
-        # Retention is only applied to meta agent history.
-        retention = (self.actor.message_set_retention_count or 0) if self.actor else 0
-        is_meta_agent = self.agent_state.is_type(AgentType.meta_memory_agent)
-        should_use_retention = retention > 0 and is_meta_agent and self.actor and self.user_id
-        db_messages: List[Message] = []
-        if should_use_retention:
-            db_messages = await self.message_manager.get_messages_for_agent_user(
-                agent_id=self.agent_state.id,
-                user_id=self.user_id,
-                actor=self.actor,
-                limit=retention,
-            )
-        # Prepend synthetic system message for context window calculation
-        system_msg_for_count = Message.dict_to_message(
-            agent_id=self.agent_state.id,
-            model=self.model,
-            openai_message_dict={"role": "system", "content": system_prompt},
-        )
-        in_context_messages = [system_msg_for_count] + db_messages
-        in_context_messages_openai = [m.to_openai_dict() for m in in_context_messages]
-
-        # Check if there's a summary message in the message queue
-        if (
-            len(in_context_messages) > 1
-            and in_context_messages[1].role == MessageRole.user
-            and isinstance(in_context_messages[1].text, str)
-            # TODO remove hardcoding
-            and "The following is a summary of the previous " in in_context_messages[1].text
-        ):
-            # Summary message exists
-            assert in_context_messages[1].text is not None
-            summary_memory = in_context_messages[1].text
-            num_tokens_summary_memory = count_tokens(in_context_messages[1].text)
-            # with a summary message, the real messages start at index 2
-            num_tokens_messages = (
-                num_tokens_from_messages(messages=in_context_messages_openai[2:], model=self.model)
-                if len(in_context_messages_openai) > 2
-                else 0
-            )
-
-        else:
-            summary_memory = None
-            num_tokens_summary_memory = 0
-            # with no summary message, the real messages start at index 1
-            num_tokens_messages = (
-                num_tokens_from_messages(messages=in_context_messages_openai[1:], model=self.model)
-                if len(in_context_messages_openai) > 1
-                else 0
-            )
-
-        message_manager_size = await self.message_manager.size(
-            actor=self.actor, agent_id=self.agent_state.id, user_id=self.user_id
-        )
-        external_memory_summary = compile_memory_metadata_block(
-            memory_edit_timestamp=get_utc_time(),
-            previous_message_count=await self.message_manager.size(
-                actor=self.actor, agent_id=self.agent_state.id, user_id=self.user_id
-            ),
-        )
-        num_tokens_external_memory_summary = count_tokens(external_memory_summary)
-
-        # tokens taken up by function definitions
-        agent_state_tool_jsons = [t.json_schema for t in self.agent_state.tools]
-        if agent_state_tool_jsons:
-            available_functions_definitions = [
-                ChatCompletionRequestTool(type="function", function=f) for f in agent_state_tool_jsons
-            ]
-            num_tokens_available_functions_definitions = num_tokens_from_functions(
-                functions=agent_state_tool_jsons, model=self.model
-            )
-        else:
-            available_functions_definitions = []
-            num_tokens_available_functions_definitions = 0
-
-        num_tokens_used_total = (
-            num_tokens_system  # system prompt
-            + num_tokens_available_functions_definitions  # function definitions
-            + num_tokens_core_memory  # core memory
-            + num_tokens_external_memory_summary  # metadata (statistics) about recall/archival
-            + num_tokens_summary_memory  # summary of ongoing conversation
-            + num_tokens_messages  # tokens taken by messages
-        )
-        assert isinstance(num_tokens_used_total, int)
-
-        return ContextWindowOverview(
-            # context window breakdown (in messages)
-            num_messages=len(in_context_messages),
-            num_recall_memory=message_manager_size,
-            num_tokens_external_memory_summary=num_tokens_external_memory_summary,
-            external_memory_summary=external_memory_summary,
-            # top-level information
-            context_window_size_max=self.agent_state.llm_config.context_window,
-            context_window_size_current=num_tokens_used_total,
-            # context window breakdown (in tokens)
-            num_tokens_system=num_tokens_system,
-            system_prompt=system_prompt,
-            num_tokens_core_memory=num_tokens_core_memory,
-            core_memory=core_memory,
-            num_tokens_summary_memory=num_tokens_summary_memory,
-            summary_memory=summary_memory,
-            num_tokens_messages=num_tokens_messages,
-            messages=in_context_messages,
-            # related to functions
-            num_tokens_functions_definitions=num_tokens_available_functions_definitions,
-            functions_definitions=available_functions_definitions,
-        )
-
-    async def count_tokens(self) -> int:
-        """Count the tokens in the current context window"""
-        context_window_breakdown = await self.get_context_window()
-        return context_window_breakdown.context_window_size_current
 
 
 def strip_name_field_from_user_message(
diff --git a/mirix/schemas/memory.py b/mirix/schemas/memory.py
index 4f1258624..30cd07b73 100755
--- a/mirix/schemas/memory.py
+++ b/mirix/schemas/memory.py
@@ -9,55 +9,9 @@
 
 from mirix.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT
 from mirix.schemas.block import Block
-from mirix.schemas.message import Message
-from mirix.schemas.openai.chat_completion_request import Tool
 from mirix.schemas.user import User as PydanticUser
 
 
-class ContextWindowOverview(BaseModel):
-    """
-    Overview of the context window, including the number of messages and tokens.
-    """
-
-    # top-level information
-    context_window_size_max: int = Field(..., description="The maximum amount of tokens the context window can hold.")
-    context_window_size_current: int = Field(..., description="The current number of tokens in the context window.")
-
-    # context window breakdown (in messages)
-    # (technically not in the context window, but useful to know)
-    num_messages: int = Field(..., description="The number of messages in the context window.")
-    num_archival_memory: int = Field(..., description="The number of messages in the archival memory.")
-    num_recall_memory: int = Field(..., description="The number of messages in the recall memory.")
-    num_tokens_external_memory_summary: int = Field(
-        ...,
-        description="The number of tokens in the external memory summary (archival + recall metadata).",
-    )
-    external_memory_summary: str = Field(
-        ...,
-        description="The metadata summary of the external memory sources (archival + recall metadata).",
-    )
-
-    # context window breakdown (in tokens)
-    # this should all add up to context_window_size_current
-
-    num_tokens_system: int = Field(..., description="The number of tokens in the system prompt.")
-    system_prompt: str = Field(..., description="The content of the system prompt.")
-
-    num_tokens_core_memory: int = Field(..., description="The number of tokens in the core memory.")
-    core_memory: str = Field(..., description="The content of the core memory.")
-
-    num_tokens_summary_memory: int = Field(..., description="The number of tokens in the summary memory.")
-    summary_memory: Optional[str] = Field(None, description="The content of the summary memory.")
-
-    num_tokens_functions_definitions: int = Field(..., description="The number of tokens in the functions definitions.")
-    functions_definitions: Optional[List[Tool]] = Field(..., description="The content of the functions definitions.")
-
-    num_tokens_messages: int = Field(..., description="The number of tokens in the messages list.")
-    # TODO make list of messages?
-    # messages: List[dict] = Field(..., description="The messages in the context window.")
-    messages: List[Message] = Field(..., description="The messages in the context window.")
-
-
 def line_numbers(value: str, prefix: str = "Line ") -> str:
     """
     Turn
diff --git a/mirix/server/server.py b/mirix/server/server.py
index 145c19b66..040d4535c 100644
--- a/mirix/server/server.py
+++ b/mirix/server/server.py
@@ -48,7 +48,7 @@
 # openai schemas
 from mirix.schemas.enums import MessageStreamStatus
 from mirix.schemas.llm_config import LLMConfig
-from mirix.schemas.memory import ContextWindowOverview, RecallMemorySummary
+from mirix.schemas.memory import RecallMemorySummary
 from mirix.schemas.message import Message, MessageCreate, MessageUpdate
 from mirix.schemas.mirix_message import LegacyMirixMessage, MirixMessage, ToolReturnMessage
 from mirix.schemas.mirix_response import MirixResponse
@@ -1289,9 +1289,6 @@ def add_llm_model(self, request: LLMConfig) -> LLMConfig:
     def add_embedding_model(self, request: EmbeddingConfig) -> EmbeddingConfig:
         """Add a new embedding model"""
 
-    async def get_agent_context_window(self, agent_id: str, actor: Client) -> ContextWindowOverview:
-        mirix_agent = await self.load_agent(agent_id=agent_id, actor=actor)
-        return await mirix_agent.get_context_window()
 
     async def run_tool_from_source(
         self,

From d5aecedac857ae35366a7827b60759dd0d9d7bf1 Mon Sep 17 00:00:00 2001
From: Lucas Parzych <lucas_parzych@intuit.com>
Date: Mon, 23 Mar 2026 12:35:33 -0700
Subject: [PATCH 13/13] feat: PR feedback

---
 mirix/agent/agent.py                          | 110 +++++-
 mirix/llm_api/helpers.py                      |  60 ----
 mirix/memory.py                               |  46 ++-
 mirix/orm/message.py                          |   5 +
 mirix/schemas/message.py                      |   5 +
 mirix/server/server.py                        |   1 -
 mirix/services/agent_manager.py               |   5 +-
 mirix/settings.py                             |  29 --
 .../001_add_message_set_retention_count.sql   |   6 +-
 tests/conftest.py                             |  20 +-
 tests/test_agent_prompt_update.py             |   1 +
 tests/test_block_filter_tag_updates.py        |  18 +-
 tests/test_block_filter_tags_update_mode.py   |  18 +-
 tests/test_deletion_apis.py                   |  16 +-
 tests/test_filter_tags_db.py                  |  76 +++--
 tests/test_filter_tags_query.py               |  66 ++--
 tests/test_local_client.py                    |  10 +-
 tests/test_memory_integration.py              |  81 ++---
 tests/test_memory_server.py                   |  12 +-
 tests/test_message_handling.py                | 319 +++++++++++++++++-
 tests/test_multi_scope_access.py              |  48 +--
 tests/test_orm_to_pydantic_safe.py            | 109 +++---
 tests/test_queue.py                           |  13 +-
 tests/test_raw_memory.py                      |  32 +-
 tests/test_raw_memory_with_real_embeddings.py |   4 +-
 tests/test_redis_integration.py               |  22 +-
 tests/test_scoped_blocks.py                   |   8 +-
 tests/test_search_all_users.py                |  52 ++-
 tests/test_search_single_user_core_memory.py  |  18 +-
 tests/test_temporal_queries.py                |  24 +-
 tests/test_user.py                            |  20 +-
 tests/test_user_manager.py                    |  76 ++---
 32 files changed, 772 insertions(+), 558 deletions(-)

diff --git a/mirix/agent/agent.py b/mirix/agent/agent.py
index ca64bd9fb..47962dae6 100644
--- a/mirix/agent/agent.py
+++ b/mirix/agent/agent.py
@@ -37,6 +37,7 @@
 from mirix.llm_api.llm_api_tools import create
 from mirix.llm_api.llm_client import LLMClient
 from mirix.log import get_logger
+from mirix.memory import summarize_messages
 from mirix.observability.context import get_trace_context, mark_observation_as_child
 from mirix.observability.langfuse_client import get_langfuse_client
 from mirix.schemas.agent import AgentState, AgentStepResponse
@@ -66,7 +67,7 @@
 from mirix.services.step_manager import StepManager
 from mirix.services.tool_execution_sandbox import ToolExecutionSandbox
 from mirix.services.user_manager import UserManager
-from mirix.settings import settings, summarizer_settings
+from mirix.settings import settings
 from mirix.system import get_contine_chaining, get_token_limit_warning, package_function_response, package_user_message
 from mirix.tracing import trace_method
 from mirix.utils import (
@@ -1384,6 +1385,7 @@ async def step(
                 accumulated=accumulated,
                 chaining=chaining,
                 llm_client=llm_client,
+                retained_count=len(retained_input_sets),
                 **kwargs,
             )
 
@@ -1986,6 +1988,59 @@ async def construct_system_message(self, message: str) -> str:
 
         return complete_system_prompt
 
+    async def summarize_and_replace_retained_messages(
+        self,
+        retained_messages: List[Message],
+        existing_file_uris: Optional[List[str]] = None,
+    ) -> Message:
+        """Summarize retained input-set messages and replace them in the DB.
+
+        Calls the LLM to produce a summary of the retained messages, persists
+        the summary as a single ``message_type='summary'`` row, then hard-deletes
+        the original retained rows.
+
+        Returns the new summary ``Message`` for use in the in-memory accumulator.
+        """
+        printv(
+            f"[Mirix.Agent.{self.agent_state.name}] INFO: "
+            f"Summarizing {len(retained_messages)} retained messages to recover from context overflow"
+        )
+
+        summary_text = await summarize_messages(
+            agent_state=self.agent_state,
+            message_sequence_to_summarize=retained_messages,
+            existing_file_uris=existing_file_uris,
+        )
+
+        retention_agent_id = self.agent_state.parent_id or self.agent_state.id
+        summary_msg = Message(
+            agent_id=retention_agent_id,
+            role=MessageRole.user,
+            content=[TextContent(text=summary_text)],
+            user_id=self.user_id,
+            message_type="summary",
+        )
+
+        await self.message_manager.create_message(
+            summary_msg,
+            actor=self.actor,
+            client_id=self.client_id,
+            user_id=self.user_id,
+        )
+
+        for msg in retained_messages:
+            await self.message_manager.delete_message_by_id(
+                message_id=msg.id,
+                actor=self.actor,
+            )
+
+        printv(
+            f"[Mirix.Agent.{self.agent_state.name}] INFO: "
+            f"Replaced {len(retained_messages)} retained messages with summary (id={summary_msg.id})"
+        )
+
+        return summary_msg
+
     async def inner_step(
         self,
         messages: Union[Message, List[Message]],
@@ -2002,6 +2057,8 @@ async def inner_step(
         message_queue: Optional[any] = None,
         chaining: bool = True,
         llm_client: Optional[LLMClient] = None,
+        retained_count: int = 0,
+        _summarization_attempted: bool = False,
         **kwargs,
     ) -> AgentStepResponse:
         """Runs a single step in the agent loop (generates at most one LLM call)"""
@@ -2161,7 +2218,55 @@ async def inner_step(
             )
             if is_context_overflow_error(e):
                 num_accumulated = len(accumulated) + len(messages)
-                err_msg = f"Context window exceeded for agent id={self.agent_state.id} with {num_accumulated} in-context messages."
+
+                # Attempt summarization recovery: summarize retained DB messages
+                # and retry once with a smaller context.
+                retained = accumulated[:retained_count] if retained_count > 0 else []
+                if retained and not _summarization_attempted:
+                    printv(
+                        f"[Mirix.Agent.{self.agent_state.name}] INFO: "
+                        f"Context overflow with {num_accumulated} messages — "
+                        f"attempting summarization of {len(retained)} retained messages"
+                    )
+                    try:
+                        summary_msg = await self.summarize_and_replace_retained_messages(retained, existing_file_uris)
+                    except Exception as summarize_err:
+                        printv(
+                            f"[Mirix.Agent.{self.agent_state.name}] ERROR: " f"Summarization failed: {summarize_err}"
+                        )
+                        raise ContextWindowExceededError(
+                            f"Context window exceeded for agent id={self.agent_state.id} "
+                            f"and summarization recovery failed: {summarize_err}",
+                            details={"num_in_context_messages": num_accumulated},
+                        ) from e
+
+                    chaining_outputs = accumulated[retained_count:]
+                    new_accumulated = [summary_msg] + chaining_outputs
+
+                    return await self.inner_step(
+                        messages=messages,
+                        accumulated=new_accumulated,
+                        stream=stream,
+                        step_count=step_count,
+                        force_response=force_response,
+                        topics=topics,
+                        retrieved_memories=retrieved_memories,
+                        display_intermediate_message=display_intermediate_message,
+                        request_user_confirmation=request_user_confirmation,
+                        existing_file_uris=existing_file_uris,
+                        return_memory_types_without_update=return_memory_types_without_update,
+                        message_queue=message_queue,
+                        chaining=chaining,
+                        llm_client=llm_client,
+                        retained_count=1,
+                        _summarization_attempted=True,
+                        **kwargs,
+                    )
+
+                err_msg = (
+                    f"Context window exceeded for agent id={self.agent_state.id} "
+                    f"with {num_accumulated} in-context messages."
+                )
                 printv(f"[Mirix.Agent.{self.agent_state.name}] ERROR: {err_msg}")
                 raise ContextWindowExceededError(
                     err_msg,
@@ -2226,7 +2331,6 @@ def migrate_embedding(self, embedding_config: EmbeddingConfig):
         raise NotImplementedError()
 
 
-
 def strip_name_field_from_user_message(
     user_message_text: str,
 ) -> Tuple[str, Optional[str]]:
diff --git a/mirix/llm_api/helpers.py b/mirix/llm_api/helpers.py
index aa0414f2a..7cecd8b45 100755
--- a/mirix/llm_api/helpers.py
+++ b/mirix/llm_api/helpers.py
@@ -4,9 +4,7 @@
 import httpx
 
 from mirix.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
-from mirix.schemas.enums import MessageRole
 from mirix.schemas.message import Message
-from mirix.settings import summarizer_settings
 from mirix.utils import count_tokens, printd
 
 logger = logging.getLogger(__name__)
@@ -186,64 +184,6 @@ async def make_post_request(url: str, headers: dict[str, str], data: dict[str, A
         raise Exception(error_message) from e
 
 
-def calculate_summarizer_cutoff(
-    in_context_messages: List[Message],
-    token_counts: List[int],
-    logger: "logging.Logger",
-) -> int:
-    if len(in_context_messages) != len(token_counts):
-        raise ValueError(
-            f"Given in_context_messages has different length from given token_counts: {len(in_context_messages)} != {len(token_counts)}"
-        )
-
-    in_context_messages_openai = [m.to_openai_dict() for m in in_context_messages]
-
-    if summarizer_settings.evict_all_messages:
-        logger.debug("Evicting all messages...")
-        return len(in_context_messages)
-    else:
-        # Start at index 1 (past the system message),
-        # and collect messages for summarization until we reach the desired truncation token fraction (eg 50%)
-        # We do the inverse of `desired_memory_token_pressure` to get what we need to remove
-        desired_token_count_to_summarize = int(
-            sum(token_counts) * (1 - summarizer_settings.desired_memory_token_pressure)
-        )
-        logger.debug(f"desired_token_count_to_summarize={desired_token_count_to_summarize}")
-
-        tokens_so_far = 0
-        cutoff = 0
-        for i, msg in enumerate(in_context_messages_openai):
-            # Skip system
-            if i == 0:
-                continue
-            cutoff = i
-            tokens_so_far += token_counts[i]
-
-            if msg["role"] not in ["user", "tool", "function"] and tokens_so_far >= desired_token_count_to_summarize:
-                # The intent of this code is to break on an assistant message boundary,
-                # so that we don't summarize in the middle of a back and forth turn.
-                # Break if the role is NOT a user or tool/function and tokens_so_far is enough
-                break
-            elif len(in_context_messages) - cutoff - 1 <= summarizer_settings.keep_last_n_messages:
-                # Also break if we reached the `keep_last_n_messages` threshold
-                # NOTE: This may be on a user, tool, or function in theory
-                logger.warning(
-                    f"Breaking summary cutoff early on role={msg['role']} because we hit the `keep_last_n_messages`={summarizer_settings.keep_last_n_messages}"
-                )
-                break
-        # If the next message is a tool call result, then include it in the set of messages to summarize as well.
-        # The intent of this code is so that tool calls and their results stay together. They are either both summarized
-        # or neither is.
-        while (
-            cutoff + 1 < len(in_context_messages_openai)
-            and in_context_messages_openai[cutoff + 1]["role"] == MessageRole.tool
-        ):
-            cutoff += 1
-
-        logger.debug("Evicting %s/%s messages...", cutoff, len(in_context_messages))
-        return cutoff + 1
-
-
 def get_token_counts_for_messages(in_context_messages: List[Message]) -> List[int]:
     in_context_messages_openai = [m.to_openai_dict() for m in in_context_messages]
     token_counts = [count_tokens(str(msg)) for msg in in_context_messages_openai]
diff --git a/mirix/memory.py b/mirix/memory.py
index 0498041cc..0e1113b65 100755
--- a/mirix/memory.py
+++ b/mirix/memory.py
@@ -8,7 +8,6 @@
 from mirix.schemas.memory import Memory
 from mirix.schemas.message import Message
 from mirix.schemas.mirix_message_content import TextContent
-from mirix.settings import summarizer_settings
 from mirix.utils import count_tokens, printd
 
 
@@ -62,43 +61,37 @@ async def summarize_messages(
     agent_state: AgentState,
     message_sequence_to_summarize: List[Message],
     existing_file_uris: Optional[List[str]] = None,
-):
-    """Summarize a message sequence using GPT"""
-    # we need the context_window
+) -> str:
+    """Summarize a message sequence using the agent's LLM.
+
+    If the formatted input exceeds ~60% of the context window, the message
+    list is truncated (keeping the newest messages) so the summarizer call
+    itself doesn't overflow.
+    """
     context_window = agent_state.llm_config.context_window
+    max_input_tokens = int(context_window * 0.6)
 
-    summary_prompt = SUMMARY_PROMPT_SYSTEM
     summary_input = _format_summary_history(message_sequence_to_summarize)
     summary_input_tkns = count_tokens(summary_input)
-    if summary_input_tkns > summarizer_settings.memory_warning_threshold * context_window:
-        trunc_ratio = (
-            summarizer_settings.memory_warning_threshold * context_window / summary_input_tkns
-        ) * 0.8  # For good measure...
-        cutoff = int(len(message_sequence_to_summarize) * trunc_ratio)
-        summary_input = str(
-            [
-                await summarize_messages(
-                    agent_state,
-                    message_sequence_to_summarize=message_sequence_to_summarize[:cutoff],
-                )
-            ]
-            + message_sequence_to_summarize[cutoff:]
-        )
-
-    dummy_agent_id = agent_state.id
+
+    if summary_input_tkns > max_input_tokens:
+        ratio = max_input_tokens / summary_input_tkns * 0.8
+        keep = max(1, int(len(message_sequence_to_summarize) * ratio))
+        summary_input = _format_summary_history(message_sequence_to_summarize[-keep:])
+
     message_sequence = [
         Message(
-            agent_id=dummy_agent_id,
+            agent_id=agent_state.id,
             role=MessageRole.system,
-            content=[TextContent(text=summary_prompt)],
+            content=[TextContent(text=SUMMARY_PROMPT_SYSTEM)],
         ),
         Message(
-            agent_id=dummy_agent_id,
+            agent_id=agent_state.id,
             role=MessageRole.assistant,
             content=[TextContent(text=MESSAGE_SUMMARY_REQUEST_ACK)],
         ),
         Message(
-            agent_id=dummy_agent_id,
+            agent_id=agent_state.id,
             role=MessageRole.user,
             content=[TextContent(text=summary_input)],
         ),
@@ -114,5 +107,4 @@ async def summarize_messages(
     )
 
     printd(f"summarize_messages gpt reply: {response.choices[0]}")
-    reply = response.choices[0].message.content
-    return reply
+    return response.choices[0].message.content
diff --git a/mirix/orm/message.py b/mirix/orm/message.py
index 8362178de..d0847adbe 100755
--- a/mirix/orm/message.py
+++ b/mirix/orm/message.py
@@ -79,6 +79,11 @@ class Message(SqlalchemyBase, OrganizationMixin, UserMixin, AgentMixin):
         nullable=True,
         doc="The id of the sender of the message, can be an identity id or agent id",
     )
+    message_type: Mapped[Optional[str]] = mapped_column(
+        nullable=True,
+        default="original",
+        doc="Type of message: 'original' for user input, 'summary' for summarized retained context",
+    )
 
     # Relationships
     agent: Mapped["Agent"] = relationship("Agent", back_populates="messages", lazy="selectin")
diff --git a/mirix/schemas/message.py b/mirix/schemas/message.py
index 515941280..bd616fbf4 100644
--- a/mirix/schemas/message.py
+++ b/mirix/schemas/message.py
@@ -170,6 +170,11 @@ class Message(BaseMessage):
         ],
     )
 
+    message_type: Optional[str] = Field(
+        default="original",
+        description="Type of message: 'original' for user input, 'summary' for summarized retained context",
+    )
+
     @field_validator("role")
     @classmethod
     def validate_role(cls, v: str) -> str:
diff --git a/mirix/server/server.py b/mirix/server/server.py
index 040d4535c..6a7ed9ebd 100644
--- a/mirix/server/server.py
+++ b/mirix/server/server.py
@@ -1289,7 +1289,6 @@ def add_llm_model(self, request: LLMConfig) -> LLMConfig:
     def add_embedding_model(self, request: EmbeddingConfig) -> EmbeddingConfig:
         """Add a new embedding model"""
 
-
     async def run_tool_from_source(
         self,
         actor: Client,
diff --git a/mirix/services/agent_manager.py b/mirix/services/agent_manager.py
index 392e01c6a..6c5a856b2 100644
--- a/mirix/services/agent_manager.py
+++ b/mirix/services/agent_manager.py
@@ -624,7 +624,6 @@ async def update_agent_tools_and_system_prompts(
                 actor=actor,
             )
 
-
     @enforce_types
     async def _create_agent(
         self,
@@ -684,7 +683,9 @@ async def update_llm_config(
         )
 
     @enforce_types
-    async def update_system_prompt(self, agent_id: str, system_prompt: str, actor: PydanticClient) -> PydanticAgentState:
+    async def update_system_prompt(
+        self, agent_id: str, system_prompt: str, actor: PydanticClient
+    ) -> PydanticAgentState:
         return await self.update_agent(
             agent_id=agent_id,
             agent_update=UpdateAgent(system=system_prompt),
diff --git a/mirix/settings.py b/mirix/settings.py
index 8b8c05566..a0d5576ed 100755
--- a/mirix/settings.py
+++ b/mirix/settings.py
@@ -22,34 +22,6 @@ class ToolSettings(BaseSettings):
     local_sandbox_dir: Optional[str] = None
 
 
-class SummarizerSettings(BaseSettings):
-    model_config = SettingsConfigDict(env_prefix="mirix_summarizer_", extra="ignore")
-
-    # Controls if we should evict all messages
-    # TODO: Can refactor this into an enum if we have a bunch of different kinds of summarizers
-    evict_all_messages: bool = False
-
-    # The maximum number of retries for the summarizer
-    # If we reach this cutoff, it probably means that the summarizer is not compressing down the in-context messages any further
-    # And we throw a fatal error
-    max_summarizer_retries: int = 3
-
-    # When to warn the model that a summarize command will happen soon
-    # The amount of tokens before a system warning about upcoming truncation is sent to Mirix
-    memory_warning_threshold: float = 0.75
-
-    # Whether to send the system memory warning message
-    send_memory_warning_message: bool = False
-
-    # The desired memory pressure to summarize down to
-    desired_memory_token_pressure: float = 0.1
-
-    # The number of messages at the end to keep
-    # Even when summarizing, we may want to keep a handful of recent messages
-    # These serve as in-context examples of how to use functions / what user messages look like
-    keep_last_n_messages: int = 5
-
-
 class ModelSettings(BaseSettings):
     model_config = SettingsConfigDict(env_file=".env", extra="ignore")
 
@@ -281,4 +253,3 @@ class TestSettings(Settings):
 test_settings = TestSettings()
 model_settings = ModelSettings()
 tool_settings = ToolSettings()
-summarizer_settings = SummarizerSettings()
diff --git a/scripts/migrations/001_add_message_set_retention_count.sql b/scripts/migrations/001_add_message_set_retention_count.sql
index 07f552d95..cdd9d9a19 100644
--- a/scripts/migrations/001_add_message_set_retention_count.sql
+++ b/scripts/migrations/001_add_message_set_retention_count.sql
@@ -6,7 +6,11 @@
 ALTER TABLE clients
     ADD COLUMN IF NOT EXISTS message_set_retention_count INTEGER DEFAULT 0;
 
--- 2. Add composite index for efficient retention queries on messages
+-- 2. Add message_type to messages table (distinguishes original vs summary messages)
+ALTER TABLE messages
+    ADD COLUMN IF NOT EXISTS message_type VARCHAR DEFAULT 'original';
+
+-- 3. Add composite index for efficient retention queries on messages
 --    Supports: ORDER BY created_at DESC, id DESC WHERE agent_id=? AND user_id=?
 CREATE INDEX IF NOT EXISTS ix_messages_agent_user_created_at
     ON messages (agent_id, user_id, created_at, id);
diff --git a/tests/conftest.py b/tests/conftest.py
index 41169f572..64237d768 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -20,6 +20,7 @@
 from mirix.services.organization_manager import OrganizationManager
 from mirix.settings import settings
 
+
 @pytest_asyncio.fixture(scope="module", autouse=True)
 async def _reset_engine_per_module():
     """Dispose and recreate the async engine with NullPool at the start of
@@ -43,14 +44,10 @@ async def _reset_engine_per_module():
         from sqlalchemy.pool import NullPool
 
         await server_module.engine.dispose()
-        _pg_uri = (
-            settings.mirix_pg_uri.replace(
-                "postgresql+pg8000://", "postgresql+asyncpg://"
-            ).replace("postgresql://", "postgresql+asyncpg://")
-        )
-        server_module.engine = create_async_engine(
-            _pg_uri, poolclass=NullPool, echo=settings.pg_echo
+        _pg_uri = settings.mirix_pg_uri.replace("postgresql+pg8000://", "postgresql+asyncpg://").replace(
+            "postgresql://", "postgresql+asyncpg://"
         )
+        server_module.engine = create_async_engine(_pg_uri, poolclass=NullPool, echo=settings.pg_echo)
         server_module.AsyncSessionLocal = async_sessionmaker(
             bind=server_module.engine,
             class_=AsyncSession,
@@ -72,9 +69,7 @@ async def _ensure_org(org_mgr: OrganizationManager, org_id: str, org_name: str):
     try:
         await org_mgr.get_organization_by_id(org_id)
     except Exception:
-        await org_mgr.create_organization(
-            PydanticOrganization(id=org_id, name=org_name)
-        )
+        await org_mgr.create_organization(PydanticOrganization(id=org_id, name=org_name))
 
 
 async def _issue_key(client_id: str, org_id: str, client_mgr: ClientManager) -> str:
@@ -83,9 +78,7 @@ async def _issue_key(client_id: str, org_id: str, client_mgr: ClientManager) ->
     return api_key
 
 
-async def _create_client_and_key(
-    client_id: str, org_id: str, org_name: Optional[str] = None
-) -> dict:
+async def _create_client_and_key(client_id: str, org_id: str, org_name: Optional[str] = None) -> dict:
     """
     Create one test client and API key in the current event loop.
     Use this from async fixtures when you need multiple clients in the same loop
@@ -115,6 +108,7 @@ def api_key_factory():
     """
     Factory to provision API keys for test clients.
     """
+
     def _create(client_id: str = TEST_CLIENT_ID, org_id: str = TEST_ORG_ID):
         result = asyncio.run(_create_client_and_key(client_id, org_id))
         os.environ["MIRIX_API_KEY"] = result["api_key"]
diff --git a/tests/test_agent_prompt_update.py b/tests/test_agent_prompt_update.py
index 4612a7c4c..ce52eb1e1 100644
--- a/tests/test_agent_prompt_update.py
+++ b/tests/test_agent_prompt_update.py
@@ -102,6 +102,7 @@ async def client(server_check, api_auth):
             print(f"[SETUP] ⚠ Warning: Test memory addition failed: {e}")
     except Exception as e:
         import traceback
+
         print(f"\n[ERROR] Failed to create/get user: {e}")
         pytest.skip(f"Failed to create/get user: {e}")
 
diff --git a/tests/test_block_filter_tag_updates.py b/tests/test_block_filter_tag_updates.py
index 1742f7603..f33f1f0d5 100644
--- a/tests/test_block_filter_tag_updates.py
+++ b/tests/test_block_filter_tag_updates.py
@@ -35,11 +35,11 @@
 from mirix.schemas.enums import MessageRole
 from mirix.schemas.message import MessageCreate
 
-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
 
+
 def _test_id(prefix: str) -> str:
     return f"{prefix}-{uuid.uuid4().hex[:8]}"
 
@@ -60,9 +60,7 @@ async def ensure_queue_org():
     try:
         await org_mgr.get_organization_by_id(QUEUE_TEST_ORG_ID)
     except Exception:
-        await org_mgr.create_organization(
-            PydanticOrganization(id=QUEUE_TEST_ORG_ID, name="BFT Queue Test Org")
-        )
+        await org_mgr.create_organization(PydanticOrganization(id=QUEUE_TEST_ORG_ID, name="BFT Queue Test Org"))
     return QUEUE_TEST_ORG_ID
 
 
@@ -370,9 +368,7 @@ async def test_put_messages_serializes_update_mode(
 
         await manager.cleanup()
 
-    async def test_put_messages_default_merge(
-        self, queue_clean_manager, queue_sample_client, queue_sample_messages
-    ):
+    async def test_put_messages_default_merge(self, queue_clean_manager, queue_sample_client, queue_sample_messages):
         """put_messages without explicit update_mode defaults to 'merge'."""
         manager = queue_clean_manager
         await manager.initialize()
@@ -397,9 +393,7 @@ async def test_update_mode_passed_through_to_send_messages(
 
         # Worker resolves actor via server.client_manager.get_client_by_id; provide it.
         queue_mock_server.client_manager = Mock()
-        queue_mock_server.client_manager.get_client_by_id = AsyncMock(
-            return_value=queue_sample_client
-        )
+        queue_mock_server.client_manager.get_client_by_id = AsyncMock(return_value=queue_sample_client)
 
         manager = queue_clean_manager
         await initialize_queue(queue_mock_server)
@@ -447,9 +441,7 @@ async def setup(self):
         try:
             org = await org_mgr.get_organization_by_id(org_id)
         except Exception:
-            org = await org_mgr.create_organization(
-                PydanticOrganization(id=org_id, name="BFT Test Org")
-            )
+            org = await org_mgr.create_organization(PydanticOrganization(id=org_id, name="BFT Test Org"))
 
         user_mgr = UserManager()
         user_id = _test_id("bft-user")
diff --git a/tests/test_block_filter_tags_update_mode.py b/tests/test_block_filter_tags_update_mode.py
index be875c1bc..8ffe424b0 100644
--- a/tests/test_block_filter_tags_update_mode.py
+++ b/tests/test_block_filter_tags_update_mode.py
@@ -35,11 +35,11 @@
 from mirix.schemas.enums import MessageRole
 from mirix.schemas.message import MessageCreate
 
-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
 
+
 def _test_id(prefix: str) -> str:
     return f"{prefix}-{uuid.uuid4().hex[:8]}"
 
@@ -60,9 +60,7 @@ async def ensure_queue_org():
     try:
         await org_mgr.get_organization_by_id(QUEUE_TEST_ORG_ID)
     except Exception:
-        await org_mgr.create_organization(
-            PydanticOrganization(id=QUEUE_TEST_ORG_ID, name="BFT Queue Test Org")
-        )
+        await org_mgr.create_organization(PydanticOrganization(id=QUEUE_TEST_ORG_ID, name="BFT Queue Test Org"))
     return QUEUE_TEST_ORG_ID
 
 
@@ -485,9 +483,7 @@ async def test_put_messages_serializes_update_mode(
 
         await manager.cleanup()
 
-    async def test_put_messages_default_merge(
-        self, queue_clean_manager, queue_sample_client, queue_sample_messages
-    ):
+    async def test_put_messages_default_merge(self, queue_clean_manager, queue_sample_client, queue_sample_messages):
         """put_messages without explicit update_mode defaults to 'merge'."""
         manager = queue_clean_manager
         await manager.initialize()
@@ -512,9 +508,7 @@ async def test_update_mode_passed_through_to_send_messages(
 
         # Worker resolves actor via server.client_manager.get_client_by_id; provide it.
         queue_mock_server.client_manager = Mock()
-        queue_mock_server.client_manager.get_client_by_id = AsyncMock(
-            return_value=queue_sample_client
-        )
+        queue_mock_server.client_manager.get_client_by_id = AsyncMock(return_value=queue_sample_client)
 
         manager = queue_clean_manager
         await initialize_queue(queue_mock_server)
@@ -562,9 +556,7 @@ async def setup(self):
         try:
             org = await org_mgr.get_organization_by_id(org_id)
         except Exception:
-            org = await org_mgr.create_organization(
-                PydanticOrganization(id=org_id, name="BFT Test Org")
-            )
+            org = await org_mgr.create_organization(PydanticOrganization(id=org_id, name="BFT Test Org"))
 
         user_mgr = UserManager()
         user_id = _test_id("bft-user")
diff --git a/tests/test_deletion_apis.py b/tests/test_deletion_apis.py
index 083169147..ba797611d 100644
--- a/tests/test_deletion_apis.py
+++ b/tests/test_deletion_apis.py
@@ -199,9 +199,7 @@ async def add_test_memories(client: MirixClient, user_id: str, batch_label: str)
     logger.info("⏱️  Checking if memories were stored in database...")
 
     async with db_context() as session:
-        r = await session.execute(
-            select(func.count()).select_from(MessageModel).where(MessageModel.user_id == user_id)
-        )
+        r = await session.execute(select(func.count()).select_from(MessageModel).where(MessageModel.user_id == user_id))
         message_count = r.scalar_one()
     logger.info("✓ Messages in database after batch %s: %d", batch_label, message_count)
 
@@ -240,23 +238,17 @@ async def count_memories_via_api(user_id: str, log_details: bool = False) -> dic
         )
         procedural_count = r.scalar_one()
 
-        r = await session.execute(
-            select(func.count()).select_from(MessageModel).where(MessageModel.user_id == user_id)
-        )
+        r = await session.execute(select(func.count()).select_from(MessageModel).where(MessageModel.user_id == user_id))
         message_count = r.scalar_one()
 
-        r = await session.execute(
-            select(func.count()).select_from(BlockModel).where(BlockModel.user_id == user_id)
-        )
+        r = await session.execute(select(func.count()).select_from(BlockModel).where(BlockModel.user_id == user_id))
         block_count = r.scalar_one()
 
         # Log details for debugging
         if log_details:
             logger.debug("Memory details for user %s:", user_id)
             if semantic_count == 0:
-                result = await session.execute(
-                    select(SemanticMemoryItem).where(SemanticMemoryItem.user_id == user_id)
-                )
+                result = await session.execute(select(SemanticMemoryItem).where(SemanticMemoryItem.user_id == user_id))
                 all_semantic = result.scalars().all()
                 logger.debug("  Total semantic memories (including deleted): %d", len(all_semantic))
                 for mem in all_semantic:
diff --git a/tests/test_filter_tags_db.py b/tests/test_filter_tags_db.py
index 5271705c8..4f188a553 100644
--- a/tests/test_filter_tags_db.py
+++ b/tests/test_filter_tags_db.py
@@ -31,11 +31,11 @@
 from mirix.schemas.user import User as PydanticUser
 from mirix.services.raw_memory_manager import RawMemoryManager
 
-
 # =================================================================
 # FIXTURES
 # =================================================================
 
+
 @pytest.fixture
 def raw_memory_manager():
     return RawMemoryManager()
@@ -54,9 +54,7 @@ async def test_actor():
     try:
         await org_mgr.get_organization_by_id(org_id)
     except Exception:
-        await org_mgr.create_organization(
-            PydanticOrganization(id=org_id, name="Filter Tags Test Org")
-        )
+        await org_mgr.create_organization(PydanticOrganization(id=org_id, name="Filter Tags Test Org"))
 
     client_id = f"test-filter-tags-client-{uuid.uuid4().hex[:8]}"
     try:
@@ -115,11 +113,14 @@ async def _create_memory(raw_memory_manager, test_actor, test_user, context, fil
 # $contains operator
 # =================================================================
 
+
 class TestContainsOperator:
     async def test_contains_matches_array_value(self, raw_memory_manager, test_actor, test_user):
         """$contains finds a value inside a stored JSON array."""
         mem = await _create_memory(
-            raw_memory_manager, test_actor, test_user,
+            raw_memory_manager,
+            test_actor,
+            test_user,
             "contains-match",
             {"scope": "test-ft", "account_ids": ["ABC", "DEF"]},
         )
@@ -137,7 +138,9 @@ async def test_contains_matches_array_value(self, raw_memory_manager, test_actor
     async def test_contains_no_match(self, raw_memory_manager, test_actor, test_user):
         """$contains returns nothing when value is not in the array."""
         mem = await _create_memory(
-            raw_memory_manager, test_actor, test_user,
+            raw_memory_manager,
+            test_actor,
+            test_user,
             "contains-no-match",
             {"scope": "test-ft", "account_ids": ["ABC", "DEF"]},
         )
@@ -155,7 +158,9 @@ async def test_contains_no_match(self, raw_memory_manager, test_actor, test_user
     async def test_contains_missing_key_no_error(self, raw_memory_manager, test_actor, test_user):
         """$contains on a key that doesn't exist silently excludes the row."""
         mem = await _create_memory(
-            raw_memory_manager, test_actor, test_user,
+            raw_memory_manager,
+            test_actor,
+            test_user,
             "contains-missing-key",
             {"scope": "test-ft"},
         )
@@ -173,7 +178,9 @@ async def test_contains_missing_key_no_error(self, raw_memory_manager, test_acto
     async def test_contains_scalar_value_no_error(self, raw_memory_manager, test_actor, test_user):
         """$contains on a key that holds a scalar (not array) silently excludes the row."""
         mem = await _create_memory(
-            raw_memory_manager, test_actor, test_user,
+            raw_memory_manager,
+            test_actor,
+            test_user,
             "contains-scalar",
             {"scope": "test-ft", "account_ids": "ABC"},
         )
@@ -193,10 +200,13 @@ async def test_contains_scalar_value_no_error(self, raw_memory_manager, test_act
 # $exists operator
 # =================================================================
 
+
 class TestExistsOperator:
     async def test_exists_true_matches(self, raw_memory_manager, test_actor, test_user):
         mem = await _create_memory(
-            raw_memory_manager, test_actor, test_user,
+            raw_memory_manager,
+            test_actor,
+            test_user,
             "exists-true",
             {"scope": "test-ft", "project_id": "proj-1"},
         )
@@ -213,7 +223,9 @@ async def test_exists_true_matches(self, raw_memory_manager, test_actor, test_us
 
     async def test_exists_true_excludes_missing_key(self, raw_memory_manager, test_actor, test_user):
         mem = await _create_memory(
-            raw_memory_manager, test_actor, test_user,
+            raw_memory_manager,
+            test_actor,
+            test_user,
             "exists-true-missing",
             {"scope": "test-ft"},
         )
@@ -230,7 +242,9 @@ async def test_exists_true_excludes_missing_key(self, raw_memory_manager, test_a
 
     async def test_exists_false_matches_missing_key(self, raw_memory_manager, test_actor, test_user):
         mem = await _create_memory(
-            raw_memory_manager, test_actor, test_user,
+            raw_memory_manager,
+            test_actor,
+            test_user,
             "exists-false",
             {"scope": "test-ft"},
         )
@@ -250,10 +264,13 @@ async def test_exists_false_matches_missing_key(self, raw_memory_manager, test_a
 # $in operator
 # =================================================================
 
+
 class TestInOperator:
     async def test_in_matches(self, raw_memory_manager, test_actor, test_user):
         mem = await _create_memory(
-            raw_memory_manager, test_actor, test_user,
+            raw_memory_manager,
+            test_actor,
+            test_user,
             "in-match",
             {"scope": "test-ft", "status": "active"},
         )
@@ -270,7 +287,9 @@ async def test_in_matches(self, raw_memory_manager, test_actor, test_user):
 
     async def test_in_no_match(self, raw_memory_manager, test_actor, test_user):
         mem = await _create_memory(
-            raw_memory_manager, test_actor, test_user,
+            raw_memory_manager,
+            test_actor,
+            test_user,
             "in-no-match",
             {"scope": "test-ft", "status": "archived"},
         )
@@ -290,6 +309,7 @@ async def test_in_no_match(self, raw_memory_manager, test_actor, test_user):
 # scopes parameter
 # =================================================================
 
+
 class TestScopes:
     async def test_scopes_filters_by_scope(self, raw_memory_manager, test_actor, test_user):
         """scopes parameter translates to scope IN (...) correctly.
@@ -299,7 +319,9 @@ async def test_scopes_filters_by_scope(self, raw_memory_manager, test_actor, tes
         with the matching scope finds the memory and a non-matching scope does not.
         """
         mem = await _create_memory(
-            raw_memory_manager, test_actor, test_user,
+            raw_memory_manager,
+            test_actor,
+            test_user,
             "scope-match",
             {},
         )
@@ -328,7 +350,9 @@ async def test_scopes_filters_by_scope(self, raw_memory_manager, test_actor, tes
 
     async def test_empty_scopes_returns_nothing(self, raw_memory_manager, test_actor, test_user):
         mem = await _create_memory(
-            raw_memory_manager, test_actor, test_user,
+            raw_memory_manager,
+            test_actor,
+            test_user,
             "empty-scopes",
             {"scope": "test-ft"},
         )
@@ -346,7 +370,9 @@ async def test_empty_scopes_returns_nothing(self, raw_memory_manager, test_actor
     async def test_read_scopes_in_filter_tags_ignored(self, raw_memory_manager, test_actor, test_user):
         """read_scopes key in filter_tags is ignored; use scopes param instead."""
         mem = await _create_memory(
-            raw_memory_manager, test_actor, test_user,
+            raw_memory_manager,
+            test_actor,
+            test_user,
             "ignored-read-scopes",
             {"scope": "test-ft"},
         )
@@ -367,10 +393,13 @@ async def test_read_scopes_in_filter_tags_ignored(self, raw_memory_manager, test
 # Backward compatibility
 # =================================================================
 
+
 class TestBackwardCompatibility:
     async def test_plain_scalar_exact_match(self, raw_memory_manager, test_actor, test_user):
         mem = await _create_memory(
-            raw_memory_manager, test_actor, test_user,
+            raw_memory_manager,
+            test_actor,
+            test_user,
             "scalar-match",
             {"scope": "test-ft", "priority": "high"},
         )
@@ -388,7 +417,9 @@ async def test_plain_scalar_exact_match(self, raw_memory_manager, test_actor, te
     async def test_null_filter_tags_excluded_by_exists(self, raw_memory_manager, test_actor, test_user):
         """Rows with NULL filter_tags are silently excluded by $exists: true."""
         mem = await _create_memory(
-            raw_memory_manager, test_actor, test_user,
+            raw_memory_manager,
+            test_actor,
+            test_user,
             "null-filter-tags",
             None,
         )
@@ -408,16 +439,21 @@ async def test_null_filter_tags_excluded_by_exists(self, raw_memory_manager, tes
 # Mixed operators
 # =================================================================
 
+
 class TestMixedOperators:
     async def test_contains_and_scalar_combined(self, raw_memory_manager, test_actor, test_user):
         """Combining $contains with a plain scalar filter (AND)."""
         mem = await _create_memory(
-            raw_memory_manager, test_actor, test_user,
+            raw_memory_manager,
+            test_actor,
+            test_user,
             "mixed-match",
             {"scope": "test-ft", "account_ids": ["ABC", "DEF"], "priority": "high"},
         )
         mem_no_match = await _create_memory(
-            raw_memory_manager, test_actor, test_user,
+            raw_memory_manager,
+            test_actor,
+            test_user,
             "mixed-no-match",
             {"scope": "test-ft", "account_ids": ["ABC", "DEF"], "priority": "low"},
         )
diff --git a/tests/test_filter_tags_query.py b/tests/test_filter_tags_query.py
index 98dd2aed3..2f4f02f9f 100644
--- a/tests/test_filter_tags_query.py
+++ b/tests/test_filter_tags_query.py
@@ -17,11 +17,11 @@
     can_redis_handle,
 )
 
-
 # ---------------------------------------------------------------------------
 # Minimal ORM model for testing SQLAlchemy compilation (no real DB needed)
 # ---------------------------------------------------------------------------
 
+
 class _Base(DeclarativeBase):
     pass
 
@@ -44,6 +44,7 @@ def _compile_query(query) -> str:
 # can_redis_handle
 # ===================================================================
 
+
 class TestCanRedisHandle:
     def test_none_filter_tags(self):
         assert can_redis_handle(None) is True
@@ -77,6 +78,7 @@ def test_mixed_scalar_and_operator(self):
 # build_filter_tags_redis
 # ===================================================================
 
+
 class TestBuildFilterTagsRedis:
     def test_none_no_scopes(self):
         assert build_filter_tags_redis(None) == ""
@@ -125,6 +127,7 @@ def test_special_chars_escaped(self):
 # build_filter_tags_raw_sql
 # ===================================================================
 
+
 class TestBuildFilterTagsRawSql:
     def test_none(self):
         clauses, params = build_filter_tags_raw_sql(None)
@@ -160,56 +163,42 @@ def test_scopes_empty(self):
         assert clauses == ["1 = 0"]
 
     def test_scopes_with_filter_tags(self):
-        clauses, params = build_filter_tags_raw_sql(
-            {"env": "prod"}, scopes=["A"]
-        )
+        clauses, params = build_filter_tags_raw_sql({"env": "prod"}, scopes=["A"])
         assert len(clauses) == 2
         assert any("filter_tags->>'scope' IN" in c for c in clauses)
         assert any("filter_tags->>'env'" in c for c in clauses)
 
     def test_ignored_keys_excluded(self):
-        clauses, params = build_filter_tags_raw_sql(
-            {"read_scopes": ["X"], "scope": "Y", "env": "prod"}
-        )
+        clauses, params = build_filter_tags_raw_sql({"read_scopes": ["X"], "scope": "Y", "env": "prod"})
         assert len(clauses) == 1
         assert "filter_tags->>'env'" in clauses[0]
 
     def test_contains_operator(self):
-        clauses, params = build_filter_tags_raw_sql(
-            {"account_ids": {"$contains": "ABC"}}
-        )
+        clauses, params = build_filter_tags_raw_sql({"account_ids": {"$contains": "ABC"}})
         assert len(clauses) == 1
         assert "filter_tags::jsonb @>" in clauses[0]
         param_val = json.loads(params["filter_contains_account_ids"])
         assert param_val == {"account_ids": ["ABC"]}
 
     def test_exists_true(self):
-        clauses, params = build_filter_tags_raw_sql(
-            {"account_ids": {"$exists": True}}
-        )
+        clauses, params = build_filter_tags_raw_sql({"account_ids": {"$exists": True}})
         assert len(clauses) == 1
         assert "filter_tags::jsonb ? 'account_ids'" == clauses[0]
 
     def test_exists_false(self):
-        clauses, params = build_filter_tags_raw_sql(
-            {"account_ids": {"$exists": False}}
-        )
+        clauses, params = build_filter_tags_raw_sql({"account_ids": {"$exists": False}})
         assert len(clauses) == 1
         assert "NOT (filter_tags::jsonb ? 'account_ids')" == clauses[0]
 
     def test_in_operator(self):
-        clauses, params = build_filter_tags_raw_sql(
-            {"status": {"$in": ["active", "pending"]}}
-        )
+        clauses, params = build_filter_tags_raw_sql({"status": {"$in": ["active", "pending"]}})
         assert len(clauses) == 1
         assert "filter_tags->>'status' IN" in clauses[0]
         assert params["filter_in_status_0"] == "active"
         assert params["filter_in_status_1"] == "pending"
 
     def test_in_operator_empty_list(self):
-        clauses, params = build_filter_tags_raw_sql(
-            {"status": {"$in": []}}
-        )
+        clauses, params = build_filter_tags_raw_sql({"status": {"$in": []}})
         assert clauses == ["1 = 0"]
 
     def test_unknown_operator_raises(self):
@@ -221,15 +210,11 @@ def test_multiple_operators_in_one_dict_raises(self):
             build_filter_tags_raw_sql({"x": {"$contains": "a", "$in": ["b"]}})
 
     def test_mixed_scalar_and_operator(self):
-        clauses, params = build_filter_tags_raw_sql(
-            {"env": "prod", "account_ids": {"$contains": "ABC"}}
-        )
+        clauses, params = build_filter_tags_raw_sql({"env": "prod", "account_ids": {"$contains": "ABC"}})
         assert len(clauses) == 2
 
     def test_scopes_with_operator(self):
-        clauses, params = build_filter_tags_raw_sql(
-            {"account_ids": {"$contains": "X"}}, scopes=["A"]
-        )
+        clauses, params = build_filter_tags_raw_sql({"account_ids": {"$contains": "X"}}, scopes=["A"])
         assert len(clauses) == 2
 
 
@@ -237,6 +222,7 @@ def test_scopes_with_operator(self):
 # apply_filter_tags_sqlalchemy
 # ===================================================================
 
+
 class TestApplyFilterTagsSqlalchemy:
     def _base_query(self):
         return select(_FakeMemory)
@@ -273,51 +259,39 @@ def test_scopes_empty(self):
 
     def test_ignored_keys_excluded(self):
         q = self._base_query()
-        result = apply_filter_tags_sqlalchemy(
-            q, _FakeMemory, {"read_scopes": ["X"], "scope": "Y"}
-        )
+        result = apply_filter_tags_sqlalchemy(q, _FakeMemory, {"read_scopes": ["X"], "scope": "Y"})
         sql = _compile_query(result)
         assert _compile_query(result) == _compile_query(q)
 
     def test_contains_operator(self):
         q = self._base_query()
-        result = apply_filter_tags_sqlalchemy(
-            q, _FakeMemory, {"account_ids": {"$contains": "ABC"}}
-        )
+        result = apply_filter_tags_sqlalchemy(q, _FakeMemory, {"account_ids": {"$contains": "ABC"}})
         sql = _compile_query(result)
         assert "CAST" in sql or "cast" in sql.lower() or "@>" in sql
 
     def test_exists_true(self):
         q = self._base_query()
-        result = apply_filter_tags_sqlalchemy(
-            q, _FakeMemory, {"account_ids": {"$exists": True}}
-        )
+        result = apply_filter_tags_sqlalchemy(q, _FakeMemory, {"account_ids": {"$exists": True}})
         sql = _compile_query(result)
         assert "JSONB" in sql.upper() or "jsonb" in sql
         assert "?" in sql
 
     def test_exists_false(self):
         q = self._base_query()
-        result = apply_filter_tags_sqlalchemy(
-            q, _FakeMemory, {"account_ids": {"$exists": False}}
-        )
+        result = apply_filter_tags_sqlalchemy(q, _FakeMemory, {"account_ids": {"$exists": False}})
         sql = _compile_query(result)
         assert "?" in sql
         assert "NOT" in sql.upper()
 
     def test_in_operator(self):
         q = self._base_query()
-        result = apply_filter_tags_sqlalchemy(
-            q, _FakeMemory, {"status": {"$in": ["active", "pending"]}}
-        )
+        result = apply_filter_tags_sqlalchemy(q, _FakeMemory, {"status": {"$in": ["active", "pending"]}})
         sql = _compile_query(result)
         assert "IN" in sql.upper()
 
     def test_in_operator_empty(self):
         q = self._base_query()
-        result = apply_filter_tags_sqlalchemy(
-            q, _FakeMemory, {"status": {"$in": []}}
-        )
+        result = apply_filter_tags_sqlalchemy(q, _FakeMemory, {"status": {"$in": []}})
         sql = _compile_query(result)
         assert "1 = 0" in sql
 
diff --git a/tests/test_local_client.py b/tests/test_local_client.py
index b487aecf6..4102d1356 100644
--- a/tests/test_local_client.py
+++ b/tests/test_local_client.py
@@ -710,8 +710,9 @@ async def test_send_messages_passes_block_filter_tags_to_server(self, client_a):
         block_filter_tags = {"env": "staging", "team": "platform"}
         mock_send = AsyncMock(return_value=MirixUsageStatistics())
         # Patch MirixResponse so return path doesn't validate messages (MessageCreate != MirixMessageUnion)
-        with patch.object(client_a.server, "send_messages", mock_send), patch(
-            "mirix.local_client.local_client.MirixResponse", Mock
+        with (
+            patch.object(client_a.server, "send_messages", mock_send),
+            patch("mirix.local_client.local_client.MirixResponse", Mock),
         ):
             messages = [MessageCreate(role=MessageRole.user, content="Hello")]
             await client_a.send_messages(
@@ -725,8 +726,9 @@ async def test_send_messages_passes_block_filter_tags_to_server(self, client_a):
     async def test_send_messages_passes_none_block_filter_tags(self, client_a):
         """LocalClient.send_messages() without block_filter_tags passes None (or omits)."""
         mock_send = AsyncMock(return_value=MirixUsageStatistics())
-        with patch.object(client_a.server, "send_messages", mock_send), patch(
-            "mirix.local_client.local_client.MirixResponse", Mock
+        with (
+            patch.object(client_a.server, "send_messages", mock_send),
+            patch("mirix.local_client.local_client.MirixResponse", Mock),
         ):
             messages = [MessageCreate(role=MessageRole.user, content="Hi")]
             await client_a.send_messages(agent_id="test-agent-id", messages=messages)
diff --git a/tests/test_memory_integration.py b/tests/test_memory_integration.py
index db685dda1..d11e6f20d 100644
--- a/tests/test_memory_integration.py
+++ b/tests/test_memory_integration.py
@@ -305,9 +305,7 @@ async def msg_api_auth(server_process):
     """Provision a dedicated client for message lifecycle tests."""
     from conftest import _create_client_and_key
 
-    auth = await _create_client_and_key(
-        MSG_TEST_CLIENT_ID, TEST_ORG_ID, org_name="Demo Org"
-    )
+    auth = await _create_client_and_key(MSG_TEST_CLIENT_ID, TEST_ORG_ID, org_name="Demo Org")
     return auth
 
 
@@ -320,12 +318,8 @@ async def msg_client(server_process, msg_api_auth):
         debug=False,
     )
     config_path = project_root / "mirix" / "configs" / "examples" / "mirix_gemini.yaml"
-    await c.initialize_meta_agent(
-        config_path=str(config_path), update_agents=False
-    )
-    await c.create_or_get_user(
-        user_id=MSG_TEST_USER_ID, user_name="Message Lifecycle User"
-    )
+    await c.initialize_meta_agent(config_path=str(config_path), update_agents=False)
+    await c.create_or_get_user(user_id=MSG_TEST_USER_ID, user_name="Message Lifecycle User")
     try:
         yield c
     finally:
@@ -344,8 +338,8 @@ async def _get_message_rows(agent_id: str, user_id: str, org_id: str):
 
     Returns all non-deleted message rows in chronological order.
     """
-    from mirix.services.message_manager import MessageManager
     from mirix.schemas.client import Client
+    from mirix.services.message_manager import MessageManager
 
     mm = MessageManager()
     actor = Client(
@@ -373,20 +367,14 @@ async def _get_sub_agent_ids(client: MirixClient):
 
     from mirix.schemas.agent import AgentState
 
-    resp = await client._request(
-        "GET", f"/agents?parent_id={meta.id}&limit=1000"
-    )
+    resp = await client._request("GET", f"/agents?parent_id={meta.id}&limit=1000")
     sub_agents = resp if isinstance(resp, list) else resp.get("agents", [])
     result = {"meta_memory_agent": meta.id}
     for data in sub_agents:
         agent = AgentState(**data)
         short = agent.name
         if "meta_memory_agent_" in short:
-            short = (
-                short.replace("meta_memory_agent_", "")
-                .replace("_memory_agent", "")
-                .replace("_agent", "")
-            )
+            short = short.replace("meta_memory_agent_", "").replace("_memory_agent", "").replace("_agent", "")
         result[short] = agent.id
     return result
 
@@ -411,12 +399,9 @@ async def test_system_prompt_stored_on_agent_not_as_message(msg_client):
     agent_id = agent_map[agent_name]
 
     new_prompt = (
-        "You are an episodic memory agent for integration testing. "
-        "Extract episodic events from conversations."
-    )
-    updated = await client.update_system_prompt(
-        agent_name=agent_name, system_prompt=new_prompt
+        "You are an episodic memory agent for integration testing. " "Extract episodic events from conversations."
     )
+    updated = await client.update_system_prompt(agent_name=agent_name, system_prompt=new_prompt)
 
     assert updated.system == new_prompt
 
@@ -429,8 +414,7 @@ async def test_system_prompt_stored_on_agent_not_as_message(msg_client):
     )
     system_msgs = [m for m in messages if m.role == "system"]
     assert len(system_msgs) == 0, (
-        f"System prompt should not be stored as a message row; "
-        f"found {len(system_msgs)} system message(s)"
+        f"System prompt should not be stored as a message row; " f"found {len(system_msgs)} system message(s)"
     )
 
 
@@ -449,12 +433,8 @@ async def test_no_messages_persisted_with_zero_retention(msg_client):
     agent_map = await _get_sub_agent_ids(client)
 
     server = _get_server()
-    db_client = await server.client_manager.get_client_by_id(
-        MSG_TEST_CLIENT_ID
-    )
-    assert (db_client.message_set_retention_count or 0) == 0, (
-        "Test client should default to retention=0"
-    )
+    db_client = await server.client_manager.get_client_by_id(MSG_TEST_CLIENT_ID)
+    assert (db_client.message_set_retention_count or 0) == 0, "Test client should default to retention=0"
 
     result = await client.add(
         user_id=MSG_TEST_USER_ID,
@@ -498,20 +478,12 @@ async def test_no_messages_persisted_with_zero_retention(msg_client):
             org_id=TEST_ORG_ID,
         )
         for message in messages:
-            content_text = " ".join(
-                [
-                    c.text
-                    for c in (message.content or [])
-                    if hasattr(c, "text") and c.text
-                ]
-            )
+            content_text = " ".join([c.text for c in (message.content or []) if hasattr(c, "text") and c.text])
             assert not any(marker in content_text for marker in synthetic_markers), (
-                f"Agent '{name}' persisted synthetic helper content with retention=0: "
-                f"{content_text}"
+                f"Agent '{name}' persisted synthetic helper content with retention=0: " f"{content_text}"
             )
         assert len(messages) == 0, (
-            f"Agent '{name}' should have 0 message rows with "
-            f"retention=0, found {len(messages)}"
+            f"Agent '{name}' should have 0 message rows with " f"retention=0, found {len(messages)}"
         )
 
 
@@ -580,8 +552,7 @@ async def test_message_retention_prunes_to_limit(msg_client):
         )
 
         assert len(messages) <= 2, (
-            f"Expected at most 2 retained message rows with "
-            f"retention=2, found {len(messages)}"
+            f"Expected at most 2 retained message rows with " f"retention=2, found {len(messages)}"
         )
 
         synthetic_markers = [
@@ -591,13 +562,7 @@ async def test_message_retention_prunes_to_limit(msg_client):
             "finish_memory_update",
         ]
         for message in messages:
-            content_text = " ".join(
-                [
-                    c.text
-                    for c in (message.content or [])
-                    if hasattr(c, "text") and c.text
-                ]
-            )
+            content_text = " ".join([c.text for c in (message.content or []) if hasattr(c, "text") and c.text])
             assert not any(marker in content_text for marker in synthetic_markers), (
                 "Retention rows should contain only persisted input message sets, "
                 f"found synthetic helper content: {content_text}"
@@ -613,8 +578,7 @@ async def test_message_retention_prunes_to_limit(msg_client):
                 org_id=TEST_ORG_ID,
             )
             assert len(sub_messages) == 0, (
-                f"Sub-agent '{name}' should not persist retained message rows; "
-                f"found {len(sub_messages)}"
+                f"Sub-agent '{name}' should not persist retained message rows; " f"found {len(sub_messages)}"
             )
 
     finally:
@@ -651,9 +615,7 @@ async def test_failed_processing_leaves_no_messages(msg_client):
         pytest.skip("Meta agent not found")
 
     server = _get_server()
-    db_client = await server.client_manager.get_client_by_id(
-        MSG_TEST_CLIENT_ID
-    )
+    db_client = await server.client_manager.get_client_by_id(MSG_TEST_CLIENT_ID)
     assert (db_client.message_set_retention_count or 0) == 0
 
     # ~2M chars / ~500k tokens — well beyond any model's context window
@@ -669,9 +631,7 @@ async def test_failed_processing_leaves_no_messages(msg_client):
                 },
                 {
                     "role": "assistant",
-                    "content": [
-                        {"type": "text", "text": "Acknowledged."}
-                    ],
+                    "content": [{"type": "text", "text": "Acknowledged."}],
                 },
             ],
         )
@@ -688,8 +648,7 @@ async def test_failed_processing_leaves_no_messages(msg_client):
             org_id=TEST_ORG_ID,
         )
         assert len(messages) == 0, (
-            f"Agent '{name}' should have 0 message rows after a "
-            f"failed processing attempt, found {len(messages)}"
+            f"Agent '{name}' should have 0 message rows after a " f"failed processing attempt, found {len(messages)}"
         )
 
 
diff --git a/tests/test_memory_server.py b/tests/test_memory_server.py
index 5174734b3..54cdabcbd 100644
--- a/tests/test_memory_server.py
+++ b/tests/test_memory_server.py
@@ -723,7 +723,17 @@ async def test_trigger_memory_update_runs_in_parallel(self, server, client, user
         lock = threading.Lock()
 
         class MockMemoryAgent:
-            def __init__(self, agent_state, interface, actor, user, filter_tags=None, block_filter_tags=None, use_cache=True, **kwargs):
+            def __init__(
+                self,
+                agent_state,
+                interface,
+                actor,
+                user,
+                filter_tags=None,
+                block_filter_tags=None,
+                use_cache=True,
+                **kwargs,
+            ):
                 self.agent_state = agent_state
 
             async def step(self, input_messages, chaining, actor=None, user=None, topics=None, retrieved_memories=None):
diff --git a/tests/test_message_handling.py b/tests/test_message_handling.py
index fc3322ff8..b119e32bc 100644
--- a/tests/test_message_handling.py
+++ b/tests/test_message_handling.py
@@ -6,6 +6,7 @@
 2. hard_delete_user_messages_for_agent deletes correct rows and keeps newest N
 3. Retention=0 path: no DB persistence after step
 4. Retention=N path: persists input messages and prunes to N newest
+5. Context overflow summarization recovery
 """
 
 import asyncio
@@ -14,14 +15,18 @@
 from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
 
+import httpx
 import pytest
 
 from mirix.agent.agent import Agent
+from mirix.errors import ContextWindowExceededError
 from mirix.schemas.agent import AgentState, AgentStepResponse, AgentType
 from mirix.schemas.client import Client
 from mirix.schemas.embedding_config import EmbeddingConfig
+from mirix.schemas.enums import MessageRole
 from mirix.schemas.llm_config import LLMConfig
 from mirix.schemas.message import Message
+from mirix.schemas.mirix_message_content import TextContent
 from mirix.schemas.openai.chat_completion_response import UsageStatistics
 from mirix.schemas.user import User
 from mirix.services.message_manager import MessageManager
@@ -139,7 +144,7 @@ def test_deletes_all_when_keep_newest_n_is_zero(self):
 
         execute_results = [
             delete_ids_result,  # select IDs to delete
-            MagicMock(),        # DELETE statement
+            MagicMock(),  # DELETE statement
         ]
 
         mock_session = AsyncMock()
@@ -278,7 +283,9 @@ async def test_step_reads_retention_from_parent_scope_for_sub_agent(self):
             parent_id="agent-meta",
         )
         agent = build_step_test_agent(agent_state, user)
-        agent.message_manager.get_messages_for_agent_user = AsyncMock(return_value=[make_runtime_message("agent-meta", "r1")])
+        agent.message_manager.get_messages_for_agent_user = AsyncMock(
+            return_value=[make_runtime_message("agent-meta", "r1")]
+        )
 
         with patch("mirix.agent.agent.LLMClient.create", return_value=object()):
             await agent.step(
@@ -379,3 +386,311 @@ async def test_step_retention_zero_skips_read_write_persistence(self):
         agent.message_manager.get_messages_for_agent_user.assert_not_awaited()
         agent.message_manager.create_many_messages.assert_not_awaited()
         agent.message_manager.hard_delete_user_messages_for_agent.assert_not_awaited()
+
+
+def _make_context_overflow_error():
+    """Create an httpx error that is_context_overflow_error() recognises."""
+    request = httpx.Request("POST", "https://api.openai.com/v1/chat/completions")
+    response = httpx.Response(
+        400,
+        json={
+            "error": {
+                "message": "This model's maximum context length is 8192 tokens",
+                "type": "invalid_request_error",
+                "code": "context_length_exceeded",
+            }
+        },
+        request=request,
+    )
+    return httpx.HTTPStatusError(
+        message="maximum context length",
+        request=request,
+        response=response,
+    )
+
+
+def build_inner_step_test_agent(agent_state: AgentState, user: User) -> Agent:
+    """Build an Agent with mocks suitable for testing inner_step directly."""
+    agent = Agent.__new__(Agent)
+    agent.agent_state = agent_state
+    agent.user = user
+    agent.user_id = user.id
+    agent.client_id = "client-1"
+    agent.model = "gpt-4o-mini"
+    agent.filter_tags = None
+    agent.block_filter_tags = None
+    agent._block_scopes = None
+    agent.blocks_in_memory = None
+    agent.last_function_response = None
+    agent.logger = MagicMock()
+    agent.block_manager = SimpleNamespace(get_blocks=AsyncMock(return_value=[]))
+    agent.message_manager = SimpleNamespace(
+        get_messages_for_agent_user=AsyncMock(return_value=[]),
+        create_many_messages=AsyncMock(return_value=[]),
+        hard_delete_user_messages_for_agent=AsyncMock(return_value=0),
+        create_message=AsyncMock(side_effect=lambda msg, **kw: msg),
+        delete_message_by_id=AsyncMock(),
+    )
+    agent.step_manager = SimpleNamespace(
+        log_step=AsyncMock(return_value=SimpleNamespace(id="step-1")),
+    )
+    agent.interface = SimpleNamespace(step_complete=lambda: None)
+    agent.actor = make_client(retention=3)
+    return agent
+
+
+class TestSummarizeAndReplaceRetainedMessages:
+    """Tests for Agent.summarize_and_replace_retained_messages()"""
+
+    @pytest.mark.asyncio
+    async def test_calls_summarize_and_persists_summary(self):
+        """Verifies the method calls summarize_messages, persists the result,
+        and deletes the original retained messages."""
+        user = make_user()
+        agent_state = make_agent_state("agent-1", AgentType.meta_memory_agent)
+        agent = build_inner_step_test_agent(agent_state, user)
+
+        retained = [
+            make_runtime_message("agent-1", "old-msg-1"),
+            make_runtime_message("agent-1", "old-msg-2"),
+        ]
+
+        with patch("mirix.agent.agent.summarize_messages", new_callable=AsyncMock) as mock_summarize:
+            mock_summarize.return_value = "Summary of old messages"
+            result = await agent.summarize_and_replace_retained_messages(retained)
+
+        mock_summarize.assert_awaited_once()
+        assert result.message_type == "summary"
+        assert result.role == MessageRole.user
+        assert result.content[0].text == "Summary of old messages"
+
+        agent.message_manager.create_message.assert_awaited_once()
+        assert agent.message_manager.delete_message_by_id.await_count == 2
+
+    @pytest.mark.asyncio
+    async def test_summary_message_has_correct_agent_and_user(self):
+        """Summary message should be scoped to the correct agent and user."""
+        user = make_user()
+        agent_state = make_agent_state("agent-1", AgentType.meta_memory_agent)
+        agent = build_inner_step_test_agent(agent_state, user)
+
+        retained = [make_runtime_message("agent-1", "old-msg")]
+
+        with patch("mirix.agent.agent.summarize_messages", new_callable=AsyncMock) as mock_summarize:
+            mock_summarize.return_value = "Summary"
+            result = await agent.summarize_and_replace_retained_messages(retained)
+
+        assert result.agent_id == "agent-1"
+        assert result.user_id == "user-1"
+
+    @pytest.mark.asyncio
+    async def test_summary_scoped_to_meta_agent_when_called_from_sub_agent(self):
+        """When a sub-agent summarizes, the summary message's agent_id should
+        be the parent (meta) agent, not the sub-agent itself."""
+        user = make_user()
+        agent_state = make_agent_state(
+            "agent-child",
+            AgentType.episodic_memory_agent,
+            parent_id="agent-meta",
+        )
+        agent = build_inner_step_test_agent(agent_state, user)
+
+        retained = [make_runtime_message("agent-meta", "old-msg")]
+
+        with patch("mirix.agent.agent.summarize_messages", new_callable=AsyncMock) as mock_summarize:
+            mock_summarize.return_value = "Summary"
+            result = await agent.summarize_and_replace_retained_messages(retained)
+
+        assert result.agent_id == "agent-meta"
+        assert result.user_id == "user-1"
+
+
+class TestContextOverflowSummarizationRecovery:
+    """Tests for the summarization recovery path in inner_step."""
+
+    @pytest.mark.asyncio
+    async def test_summarization_triggered_on_overflow_with_retained_messages(self):
+        """When context overflows and retained messages exist, summarization
+        should be attempted and inner_step retried."""
+        user = make_user()
+        agent_state = make_agent_state("agent-1", AgentType.meta_memory_agent)
+        agent = build_inner_step_test_agent(agent_state, user)
+
+        retained_msg = make_runtime_message("agent-1", "retained")
+        current_msg = make_runtime_message("agent-1", "current")
+        summary_msg = Message(
+            agent_id="agent-1",
+            role=MessageRole.user,
+            content=[TextContent(text="Summary")],
+            message_type="summary",
+        )
+
+        call_count = 0
+
+        async def mock_get_ai_reply(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                raise _make_context_overflow_error()
+            # Second call succeeds
+            return MagicMock(
+                choices=[MagicMock(message=MagicMock(content="ok", tool_calls=None))],
+                usage=UsageStatistics(),
+                id="resp-1",
+            )
+
+        agent._get_ai_reply = mock_get_ai_reply
+        agent.build_system_prompt_with_memories = AsyncMock(return_value=("system prompt", {}))
+        agent.summarize_and_replace_retained_messages = AsyncMock(return_value=summary_msg)
+        agent._handle_ai_response = AsyncMock(return_value=([], False, False))
+
+        result = await agent.inner_step(
+            messages=[current_msg],
+            accumulated=[retained_msg],
+            retained_count=1,
+            chaining=False,
+        )
+
+        agent.summarize_and_replace_retained_messages.assert_awaited_once_with([retained_msg], None)
+        assert result is not None
+
+    @pytest.mark.asyncio
+    async def test_hard_fail_when_no_retained_messages(self):
+        """When context overflows but there are no retained messages,
+        ContextWindowExceededError should be raised immediately."""
+        user = make_user()
+        agent_state = make_agent_state("agent-1", AgentType.meta_memory_agent)
+        agent = build_inner_step_test_agent(agent_state, user)
+
+        current_msg = make_runtime_message("agent-1", "current")
+
+        agent._get_ai_reply = AsyncMock(side_effect=_make_context_overflow_error())
+        agent.build_system_prompt_with_memories = AsyncMock(return_value=("system prompt", {}))
+
+        with pytest.raises(ContextWindowExceededError):
+            await agent.inner_step(
+                messages=[current_msg],
+                accumulated=[],
+                retained_count=0,
+                chaining=False,
+            )
+
+    @pytest.mark.asyncio
+    async def test_hard_fail_after_summarization_already_attempted(self):
+        """If summarization was already attempted and context still overflows,
+        raise ContextWindowExceededError."""
+        user = make_user()
+        agent_state = make_agent_state("agent-1", AgentType.meta_memory_agent)
+        agent = build_inner_step_test_agent(agent_state, user)
+
+        summary_msg = Message(
+            agent_id="agent-1",
+            role=MessageRole.user,
+            content=[TextContent(text="Summary")],
+            message_type="summary",
+        )
+        current_msg = make_runtime_message("agent-1", "current")
+
+        agent._get_ai_reply = AsyncMock(side_effect=_make_context_overflow_error())
+        agent.build_system_prompt_with_memories = AsyncMock(return_value=("system prompt", {}))
+
+        with pytest.raises(ContextWindowExceededError):
+            await agent.inner_step(
+                messages=[current_msg],
+                accumulated=[summary_msg],
+                retained_count=1,
+                _summarization_attempted=True,
+                chaining=False,
+            )
+
+    @pytest.mark.asyncio
+    async def test_hard_fail_when_summarization_itself_fails(self):
+        """If summarize_and_replace_retained_messages raises, the original
+        context overflow should still surface as ContextWindowExceededError."""
+        user = make_user()
+        agent_state = make_agent_state("agent-1", AgentType.meta_memory_agent)
+        agent = build_inner_step_test_agent(agent_state, user)
+
+        retained_msg = make_runtime_message("agent-1", "retained")
+        current_msg = make_runtime_message("agent-1", "current")
+
+        agent._get_ai_reply = AsyncMock(side_effect=_make_context_overflow_error())
+        agent.build_system_prompt_with_memories = AsyncMock(return_value=("system prompt", {}))
+        agent.summarize_and_replace_retained_messages = AsyncMock(side_effect=RuntimeError("LLM summarization failed"))
+
+        with pytest.raises(ContextWindowExceededError, match="summarization recovery failed"):
+            await agent.inner_step(
+                messages=[current_msg],
+                accumulated=[retained_msg],
+                retained_count=1,
+                chaining=False,
+            )
+
+    @pytest.mark.asyncio
+    async def test_chaining_outputs_preserved_after_summarization(self):
+        """When summarization fires, chaining outputs (accumulated beyond
+        retained_count) should be preserved in the retry."""
+        user = make_user()
+        agent_state = make_agent_state("agent-1", AgentType.meta_memory_agent)
+        agent = build_inner_step_test_agent(agent_state, user)
+
+        retained_msg = make_runtime_message("agent-1", "retained")
+        chaining_msg = make_runtime_message("agent-1", "heartbeat")
+        current_msg = make_runtime_message("agent-1", "current")
+        summary_msg = Message(
+            agent_id="agent-1",
+            role=MessageRole.user,
+            content=[TextContent(text="Summary")],
+            message_type="summary",
+        )
+
+        call_count = 0
+
+        async def mock_get_ai_reply(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                raise _make_context_overflow_error()
+            return MagicMock(
+                choices=[MagicMock(message=MagicMock(content="ok", tool_calls=None))],
+                usage=UsageStatistics(),
+                id="resp-1",
+            )
+
+        agent._get_ai_reply = mock_get_ai_reply
+        agent.build_system_prompt_with_memories = AsyncMock(return_value=("system prompt", {}))
+        agent.summarize_and_replace_retained_messages = AsyncMock(return_value=summary_msg)
+        agent._handle_ai_response = AsyncMock(return_value=([], False, False))
+
+        result = await agent.inner_step(
+            messages=[current_msg],
+            accumulated=[retained_msg, chaining_msg],
+            retained_count=1,
+            chaining=False,
+        )
+
+        assert result is not None
+        # The retry should have been called with accumulated=[summary_msg, chaining_msg]
+        # and retained_count=1 (for the single summary message)
+        assert call_count == 2
+
+
+class TestMessageTypeField:
+    """Tests for the message_type field on Message schema."""
+
+    def test_default_message_type_is_original(self):
+        msg = Message(
+            agent_id="agent-1",
+            role=MessageRole.user,
+            content=[TextContent(text="hello")],
+        )
+        assert msg.message_type == "original"
+
+    def test_summary_message_type(self):
+        msg = Message(
+            agent_id="agent-1",
+            role=MessageRole.user,
+            content=[TextContent(text="summary text")],
+            message_type="summary",
+        )
+        assert msg.message_type == "summary"
diff --git a/tests/test_multi_scope_access.py b/tests/test_multi_scope_access.py
index 6a87a3b82..c03e2c2d4 100644
--- a/tests/test_multi_scope_access.py
+++ b/tests/test_multi_scope_access.py
@@ -51,9 +51,7 @@ async def test_org():
     try:
         return await org_mgr.get_organization_by_id(org_id)
     except Exception:
-        return await org_mgr.create_organization(
-            PydanticOrganization(id=org_id, name="Multi-Scope Test Org")
-        )
+        return await org_mgr.create_organization(PydanticOrganization(id=org_id, name="Multi-Scope Test Org"))
 
 
 @pytest_asyncio.fixture(scope="module", loop_scope="module")
@@ -206,9 +204,7 @@ async def no_access_client(test_org, client_manager):
 class TestReadOnlyClient:
     """Tests for clients with write_scope=None."""
 
-    async def test_read_only_client_cannot_create_memory(
-        self, raw_memory_manager, read_only_client, test_user
-    ):
+    async def test_read_only_client_cannot_create_memory(self, raw_memory_manager, read_only_client, test_user):
         """Test that a read-only client (write_scope=None) cannot create memories."""
         memory_data = RawMemoryItemCreate(
             context="Attempting to create from read-only client",
@@ -249,9 +245,7 @@ async def test_read_only_client_can_read_from_read_scopes(
             assert created.filter_tags["scope"] == "shared"
 
             # Read-only client should be able to read it (has 'shared' in read_scopes)
-            fetched = await raw_memory_manager.get_raw_memory_by_id(
-                created.id, actor=read_only_client
-            )
+            fetched = await raw_memory_manager.get_raw_memory_by_id(created.id, actor=read_only_client)
             assert fetched.id == created.id
             assert fetched.context == memory_data.context
         finally:
@@ -304,14 +298,10 @@ async def test_client_can_read_from_multiple_scopes(
 
             # multi_read_client has read_scopes=["shared", "private", "multi-read-scope"]
             # It should be able to read both memories
-            fetched_shared = await raw_memory_manager.get_raw_memory_by_id(
-                shared_memory.id, actor=multi_read_client
-            )
+            fetched_shared = await raw_memory_manager.get_raw_memory_by_id(shared_memory.id, actor=multi_read_client)
             assert fetched_shared.id == shared_memory.id
 
-            fetched_private = await raw_memory_manager.get_raw_memory_by_id(
-                private_memory.id, actor=multi_read_client
-            )
+            fetched_private = await raw_memory_manager.get_raw_memory_by_id(private_memory.id, actor=multi_read_client)
             assert fetched_private.id == private_memory.id
         finally:
             # Cleanup
@@ -400,9 +390,7 @@ async def test_writer_creates_reader_reads(
             assert memory.filter_tags["scope"] == "shared"
 
             # Reader can read it
-            fetched = await raw_memory_manager.get_raw_memory_by_id(
-                memory.id, actor=read_only_client
-            )
+            fetched = await raw_memory_manager.get_raw_memory_by_id(memory.id, actor=read_only_client)
             assert fetched.id == memory.id
         finally:
             await raw_memory_manager.delete_raw_memory(memory.id, shared_writer_client)
@@ -480,15 +468,11 @@ async def test_private_client_reads_shared_and_private(
 
         try:
             # Private client can read shared memory
-            fetched_shared = await raw_memory_manager.get_raw_memory_by_id(
-                shared_memory.id, actor=private_client
-            )
+            fetched_shared = await raw_memory_manager.get_raw_memory_by_id(shared_memory.id, actor=private_client)
             assert fetched_shared.id == shared_memory.id
 
             # Private client can read its own private memory
-            fetched_private = await raw_memory_manager.get_raw_memory_by_id(
-                private_memory.id, actor=private_client
-            )
+            fetched_private = await raw_memory_manager.get_raw_memory_by_id(private_memory.id, actor=private_client)
             assert fetched_private.id == private_memory.id
         finally:
             await raw_memory_manager.delete_raw_memory(shared_memory.id, shared_writer_client)
@@ -527,9 +511,7 @@ async def test_private_client_cannot_write_to_shared(
         finally:
             await raw_memory_manager.delete_raw_memory(shared_memory.id, shared_writer_client)
 
-    async def test_private_client_can_modify_own_scope(
-        self, raw_memory_manager, private_client, test_user
-    ):
+    async def test_private_client_can_modify_own_scope(self, raw_memory_manager, private_client, test_user):
         """Test that private client can create, update, and delete in its own scope."""
         # Create in private scope
         memory = await raw_memory_manager.create_raw_memory(
@@ -596,15 +578,11 @@ async def test_no_access_client_cannot_read_any_memory(
         try:
             # no_access_client has read_scopes=[], so it cannot read anything
             with pytest.raises(NoResultFound):
-                await raw_memory_manager.get_raw_memory_by_id(
-                    memory.id, actor=no_access_client
-                )
+                await raw_memory_manager.get_raw_memory_by_id(memory.id, actor=no_access_client)
         finally:
             await raw_memory_manager.delete_raw_memory(memory.id, shared_writer_client)
 
-    async def test_no_access_client_cannot_create_memory(
-        self, raw_memory_manager, no_access_client, test_user
-    ):
+    async def test_no_access_client_cannot_create_memory(self, raw_memory_manager, no_access_client, test_user):
         """Test that a client with no write_scope cannot create memories."""
         memory_data = RawMemoryItemCreate(
             context="Attempting to create from no-access client",
@@ -688,9 +666,7 @@ async def test_client_cannot_read_outside_read_scopes(
             # shared_writer_client only has read_scopes=["shared"]
             # It should NOT be able to read 'private' scope memory
             with pytest.raises(NoResultFound):
-                await raw_memory_manager.get_raw_memory_by_id(
-                    private_memory.id, actor=shared_writer_client
-                )
+                await raw_memory_manager.get_raw_memory_by_id(private_memory.id, actor=shared_writer_client)
         finally:
             await raw_memory_manager.delete_raw_memory(private_memory.id, private_client)
 
diff --git a/tests/test_orm_to_pydantic_safe.py b/tests/test_orm_to_pydantic_safe.py
index 1a4fc8235..95997cc17 100644
--- a/tests/test_orm_to_pydantic_safe.py
+++ b/tests/test_orm_to_pydantic_safe.py
@@ -29,21 +29,22 @@ async def test_agent_to_pydantic_with_session(server):
 
     # Get test client
     actor = server.default_client
-    
+
     # Create an agent with tools
     from mirix.schemas.agent import CreateAgent
+
     agent_create = CreateAgent(
         name="test_agent_conversion",
         llm_config=LLMConfig.default_config("gpt-4"),
         embedding_config=EmbeddingConfig.default_config("text-embedding-004"),
         include_base_tools=True,
     )
-    
+
     agent_state = await server.agent_manager.create_agent(
         agent_create=agent_create,
         actor=actor,
     )
-    
+
     # Now fetch it back and convert inside session
     async with db_context() as session:
         agent = await AgentModel.read(
@@ -51,10 +52,10 @@ async def test_agent_to_pydantic_with_session(server):
             identifier=agent_state.id,
             actor=actor,
         )
-        
+
         # This should work - we're inside the session
         pydantic_agent = agent.to_pydantic()
-        
+
         assert isinstance(pydantic_agent, PydanticAgentState)
         assert pydantic_agent.id == agent_state.id
         assert pydantic_agent.name == "test_agent_conversion"
@@ -68,21 +69,22 @@ async def test_agent_to_pydantic_detached(server):
     from mirix.server.server import db_context
 
     actor = server.default_client
-    
+
     # Create an agent
     from mirix.schemas.agent import CreateAgent
+
     agent_create = CreateAgent(
         name="test_agent_detached",
         llm_config=LLMConfig.default_config("gpt-4"),
         embedding_config=EmbeddingConfig.default_config("text-embedding-004"),
         include_base_tools=False,  # No tools to simplify
     )
-    
+
     agent_state = await server.agent_manager.create_agent(
         agent_create=agent_create,
         actor=actor,
     )
-    
+
     # Fetch agent and close session
     async with db_context() as session:
         agent = await AgentModel.read(
@@ -91,11 +93,11 @@ async def test_agent_to_pydantic_detached(server):
             actor=actor,
         )
         # Session closes here
-    
+
     # Now agent is detached - to_pydantic() should still work
     # It should use cached/loaded relationships or empty list
     pydantic_agent = agent.to_pydantic()
-    
+
     assert isinstance(pydantic_agent, PydanticAgentState)
     assert pydantic_agent.id == agent_state.id
     assert pydantic_agent.name == "test_agent_detached"
@@ -112,10 +114,10 @@ async def test_episodic_memory_to_pydantic(server):
 
     actor = server.default_client
     user = server.admin_user
-    
+
     # Create an episodic event
     from mirix.schemas.episodic_memory import EpisodicEvent as EpisodicEventCreate
-    
+
     event_data = {
         "event_type": "test_event",
         "actor": "system",
@@ -125,32 +127,28 @@ async def test_episodic_memory_to_pydantic(server):
         "user_id": user.id,
         "organization_id": user.organization_id,
     }
-    
+
     event = await server.episodic_memory_manager.create_episodic_memory(
         episodic_memory=EpisodicEventCreate(**event_data),
         actor=actor,
     )
-    
+
     # Fetch and convert inside session
     async with db_context() as session:
-        result = await session.execute(
-            select(EpisodicEvent).where(EpisodicEvent.id == event.id)
-        )
+        result = await session.execute(select(EpisodicEvent).where(EpisodicEvent.id == event.id))
         orm_event = result.scalar_one()
-        
+
         # Convert inside session
         pydantic_event = orm_event.to_pydantic()
         assert isinstance(pydantic_event, PydanticEpisodicEvent)
         assert pydantic_event.id == event.id
         assert pydantic_event.summary == "Test event for conversion"
-    
+
     # Now test after session closed
     async with db_context() as session:
-        result = await session.execute(
-            select(EpisodicEvent).where(EpisodicEvent.id == event.id)
-        )
+        result = await session.execute(select(EpisodicEvent).where(EpisodicEvent.id == event.id))
         orm_event = result.scalar_one()
-    
+
     # Session closed - to_pydantic() should still work
     pydantic_event = orm_event.to_pydantic()
     assert isinstance(pydantic_event, PydanticEpisodicEvent)
@@ -164,9 +162,10 @@ async def test_memory_models_to_pydantic(server):
 
     actor = server.default_client
     user = server.admin_user
-    
+
     # Test semantic memory
     from mirix.schemas.semantic_memory import SemanticMemoryItem as SemanticCreate
+
     semantic = await server.semantic_memory_manager.create_item(
         item_data=SemanticCreate(
             name="test_concept",
@@ -179,20 +178,19 @@ async def test_memory_models_to_pydantic(server):
         actor=actor,
         user_id=user.id,
     )
-    
+
     async with db_context() as session:
-        result = await session.execute(
-            select(SemanticMemoryItem).where(SemanticMemoryItem.id == semantic.id)
-        )
+        result = await session.execute(select(SemanticMemoryItem).where(SemanticMemoryItem.id == semantic.id))
         orm_semantic = result.scalar_one()
-    
+
     # Detached conversion
     pydantic_semantic = orm_semantic.to_pydantic()
     assert pydantic_semantic.id == semantic.id
     assert pydantic_semantic.name == "test_concept"
-    
+
     # Test procedural memory
     from mirix.schemas.procedural_memory import ProceduralMemoryItem as ProceduralCreate
+
     procedural = await server.procedural_memory_manager.create_item(
         item_data=ProceduralCreate(
             entry_type="workflow",
@@ -204,19 +202,18 @@ async def test_memory_models_to_pydantic(server):
         actor=actor,
         user_id=user.id,
     )
-    
+
     async with db_context() as session:
-        result = await session.execute(
-            select(ProceduralMemoryItem).where(ProceduralMemoryItem.id == procedural.id)
-        )
+        result = await session.execute(select(ProceduralMemoryItem).where(ProceduralMemoryItem.id == procedural.id))
         orm_procedural = result.scalar_one()
-    
+
     pydantic_procedural = orm_procedural.to_pydantic()
     assert pydantic_procedural.id == procedural.id
     assert pydantic_procedural.summary == "test_procedure"
-    
+
     # Test resource memory
     from mirix.schemas.resource_memory import ResourceMemoryItem as ResourceCreate
+
     resource = await server.resource_memory_manager.create_item(
         item_data=ResourceCreate(
             title="test_resource_title",
@@ -229,19 +226,18 @@ async def test_memory_models_to_pydantic(server):
         actor=actor,
         user_id=user.id,
     )
-    
+
     async with db_context() as session:
-        result = await session.execute(
-            select(ResourceMemoryItem).where(ResourceMemoryItem.id == resource.id)
-        )
+        result = await session.execute(select(ResourceMemoryItem).where(ResourceMemoryItem.id == resource.id))
         orm_resource = result.scalar_one()
-    
+
     pydantic_resource = orm_resource.to_pydantic()
     assert pydantic_resource.id == resource.id
     assert pydantic_resource.summary == "test_resource"
-    
+
     # Test knowledge vault
     from mirix.schemas.knowledge_vault import KnowledgeVaultItem as KnowledgeCreate
+
     knowledge = await server.knowledge_vault_manager.create_item(
         knowledge_vault_item=KnowledgeCreate(
             entry_type="credential",
@@ -255,13 +251,11 @@ async def test_memory_models_to_pydantic(server):
         actor=actor,
         user_id=user.id,
     )
-    
+
     async with db_context() as session:
-        result = await session.execute(
-            select(KnowledgeVaultItem).where(KnowledgeVaultItem.id == knowledge.id)
-        )
+        result = await session.execute(select(KnowledgeVaultItem).where(KnowledgeVaultItem.id == knowledge.id))
         orm_knowledge = result.scalar_one()
-    
+
     pydantic_knowledge = orm_knowledge.to_pydantic()
     assert pydantic_knowledge.id == knowledge.id
     assert pydantic_knowledge.caption == "test_knowledge"
@@ -271,11 +265,12 @@ async def test_memory_models_to_pydantic(server):
 async def test_list_agents_conversion_safety(server):
     """Test list_agents flow (simulating meta agent initialization)."""
     actor = server.default_client
-    
+
     # Create multiple agents
     from mirix.schemas.agent import CreateAgent
+
     agent_names = ["meta_agent", "episodic_agent", "semantic_agent"]
-    
+
     for name in agent_names:
         await server.agent_manager.create_agent(
             agent_create=CreateAgent(
@@ -286,13 +281,13 @@ async def test_list_agents_conversion_safety(server):
             ),
             actor=actor,
         )
-    
+
     # List agents (this is what MetaAgent does)
     agents = await server.agent_manager.list_agents(actor=actor)
-    
+
     # Should have all our test agents
     assert len(agents) >= 3
-    
+
     # All should be Pydantic models
     for agent in agents:
         assert isinstance(agent, PydanticAgentState)
@@ -307,9 +302,10 @@ async def test_memory_manager_list_conversion(server):
 
     actor = server.default_client
     user = server.admin_user
-    
+
     # Create test data
     from mirix.schemas.agent import CreateAgent
+
     agent = await server.agent_manager.create_agent(
         agent_create=CreateAgent(
             name="test_memory_agent",
@@ -318,8 +314,9 @@ async def test_memory_manager_list_conversion(server):
         ),
         actor=actor,
     )
-    
+
     from mirix.schemas.episodic_memory import EpisodicEvent as EpisodicCreate
+
     await server.episodic_memory_manager.create_episodic_memory(
         episodic_memory=EpisodicCreate(
             event_type="test",
@@ -332,7 +329,7 @@ async def test_memory_manager_list_conversion(server):
         ),
         actor=actor,
     )
-    
+
     # List episodic memory (this is what memory tools do)
     events = await server.episodic_memory_manager.list_episodic_memory(
         agent_state=agent,
@@ -340,7 +337,7 @@ async def test_memory_manager_list_conversion(server):
         query="",
         limit=10,
     )
-    
+
     # Should have at least our test event
     assert len(events) >= 1
     assert all(isinstance(e, PydanticEpisodicEvent) for e in events)
diff --git a/tests/test_queue.py b/tests/test_queue.py
index 8d8fe5648..acf517755 100644
--- a/tests/test_queue.py
+++ b/tests/test_queue.py
@@ -47,6 +47,7 @@
 async def _init_db():
     """Create all DB tables before any test in this module touches the database."""
     from mirix.server.server import ensure_tables_created
+
     await ensure_tables_created()
 
 
@@ -57,9 +58,7 @@ async def ensure_organization():
     try:
         await org_mgr.get_organization_by_id(TEST_QUEUE_ORG_ID)
     except Exception:
-        await org_mgr.create_organization(
-            PydanticOrganization(id=TEST_QUEUE_ORG_ID, name="Test Queue Org")
-        )
+        await org_mgr.create_organization(PydanticOrganization(id=TEST_QUEUE_ORG_ID, name="Test Queue Org"))
     return TEST_QUEUE_ORG_ID
 
 
@@ -626,9 +625,7 @@ async def test_multiple_workers_partition_isolation(self, sample_client):
 
         def make_server(worker_key):
             s = Mock()
-            s.send_messages = AsyncMock(
-                side_effect=lambda **kwargs: processed[worker_key].append(kwargs["agent_id"])
-            )
+            s.send_messages = AsyncMock(side_effect=lambda **kwargs: processed[worker_key].append(kwargs["agent_id"]))
             s.client_manager = Mock()
             s.client_manager.get_client_by_id = AsyncMock(return_value=sample_client)
             return s
@@ -910,9 +907,7 @@ async def test_multiple_messages_processing(self, clean_manager, mock_server, sa
         await manager.cleanup()
 
     @pytest.mark.asyncio
-    async def test_worker_handles_processing_errors(
-        self, clean_manager, mock_server, sample_client, sample_messages
-    ):
+    async def test_worker_handles_processing_errors(self, clean_manager, mock_server, sample_client, sample_messages):
         manager = clean_manager
 
         mock_server.send_messages = AsyncMock(side_effect=Exception("Processing error"))
diff --git a/tests/test_raw_memory.py b/tests/test_raw_memory.py
index 7d76bacf2..dfc94264e 100644
--- a/tests/test_raw_memory.py
+++ b/tests/test_raw_memory.py
@@ -41,8 +41,6 @@
 # =================================================================
 
 
-
-
 @pytest.fixture
 def raw_memory_manager():
     """Provide a RawMemoryManager instance."""
@@ -404,12 +402,8 @@ async def test_cleanup_job_deletes_stale_memories(raw_memory_manager, test_actor
 
         from mirix.orm.raw_memory import RawMemory
 
-        naive_utc_15_days_ago = (datetime.now(UTC).replace(tzinfo=None) - timedelta(days=15))
-        stmt = (
-            update(RawMemory)
-            .where(RawMemory.id == old_memory.id)
-            .values(updated_at=naive_utc_15_days_ago)
-        )
+        naive_utc_15_days_ago = datetime.now(UTC).replace(tzinfo=None) - timedelta(days=15)
+        stmt = update(RawMemory).where(RawMemory.id == old_memory.id).values(updated_at=naive_utc_15_days_ago)
         await session.execute(stmt)
         await session.commit()
 
@@ -465,10 +459,8 @@ async def test_cleanup_job_respects_custom_threshold(raw_memory_manager, test_ac
 
         from mirix.orm.raw_memory import RawMemory
 
-        naive_utc_8_days_ago = (datetime.now(UTC).replace(tzinfo=None) - timedelta(days=8))
-        stmt = (
-            update(RawMemory).where(RawMemory.id == memory.id).values(updated_at=naive_utc_8_days_ago)
-        )
+        naive_utc_8_days_ago = datetime.now(UTC).replace(tzinfo=None) - timedelta(days=8)
+        stmt = update(RawMemory).where(RawMemory.id == memory.id).values(updated_at=naive_utc_8_days_ago)
         await session.execute(stmt)
         await session.commit()
 
@@ -826,7 +818,9 @@ async def test_api_update_raw_memory_replace(
 
     api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY") or os.getenv("MIRIX_GOOGLE_API_KEY")
     if not api_key:
-        pytest.skip("Skipping API test with embeddings - no Google/Gemini API key (set GEMINI_API_KEY, GOOGLE_API_KEY, or MIRIX_GOOGLE_API_KEY)")
+        pytest.skip(
+            "Skipping API test with embeddings - no Google/Gemini API key (set GEMINI_API_KEY, GOOGLE_API_KEY, or MIRIX_GOOGLE_API_KEY)"
+        )
 
     # Create a raw memory first
     sample_data = RawMemoryItemCreate(
@@ -881,7 +875,9 @@ async def test_api_update_raw_memory_append_and_merge(
 
     api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY") or os.getenv("MIRIX_GOOGLE_API_KEY")
     if not api_key:
-        pytest.skip("Skipping API test with embeddings - no Google/Gemini API key (set GEMINI_API_KEY, GOOGLE_API_KEY, or MIRIX_GOOGLE_API_KEY)")
+        pytest.skip(
+            "Skipping API test with embeddings - no Google/Gemini API key (set GEMINI_API_KEY, GOOGLE_API_KEY, or MIRIX_GOOGLE_API_KEY)"
+        )
 
     # Create a raw memory first
     sample_data = RawMemoryItemCreate(
@@ -962,9 +958,9 @@ async def test_api_delete_raw_memory(api_client, test_actor, test_user, test_age
 
     # GET after DELETE must return 404 (same server DB and cache)
     get_response = api_client.get(f"/memory/raw/{memory_id}", params={"user_id": test_user.id})
-    assert get_response.status_code == 404, (
-        f"GET after DELETE should return 404, got {get_response.status_code}: {get_response.text}"
-    )
+    assert (
+        get_response.status_code == 404
+    ), f"GET after DELETE should return 404, got {get_response.status_code}: {get_response.text}"
 
     print(f"\n[OK] DELETE /memory/raw/{memory_id} successful")
 
@@ -1872,7 +1868,7 @@ async def test_search_raw_memories_limit_enforcement(raw_memory_manager, test_ac
             user_id=test_user.id,
             use_cache=False,
         )
-        memories.append(mem    )
+        memories.append(mem)
 
     # Test limit=2
     results, _ = await raw_memory_manager.search_raw_memories(
diff --git a/tests/test_raw_memory_with_real_embeddings.py b/tests/test_raw_memory_with_real_embeddings.py
index dc77ac84d..d3faf3c55 100644
--- a/tests/test_raw_memory_with_real_embeddings.py
+++ b/tests/test_raw_memory_with_real_embeddings.py
@@ -84,7 +84,9 @@ async def main():
             raise ValueError("not found")
         print(f"[OK] Using existing organization: {org_id}")
     except Exception:
-        org = await org_mgr.create_organization(PydanticOrganization(id=org_id, name="Test Organization for Embeddings"))
+        org = await org_mgr.create_organization(
+            PydanticOrganization(id=org_id, name="Test Organization for Embeddings")
+        )
         print(f"[OK] Created organization: {org_id}")
 
     # Create client
diff --git a/tests/test_redis_integration.py b/tests/test_redis_integration.py
index 8e64a48dd..a9215168f 100644
--- a/tests/test_redis_integration.py
+++ b/tests/test_redis_integration.py
@@ -38,8 +38,6 @@
 pytestmark = pytest.mark.asyncio(loop_scope="module")
 
 from mirix.database.redis_client import RedisMemoryClient, get_redis_client, initialize_redis_client
-
-
 from mirix.log import get_logger
 from mirix.schemas.agent import AgentType, CreateAgent, UpdateAgent
 from mirix.schemas.block import Block as PydanticBlock
@@ -869,7 +867,9 @@ async def test_message_create_with_redis(self, message_manager, test_client, tes
         # Cleanup
         await message_manager.delete_message_by_id(created_message.id, test_client)
 
-    async def test_message_cache_hit_performance(self, message_manager, test_client, test_user, test_agent, redis_client):
+    async def test_message_cache_hit_performance(
+        self, message_manager, test_client, test_user, test_agent, redis_client
+    ):
         """Test message cache hit is significantly faster than DB."""
         # Create message
         message_data = PydanticMessage(
@@ -936,7 +936,9 @@ async def test_episodic_create_with_redis(self, episodic_manager, test_client, t
         # Cleanup
         await episodic_manager.delete_event_by_id(created_event.id, test_client)
 
-    async def test_episodic_cache_with_embeddings(self, episodic_manager, test_client, test_user, test_agent, redis_client):
+    async def test_episodic_cache_with_embeddings(
+        self, episodic_manager, test_client, test_user, test_agent, redis_client
+    ):
         """Test episodic memory with embeddings caches correctly."""
         # Create mock embeddings (4096 dimensions)
         mock_embedding = [0.1] * 4096
@@ -1078,7 +1080,9 @@ async def test_procedural_create_with_embeddings(
 class TestResourceMemoryManagerRedis:
     """Test Resource Memory Manager with Redis JSON caching (1 embedding)."""
 
-    async def test_resource_create_with_embedding(self, resource_manager, test_client, test_user, test_agent, redis_client):
+    async def test_resource_create_with_embedding(
+        self, resource_manager, test_client, test_user, test_agent, redis_client
+    ):
         """Test resource memory with 1 embedding (16KB) caches to Redis JSON."""
         # Create mock embedding
         from mirix.schemas.resource_memory import ResourceMemoryItem
@@ -1122,7 +1126,9 @@ async def test_resource_create_with_embedding(self, resource_manager, test_clien
 class TestKnowledgeVaultManagerRedis:
     """Test Knowledge Vault Manager with Redis JSON caching (1 embedding)."""
 
-    async def test_knowledge_create_with_embedding(self, knowledge_manager, test_client, test_user, test_agent, redis_client):
+    async def test_knowledge_create_with_embedding(
+        self, knowledge_manager, test_client, test_user, test_agent, redis_client
+    ):
         """Test knowledge vault item with 1 embedding (16KB) caches to Redis JSON."""
         # Create mock embedding
         from mirix.schemas.knowledge_vault import KnowledgeVaultItem
@@ -1233,7 +1239,9 @@ async def test_block_cache_speedup(self, block_manager, test_client, test_user,
         for block in blocks:
             await block_manager.delete_block(block.id, test_client)
 
-    async def test_message_cache_vs_db_comparison(self, message_manager, test_client, test_user, test_agent, redis_client):
+    async def test_message_cache_vs_db_comparison(
+        self, message_manager, test_client, test_user, test_agent, redis_client
+    ):
         """Compare Redis cache vs PostgreSQL performance for messages."""
         # Create test message
         message_data = PydanticMessage(
diff --git a/tests/test_scoped_blocks.py b/tests/test_scoped_blocks.py
index f68f1a9b3..2cad92717 100644
--- a/tests/test_scoped_blocks.py
+++ b/tests/test_scoped_blocks.py
@@ -952,7 +952,9 @@ async def test_two_clients_same_scope_share_blocks(self, block_manager, test_org
             )
         )
         blocks_via_a = await block_manager.get_blocks(user=target_user, any_scopes=[scope])
-        blocks_via_b = await block_manager.get_blocks(user=target_user, any_scopes=[scope], auto_create_from_default=False)
+        blocks_via_b = await block_manager.get_blocks(
+            user=target_user, any_scopes=[scope], auto_create_from_default=False
+        )
         assert {b.id for b in blocks_via_a} == {b.id for b in blocks_via_b}
 
     async def test_reader_client_sees_multiple_scopes(
@@ -978,7 +980,9 @@ async def test_reader_client_sees_multiple_scopes(
         assert "test-scope-1" in scopes_found
         assert "test-scope-2" in scopes_found
 
-    async def test_reader_client_cannot_see_ungranted_scope(self, block_manager, client_scope1, client_scope2, test_org):
+    async def test_reader_client_cannot_see_ungranted_scope(
+        self, block_manager, client_scope1, client_scope2, test_org
+    ):
         """Reader with read_scopes=["test-scope-1"] cannot see test-scope-2 blocks."""
         restricted_reader = await ClientManager().create_client(
             PydanticClient(
diff --git a/tests/test_search_all_users.py b/tests/test_search_all_users.py
index c1e296637..0da28129d 100644
--- a/tests/test_search_all_users.py
+++ b/tests/test_search_all_users.py
@@ -397,9 +397,7 @@ async def _search_client1_bm25():
         results = await poll_until(
             fetch_results=_search_client1_bm25,
             is_ready=lambda r: r["count"] > 0,
-            wait_log=(
-                "Client1 bm25 search returned 0; waiting %ss before retry (elapsed=%ss)..."
-            ),
+            wait_log=("Client1 bm25 search returned 0; waiting %ss before retry (elapsed=%ss)..."),
         )
 
         logger.info(f"Results: {results['count']} memories found")
@@ -454,9 +452,7 @@ async def _search_client1_embedding():
         results = await poll_until(
             fetch_results=_search_client1_embedding,
             is_ready=lambda r: r["count"] > 0,
-            wait_log=(
-                "Client1 embedding search returned 0; waiting %ss before retry (elapsed=%ss)..."
-            ),
+            wait_log=("Client1 embedding search returned 0; waiting %ss before retry (elapsed=%ss)..."),
         )
 
         logger.info(f"Results: {results['count']} memories found")
@@ -543,16 +539,12 @@ async def test_search_with_client3_retrieves_only_user3(self, client3, user3_id,
 
         # Search with client3 which has write_scope='read_only'
         async def _search_client3_bm25():
-            return await client3.search_all_users(
-                query="", memory_type="all", client_id=client3.client_id, limit=100
-            )
+            return await client3.search_all_users(query="", memory_type="all", client_id=client3.client_id, limit=100)
 
         results = await poll_until(
             fetch_results=_search_client3_bm25,
             is_ready=lambda r: user3_id in set(result["user_id"] for result in r["results"]),
-            wait_log=(
-                "Client3 bm25 search missing user3; waiting %ss before retry (elapsed=%ss)..."
-            ),
+            wait_log=("Client3 bm25 search missing user3; waiting %ss before retry (elapsed=%ss)..."),
         )
 
         logger.info(f"Results: {results['count']} memories found")
@@ -588,9 +580,7 @@ async def _search_client3_embedding():
         results = await poll_until(
             fetch_results=_search_client3_embedding,
             is_ready=lambda r: user3_id in set(result["user_id"] for result in r["results"]),
-            wait_log=(
-                "Client3 embedding search missing user3; waiting %ss before retry (elapsed=%ss)..."
-            ),
+            wait_log=("Client3 embedding search missing user3; waiting %ss before retry (elapsed=%ss)..."),
         )
 
         logger.info(f"Results: {results['count']} memories found")
@@ -619,9 +609,7 @@ async def test_search_different_org_no_cross_contamination(
         # Search with client2 (in org2). Poll briefly because async memory
         # extraction can lag under heavier CI/local runs.
         async def _search_client2_bm25():
-            return await client2.search_all_users(
-                query="", memory_type="all", client_id=client2.client_id, limit=100
-            )
+            return await client2.search_all_users(query="", memory_type="all", client_id=client2.client_id, limit=100)
 
         results = await poll_until(
             fetch_results=_search_client2_bm25,
@@ -662,9 +650,7 @@ async def _search_client2_embedding():
         results = await poll_until(
             fetch_results=_search_client2_embedding,
             is_ready=lambda r: user4_id in set(result["user_id"] for result in r["results"]),
-            wait_log=(
-                "Org2 embedding search missing user4; waiting %ss before retry (elapsed=%ss)..."
-            ),
+            wait_log=("Org2 embedding search missing user4; waiting %ss before retry (elapsed=%ss)..."),
         )
 
         user_ids_in_results = set(result["user_id"] for result in results["results"])
@@ -708,7 +694,9 @@ async def test_search_specific_memory_type(self, client1, user1_id, user2_id):
         logger.info("TEST: Search specific memory type (episodic)")
         logger.info("=" * 80)
 
-        results = await client1.search_all_users(query="team", memory_type="episodic", client_id=client1.client_id, limit=20)
+        results = await client1.search_all_users(
+            query="team", memory_type="episodic", client_id=client1.client_id, limit=20
+        )
 
         logger.info(f"Results: {results['count']} episodic memories found")
 
@@ -751,9 +739,7 @@ async def _search_semantic_embedding():
         results = await poll_until(
             fetch_results=_search_semantic_embedding,
             is_ready=lambda r: r["count"] > 0,
-            wait_log=(
-                "Semantic embedding search returned 0; waiting %ss before retry (elapsed=%ss)..."
-            ),
+            wait_log=("Semantic embedding search returned 0; waiting %ss before retry (elapsed=%ss)..."),
         )
 
         logger.info(f"Results: {results['count']} semantic memories found")
@@ -761,9 +747,9 @@ async def _search_semantic_embedding():
 
         assert results["success"] is True
         assert results["search_method"] == "embedding"
-        assert results["count"] > 0, (
-            "Semantic embedding search still 0 results after waiting for retries (index may not be ready)."
-        )
+        assert (
+            results["count"] > 0
+        ), "Semantic embedding search still 0 results after waiting for retries (index may not be ready)."
 
         # All results should be semantic type
         for result in results["results"]:
@@ -899,7 +885,9 @@ async def test_search_all_users_include_core_memory_returns_core_section(self, c
 
         assert results["success"] is True
         core_results = [r for r in results["results"] if r.get("memory_type") == "core"]
-        assert len(core_results) > 0, "Results should include items with memory_type='core' when include_core_memory=True"
+        assert (
+            len(core_results) > 0
+        ), "Results should include items with memory_type='core' when include_core_memory=True"
         for item in core_results:
             assert "id" in item
             assert "label" in item
@@ -939,9 +927,9 @@ async def test_search_all_users_include_core_memory_scope_isolation(self, client
             "Blocks from scope 'read_write' (client1's scope) must be returned. Scopes returned: %s" % scopes_returned
         )
         read_write_items = [r for r in core_results if r["scope"] == "read_write"]
-        assert len(read_write_items) > 0, (
-            "At least one block from scope 'read_write' must be returned. core count=%s" % len(core_results)
-        )
+        assert (
+            len(read_write_items) > 0
+        ), "At least one block from scope 'read_write' must be returned. core count=%s" % len(core_results)
 
         logger.info(
             "Scopes in core results: %s; read_write blocks: %s (read_only correctly excluded)",
diff --git a/tests/test_search_single_user_core_memory.py b/tests/test_search_single_user_core_memory.py
index ca3691138..152f0bf37 100644
--- a/tests/test_search_single_user_core_memory.py
+++ b/tests/test_search_single_user_core_memory.py
@@ -204,12 +204,12 @@ async def test_core_memory_scope_isolation(self, client1, client2, user_id):
         scopes_a = set(r["scope"] for r in core_a)
         scopes_b = set(r["scope"] for r in core_b)
 
-        assert "scope_b" not in scopes_a, (
-            f"Client1 (scope_a) should not see scope_b blocks. Scopes returned: {scopes_a}"
-        )
-        assert "scope_a" not in scopes_b, (
-            f"Client2 (scope_b) should not see scope_a blocks. Scopes returned: {scopes_b}"
-        )
+        assert (
+            "scope_b" not in scopes_a
+        ), f"Client1 (scope_a) should not see scope_b blocks. Scopes returned: {scopes_a}"
+        assert (
+            "scope_a" not in scopes_b
+        ), f"Client2 (scope_b) should not see scope_a blocks. Scopes returned: {scopes_b}"
 
         if core_a:
             assert "scope_a" in scopes_a, f"Client1 should see scope_a blocks. Scopes: {scopes_a}"
@@ -239,9 +239,9 @@ async def test_include_core_memory_with_specific_memory_type(self, client1, user
         for r in non_core:
             assert r["memory_type"] == "episodic", "Non-core results should all be episodic"
 
-        assert len(core_results) > 0, (
-            "Core blocks should be returned even when memory_type='episodic' and include_core_memory=True"
-        )
+        assert (
+            len(core_results) > 0
+        ), "Core blocks should be returned even when memory_type='episodic' and include_core_memory=True"
         logger.info(
             "Test passed: %d core blocks + %d episodic results returned",
             len(core_results),
diff --git a/tests/test_temporal_queries.py b/tests/test_temporal_queries.py
index 5feebe208..664215b8b 100644
--- a/tests/test_temporal_queries.py
+++ b/tests/test_temporal_queries.py
@@ -163,9 +163,7 @@ def server_process():
             return
     except (requests.ConnectionError, requests.Timeout):
         pass
-    pytest.skip(
-        "Server not running. Start with: python scripts/start_server.py --port=8000"
-    )
+    pytest.skip("Server not running. Start with: python scripts/start_server.py --port=8000")
 
 
 @pytest_asyncio.fixture(scope="module")
@@ -173,9 +171,7 @@ async def api_auth(server_process):
     """Create org and client once per module; yield auth for client creation."""
     from conftest import _create_client_and_key
 
-    auth = await _create_client_and_key(
-        "temporal-test-client", "temporal-test-org", org_name="Temporal Test Org"
-    )
+    auth = await _create_client_and_key("temporal-test-client", "temporal-test-org", org_name="Temporal Test Org")
     os.environ.setdefault("MIRIX_API_URL", "http://localhost:8000")
     os.environ["MIRIX_API_KEY"] = auth["api_key"]
     return auth
@@ -235,9 +231,7 @@ async def test_retrieve_with_temporal_expression(self, temporal_client):
 
         result = await temporal_client.retrieve_with_conversation(
             user_id=TEST_USER_ID_TEMPORAL,
-            messages=[
-                {"role": "user", "content": [{"type": "text", "text": "What did I do today?"}]}
-            ],
+            messages=[{"role": "user", "content": [{"type": "text", "text": "What did I do today?"}]}],
             limit=10,
         )
 
@@ -262,9 +256,7 @@ async def test_retrieve_with_explicit_date_range(self, temporal_client):
 
         result = await temporal_client.retrieve_with_conversation(
             user_id=TEST_USER_ID_TEMPORAL,
-            messages=[
-                {"role": "user", "content": [{"type": "text", "text": "Show me November 2025 events"}]}
-            ],
+            messages=[{"role": "user", "content": [{"type": "text", "text": "Show me November 2025 events"}]}],
             limit=10,
             start_date=start,
             end_date=end,
@@ -293,9 +285,7 @@ async def test_temporal_filtering_episodic_only(self, temporal_client):
             messages=[
                 {
                     "role": "user",
-                    "content": [
-                        {"type": "text", "text": "I learned that Python uses list comprehensions."}
-                    ],
+                    "content": [{"type": "text", "text": "I learned that Python uses list comprehensions."}],
                 },
                 {"role": "assistant", "content": [{"type": "text", "text": "Noted."}]},
             ],
@@ -306,9 +296,7 @@ async def test_temporal_filtering_episodic_only(self, temporal_client):
         end = "2025-12-31T23:59:59"
         result = await temporal_client.retrieve_with_conversation(
             user_id=TEST_USER_ID_TEMPORAL,
-            messages=[
-                {"role": "user", "content": [{"type": "text", "text": "What do you know about me?"}]}
-            ],
+            messages=[{"role": "user", "content": [{"type": "text", "text": "What do you know about me?"}]}],
             limit=10,
             start_date=start,
             end_date=end,
diff --git a/tests/test_user.py b/tests/test_user.py
index f25543741..bedf3bafe 100644
--- a/tests/test_user.py
+++ b/tests/test_user.py
@@ -110,7 +110,9 @@ async def test_explicit_user_creation_then_add_memory(client):
 
     # Step 2: Create user explicitly
     print(f"[Step 2] Creating user with create_or_get_user()...")
-    created_user_id = await client.create_or_get_user(user_id=user_id, user_name=f"Test User {user_id}", org_id=TEST_ORG_ID)
+    created_user_id = await client.create_or_get_user(
+        user_id=user_id, user_name=f"Test User {user_id}", org_id=TEST_ORG_ID
+    )
     print(f"[OK] User created: {created_user_id}")
     assert created_user_id == user_id, "Returned user_id should match requested user_id"
 
@@ -137,7 +139,9 @@ async def test_explicit_user_creation_then_add_memory(client):
 
     filter_tags = {"test_type": "explicit_creation", "account_id": "ACC-001"}
 
-    response = await client.add(user_id=user_id, messages=messages, filter_tags=filter_tags, chaining=False, verbose=False)
+    response = await client.add(
+        user_id=user_id, messages=messages, filter_tags=filter_tags, chaining=False, verbose=False
+    )
 
     print(f"[OK] Memory add request submitted")
     print(f"     Response: {response}")
@@ -204,7 +208,9 @@ async def test_auto_user_creation_on_add_memory(client):
 
     filter_tags = {"test_type": "auto_creation", "region": "West"}
 
-    response = await client.add(user_id=user_id, messages=messages, filter_tags=filter_tags, chaining=False, verbose=False)
+    response = await client.add(
+        user_id=user_id, messages=messages, filter_tags=filter_tags, chaining=False, verbose=False
+    )
 
     print(f"[OK] Memory add request submitted")
     print(f"     Response: {response}")
@@ -253,14 +259,18 @@ async def test_idempotent_create_or_get_user(client):
     user_id = f"test-idempotent-user-{uuid.uuid4().hex[:8]}"
     print(f"\n[Step 1] Creating user: {user_id}")
 
-    created_user_id_1 = await client.create_or_get_user(user_id=user_id, user_name="Idempotent Test User", org_id=TEST_ORG_ID)
+    created_user_id_1 = await client.create_or_get_user(
+        user_id=user_id, user_name="Idempotent Test User", org_id=TEST_ORG_ID
+    )
     print(f"[OK] User created (1st call): {created_user_id_1}")
 
     # Step 2: Call again with same user_id
     print(f"[Step 2] Calling create_or_get_user() again with same user_id...")
     await asyncio.sleep(1)  # Small delay
 
-    created_user_id_2 = await client.create_or_get_user(user_id=user_id, user_name="Idempotent Test User", org_id=TEST_ORG_ID)
+    created_user_id_2 = await client.create_or_get_user(
+        user_id=user_id, user_name="Idempotent Test User", org_id=TEST_ORG_ID
+    )
     print(f"[OK] User retrieved (2nd call): {created_user_id_2}")
 
     # Step 3: Verify same user_id returned
diff --git a/tests/test_user_manager.py b/tests/test_user_manager.py
index 3d89e8977..be5327f06 100644
--- a/tests/test_user_manager.py
+++ b/tests/test_user_manager.py
@@ -212,9 +212,7 @@ async def test_create_user_is_organization_scoped(self, user_manager, test_org1)
         except Exception:
             pass
 
-    async def test_same_user_id_retrieved_by_different_contexts(
-        self, user_manager, test_org1, client_a, client_b
-    ):
+    async def test_same_user_id_retrieved_by_different_contexts(self, user_manager, test_org1, client_a, client_b):
         """
         Verify that a user created in an org can be retrieved regardless of client context.
 
@@ -252,9 +250,7 @@ class TestMultipleClientsSameOrgShareUsers:
 
     pytestmark = pytest.mark.asyncio(loop_scope="module")
 
-    async def test_multiple_clients_same_org_see_same_users(
-        self, user_manager, test_org1, client_a, client_b
-    ):
+    async def test_multiple_clients_same_org_see_same_users(self, user_manager, test_org1, client_a, client_b):
         """
         Verify that two clients in the same organization see the same users.
 
@@ -292,9 +288,7 @@ async def test_multiple_clients_same_org_see_same_users(
                 except Exception:
                     pass
 
-    async def test_user_count_not_multiplied_by_clients(
-        self, user_manager, test_org1, client_a, client_b
-    ):
+    async def test_user_count_not_multiplied_by_clients(self, user_manager, test_org1, client_a, client_b):
         """
         Verify that having multiple clients doesn't multiply user count.
 
@@ -315,9 +309,7 @@ async def test_user_count_not_multiplied_by_clients(
             users = await user_manager.list_users(organization_id=test_org1.id)
             user_occurrences = [u for u in users if u.id == user_id]
 
-            assert len(user_occurrences) == 1, (
-                f"User should appear exactly once, got {len(user_occurrences)}"
-            )
+            assert len(user_occurrences) == 1, f"User should appear exactly once, got {len(user_occurrences)}"
 
         finally:
             try:
@@ -336,9 +328,7 @@ class TestUsersIsolatedAcrossOrganizations:
 
     pytestmark = pytest.mark.asyncio(loop_scope="module")
 
-    async def test_list_users_filters_by_organization(
-        self, user_manager, test_org1, test_org2
-    ):
+    async def test_list_users_filters_by_organization(self, user_manager, test_org1, test_org2):
         """
         Verify list_users filters by organization_id.
 
@@ -383,13 +373,9 @@ async def test_list_users_filters_by_organization(
             for uid in org2_user_ids:
                 assert uid in org2_retrieved_ids, f"Org2 user {uid} not in org2 list"
             for uid in org1_user_ids:
-                assert uid not in org2_retrieved_ids, (
-                    f"Org1 user {uid} should not be in org2 list"
-                )
+                assert uid not in org2_retrieved_ids, f"Org1 user {uid} should not be in org2 list"
             for uid in org2_user_ids:
-                assert uid not in org1_retrieved_ids, (
-                    f"Org2 user {uid} should not be in org1 list"
-                )
+                assert uid not in org1_retrieved_ids, f"Org2 user {uid} should not be in org1 list"
 
         finally:
             for uid in org1_user_ids + org2_user_ids:
@@ -409,9 +395,7 @@ class TestClientDeletionPreservesUsers:
 
     pytestmark = pytest.mark.asyncio(loop_scope="module")
 
-    async def test_delete_client_preserves_users(
-        self, user_manager, client_manager, test_org1
-    ):
+    async def test_delete_client_preserves_users(self, user_manager, client_manager, test_org1):
         """
         Verify deleting a client does NOT cascade-delete users.
 
@@ -445,9 +429,7 @@ async def test_delete_client_preserves_users(
             await client_manager.delete_client_by_id(client_id)
 
             user_after_delete = await user_manager.get_user_by_id(user_id)
-            assert user_after_delete.id == user_id, (
-                "User should still exist after client deletion"
-            )
+            assert user_after_delete.id == user_id, "User should still exist after client deletion"
             assert user_after_delete.organization_id == test_org1.id
 
         finally:
@@ -467,13 +449,9 @@ class TestGetOrCreateOrgDefaultUser:
 
     pytestmark = pytest.mark.asyncio(loop_scope="module")
 
-    async def test_get_or_create_org_default_user_creates_user(
-        self, user_manager, test_org1
-    ):
+    async def test_get_or_create_org_default_user_creates_user(self, user_manager, test_org1):
         """Verify get_or_create_org_default_user creates a default user for the org."""
-        default_user = await user_manager.get_or_create_org_default_user(
-            org_id=test_org1.id
-        )
+        default_user = await user_manager.get_or_create_org_default_user(org_id=test_org1.id)
 
         assert default_user is not None
         assert default_user.organization_id == test_org1.id
@@ -484,40 +462,24 @@ async def test_get_or_create_org_default_user_creates_user(
         except Exception:
             pass
 
-    async def test_get_or_create_org_default_user_is_idempotent(
-        self, user_manager, test_org1
-    ):
+    async def test_get_or_create_org_default_user_is_idempotent(self, user_manager, test_org1):
         """Verify get_or_create_org_default_user returns the same user on repeated calls."""
-        default_user_1 = await user_manager.get_or_create_org_default_user(
-            org_id=test_org1.id
-        )
-        default_user_2 = await user_manager.get_or_create_org_default_user(
-            org_id=test_org1.id
-        )
+        default_user_1 = await user_manager.get_or_create_org_default_user(org_id=test_org1.id)
+        default_user_2 = await user_manager.get_or_create_org_default_user(org_id=test_org1.id)
 
-        assert default_user_1.id == default_user_2.id, (
-            "Should return same user on repeated calls"
-        )
+        assert default_user_1.id == default_user_2.id, "Should return same user on repeated calls"
 
         try:
             await user_manager.delete_user_by_id(default_user_1.id)
         except Exception:
             pass
 
-    async def test_get_or_create_org_default_user_different_orgs(
-        self, user_manager, test_org1, test_org2
-    ):
+    async def test_get_or_create_org_default_user_different_orgs(self, user_manager, test_org1, test_org2):
         """Verify get_or_create_org_default_user creates separate users for different orgs."""
-        default_user_org1 = await user_manager.get_or_create_org_default_user(
-            org_id=test_org1.id
-        )
-        default_user_org2 = await user_manager.get_or_create_org_default_user(
-            org_id=test_org2.id
-        )
+        default_user_org1 = await user_manager.get_or_create_org_default_user(org_id=test_org1.id)
+        default_user_org2 = await user_manager.get_or_create_org_default_user(org_id=test_org2.id)
 
-        assert default_user_org1.id != default_user_org2.id, (
-            "Different orgs should have different default users"
-        )
+        assert default_user_org1.id != default_user_org2.id, "Different orgs should have different default users"
         assert default_user_org1.organization_id == test_org1.id
         assert default_user_org2.organization_id == test_org2.id