collinear-ai · AnandK27 · Apr 1, 2026 · Apr 1, 2026 · Apr 1, 2026 · Apr 1, 2026
diff --git a/cookbook/README.md b/cookbook/README.md
@@ -28,3 +28,4 @@ The agent will walk through each step, ask you for any required inputs (model, t
 | [openai-agents-sdk](openai-agents-sdk/) | Customer-style OpenAI Agents SDK cookbook showing how to keep an existing agent app and add a thin SimLab adapter. |
 | [secure-agent-eval](secure-agent-eval/) | Evaluate agent behavior through OneCLI's credential proxy — compare correctness, audit for credential leakage, and test rate limit resilience. |
 | [simlab-auto-research](simlab-auto-research/) | Autonomous system prompt optimization using the [auto-research](https://github.com/karpathy/autoresearch) pattern. An outer agent iterates on prompts, measured by SimLab task scores. |
+| [prime-rl-training](prime-rl-training/) | Collect SimLab trajectories and train agent models with Prime Intellect's prime-rl (SFT warmup + hosted RL). |
diff --git a/cookbook/prime-rl-training/.gitignore b/cookbook/prime-rl-training/.gitignore
@@ -0,0 +1,9 @@
+# Generated artifacts (re-create with run_pipeline.sh)
+output/
+dataset/
+dist/
+.prime/
+__pycache__/
+*.pyc
+taskgen/
+generated-tasks/
diff --git a/cookbook/prime-rl-training/SKILL.md b/cookbook/prime-rl-training/SKILL.md
@@ -0,0 +1,161 @@
+# Prime-RL Training with SimLab Trajectories
+
+Train agent models with Prime Intellect's prime-rl using SimLab-collected trajectories.
+
+## Prerequisites
+
+Before starting, confirm:
+
+1. SimLab is installed: `simlab --version`
+2. prime CLI is installed: `prime --version`
+3. `SIMLAB_COLLINEAR_API_KEY` is set
+4. `PRIME_API_KEY` is set
+5. `OPENAI_API_KEY` is set (used by the baseline agent that collects trajectories)
+
+If any prerequisite is missing, tell the user what to set and **wait before proceeding**.
+
+## Workflow
+
+### 1. Install cookbook dependencies
+
+```bash
+cd cookbook/prime-rl-training
+uv sync
+```
+
+### 2. Create SimLab environment
+
+```bash
+simlab templates list
+```
+
+Ask the user which template to use (default: `customer_service`).
+
+```bash
+simlab env init prime-rl-env --template <template>
+```
+
+### 3. Generate tasks
+
+```bash
+simlab tasks-gen init --preset customer_support --output-dir ./taskgen
+simlab tasks-gen run --config taskgen/config.toml
+```
+
+Wait for task generation to complete before proceeding.
+
+### 4. List tasks and select for rollouts
+
+```bash
+simlab tasks list --tasks-dir ./generated-tasks
+```
+
+Note the task IDs; each one is passed to `simlab tasks run --task <task_id>` in the next step.
+
+### 5. Start environment and collect trajectories
+
+```bash
+simlab env up prime-rl-env
+```
+
+Wait for all services to become healthy, then run tasks:
+
+```bash
+simlab tasks run \
+  --env prime-rl-env \
+  --task <task_id> \
+  --tasks-dir ./generated-tasks \
+  --agent-model gpt-4.1-mini \
+  --agent-api-key "$OPENAI_API_KEY"
+```
+
+Repeat for each task ID noted in step 4, waiting for each run to complete.
+
+### 6. Convert trajectories to SFT dataset
+
+```bash
+python -m prime_rl_training.collect sft \
+  --output-dir ./output \
+  --save-path ./dataset \
+  --min-reward 0.0 \
+  --include-failed \
+  --format jsonl
+```
+
+Verify the dataset:
+```bash
+wc -l dataset/train.jsonl
+head -1 dataset/train.jsonl | python -m json.tool
+```
+
+Present the trajectory count and a sample to the user.
+
+### 7. Push verifiers environment to Prime Intellect
+
+```bash
+prime env push -p ./prime-envs/simlab_tasks
+```
+
+Wait for confirmation. Note the environment ID from the output (e.g., `<username>/simlab-tasks`).
+
+### 8. Check model availability
+
+```bash
+prime rl models --plain
+```
+
+Present available models. Recommend `Qwen/Qwen3.5-9B` or another available model (note that `configs/rl.toml` ships with `Qwen/Qwen3.5-4B` as its default).
+
+### 9. Configure and launch RL training
+
+In `configs/rl.toml`, set `model` to the model chosen in step 8 and the `[[env]]` `id` to the environment ID noted in step 7, then:
+
+```bash
+prime rl run configs/rl.toml
+```
+
+Note the run ID from the output.
+
+### 10. Monitor training
+
+```bash
+prime rl logs <run-id> -f
+prime rl metrics <run-id> --plain
+prime rl progress <run-id> --plain
+```
+
+Present metrics to the user.
+
+### 11. Tear down SimLab environment
+
+```bash
+simlab env down prime-rl-env
+```
+
+## Results collection
+
+After training completes:
+
+```bash
+prime rl get <run-id> --plain
+prime rl checkpoints <run-id> --plain
+```
+
+Present results:
+
+| Metric | Value |
+|--------|-------|
+| Run ID | ... |
+| Model | ... |
+| Steps completed | ... |
+| Final reward | ... |
+| Checkpoint ID | ... |
+
+## Troubleshooting
+
+- **`simlab: command not found`** — Install with `uv pip install simulationlab`
+- **`prime: command not found`** — Install with `pip install prime`
+- **No trajectories collected** — Ensure the SimLab environment is running (`simlab env up`) and API keys are valid
+- **Port conflict on env up** — Edit `docker-compose.yml` to change conflicting port mappings
+- **`prime rl models` shows "At Capacity"** — Try a different model or wait
+- **Environment push needs username** — The first push prompts for a Prime Intellect username (one-time setup)
diff --git a/cookbook/prime-rl-training/configs/rl.toml b/cookbook/prime-rl-training/configs/rl.toml
@@ -0,0 +1,39 @@
+# prime-rl hosted RL config for SimLab environment training
+# Usage: prime rl run configs/rl.toml
+#
+# This config runs hosted RL training on Prime Intellect's platform
+# using a SimLab verifiers environment for reward scoring.
+
+# Use the SFT-warmed model or start from a base instruct model
+model = "Qwen/Qwen3.5-4B"
+max_steps = 50
+
+# Training hyperparameters
+batch_size = 64
+rollouts_per_example = 4
+# learning_rate = 3e-6
+# lora_alpha = 16
+
+[sampling]
+max_tokens = 2048
+# temperature = 0.7
+
+# SimLab verifiers environment
+[[env]]
+id = "collinear-simlab/simlab-tasks"
+
+# Optional: W&B logging
+# [wandb]
+# project = "simlab-rl-training"
+
+# Optional: evaluation during training
+# [eval]
+# interval = 25
+# [[eval.env]]
+# id = "collinear-simlab/simlab-tasks"
+# args = { output_dir = "./output", min_reward = 0.5 }
+
+# Optional: checkpointing
+# [checkpoints]
+# interval = 50
+# keep_cloud = 3
diff --git a/cookbook/prime-rl-training/configs/sft.toml b/cookbook/prime-rl-training/configs/sft.toml
@@ -0,0 +1,35 @@
+# Prime-RL SFT config for SimLab trajectory training
+# Usage: uv run sft @ configs/sft.toml
+#
+# This config trains a small model on successful SimLab trajectories
+# to teach it the tool-use and task-completion patterns before RL.
+
+max_steps = 200
+
+[model]
+name = "Qwen/Qwen3.5-4B"
+# Uncomment for LoRA (recommended to save memory):
+# [model.lora]
+# rank = 32
+# alpha = 64
+# target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
+
+[data]
+# Local dataset path (generated by: python -m prime_rl_training.collect sft ...)
+# Or a HuggingFace dataset ID (e.g., "myorg/simlab-sft-data")
+name = "./dataset"
+seq_len = 4096
+batch_size = 16
+
+# Only train on assistant responses (mask system/user/tool messages)
+[data.loss_mask]
+system = false
+user = false
+assistant = true
+tool = false
+
+[optim]
+lr = 2e-5
+
+[ckpt]
+# Checkpoint at end of training
diff --git a/...e/3_triage_and_escalate_critical_billing_discrepancy_for_enterprise_renewal_35ba835d.json b/...e/3_triage_and_escalate_critical_billing_discrepancy_for_enterprise_renewal_35ba835d.json
@@ -0,0 +1,37 @@
+{
+  "meta": {
+    "version": "2.0",
+    "task_id": "3_triage_and_escalate_critical_billing_discrepancy_for_enterprise_renewal_35ba835d",
+    "display_name": "Triage and escalate critical billing discrepancy for enterprise renewal",
+    "category": "ticket_triage",
+    "difficulty": "hard",
+    "idempotent": false
+  },
+  "task": "You've received a billing dispute from Karen Mitchell regarding her enterprise renewal invoice. She reports a 40% increase with unexpected charges and is threatening to cancel by end of week. Contact Diana Walsh to obtain the specific billing details and charge breakdown from the invoice. Then contact Carlos Mendez to confirm what was discussed during the renewal process and any contract amendments. Once you have the facts from both, review the ticket details and determine whether this is a genuine billing error or a legitimate contract amendment issue. Provide Karen with a clear explanation of the charges and your recommended resolution path.",
+  "tool_servers": [],
+  "apps": [
+    "Helpdesk",
+    "Chat",
+    "Email"
+  ],
+  "npcs": [
+    {
+      "id": "karen_mitchell"
+    },
+    {
+      "id": "diana_walsh"
+    },
+    {
+      "id": "carlos_mendez"
+    }
+  ],
+  "seed_emails": [
+    {
+      "from_profile_id": "karen_mitchell",
+      "to_addr": "support@weaverenterprises.com",
+      "subject": "Fwd: Urgent: Billing Discrepancy on Enterprise Renewal Invoice",
+      "body_text": "---------- Forwarded message ---------\nFrom: Karen Mitchell <karen.mitchell@mitchellassociates.co>\n\nHi,\n\nI received our enterprise renewal invoice and noticed significant unexpected charges that weren't discussed during contract renewal. The invoice shows a 40% increase from our previous year, but our service scope and user count haven't changed.\n\nI need clarification on:\n1. Why the unit price increased\n2. What these additional line items represent\n3. Whether this reflects an undisclosed contract amendment\n\nI'm frustrated because we were led to believe our renewal would be flat or minimal increase. If this isn't resolved by end of this week, we will have to cancel and explore alternatives.\n\nPlease escalate this urgently.\n\nKaren Mitchell\nMitchell Associates",
+      "body_html": "<p>---------- Forwarded message ---------</p><p>From: Karen Mitchell &lt;karen.mitchell@mitchellassociates.co&gt;</p><p>Hi,</p><p>I received our enterprise renewal invoice and noticed significant unexpected charges that weren't discussed during contract renewal. The invoice shows a 40% increase from our previous year, but our service scope and user count haven't changed.</p><p>I need clarification on:</p><ul><li>Why the unit price increased</li><li>What these additional line items represent</li><li>Whether this reflects an undisclosed contract amendment</li></ul><p>I'm frustrated because we were led to believe our renewal would be flat or minimal increase. If this isn't resolved by end of this week, we will have to cancel and explore alternatives.</p><p>Please escalate this urgently.</p><p>Karen Mitchell<br>Mitchell Associates</p>"
+    }
+  ]
+}
diff --git a/...prise_client_escalation_resolve_david_parks_api_rate_limiting_issue_and_pre_de0cff0d.json b/...prise_client_escalation_resolve_david_parks_api_rate_limiting_issue_and_pre_de0cff0d.json
@@ -0,0 +1,54 @@
+{
+  "meta": {
+    "version": "2.0",
+    "task_id": "4_enterprise_client_escalation_resolve_david_parks_api_rate_limiting_issue_and_pre_de0cff0d",
+    "display_name": "Enterprise Client Escalation: Resolve David Park's API Rate Limiting Issue and Prevent Service Interruption",
+    "category": "vip_enterprise_support",
+    "difficulty": "hard",
+    "idempotent": false
+  },
+  "task": "David Park at Park Industries has escalated a critical production API rate-limiting issue causing service interruptions. Search for or create a helpdesk ticket for this issue. Before responding to David, contact Marcus Chen in the engineering-support channel to get his technical assessment. After Marcus provides his findings, reach out to Robert Hayes via direct message to confirm the remediation messaging and any service credit or compensation. Once you have both assessments, email David with the findings, remediation steps, resolution timeline, and CC Robert Hayes for executive visibility.",
+  "tool_servers": [],
+  "apps": [
+    "Helpdesk",
+    "Chat",
+    "Email"
+  ],
+  "npcs": [
+    {
+      "id": "david_park"
+    },
+    {
+      "id": "marcus_chen"
+    },
+    {
+      "id": "robert_hayes"
+    }
+  ],
+  "seed_emails": [
+    {
+      "from_profile_id": "david_park",
+      "to_addr": "support@weaverenterprises.com",
+      "subject": "URGENT: Production API Integration Rate Limiting - Service Interruptions",
+      "body_text": "Hi,\n\nWe're experiencing critical issues with our production API integration. We're hitting unexpected rate limits that are causing service interruptions for our downstream clients. This is impacting our ability to serve our end users and is a significant operational problem.\n\nWe have an SLA in place for this account and need immediate investigation and resolution.\n\nPlease advise on the root cause and what remediation steps or timeline we should expect.\n\nThanks,\nDavid Park\nPark Industries",
+      "body_html": "<p>Hi,</p><p>We're experiencing critical issues with our production API integration. We're hitting unexpected rate limits that are causing service interruptions for our downstream clients. This is impacting our ability to serve our end users and is a significant operational problem.</p><p>We have an SLA in place for this account and need immediate investigation and resolution.</p><p>Please advise on the root cause and what remediation steps or timeline we should expect.</p><p>Thanks,<br>David Park<br>Park Industries</p>"
+    }
+  ],
+  "seed_group_channels": [
+    {
+      "channel_name": "engineering-support",
+      "member_profile_ids": [
+        "marcus_chen",
+        "priya_sharma",
+        "kevin_zhang",
+        "sandra_kim"
+      ],
+      "messages": [
+        {
+          "from_profile_id": "marcus_chen",
+          "text": "Channel for escalations and coordination between support and engineering teams on critical production issues."
+        }
+      ]
+    }
+  ]
+}