From d36951b47ca8fd6b3b40116f2c77d2e9770303af Mon Sep 17 00:00:00 2001 From: Shafeeq Mohamed Date: Sun, 22 Mar 2026 14:41:37 -0400 Subject: [PATCH] Add hireEZ meeting ops RL prototype --- README.md | 173 +++++++++++++++++++++++++++ meeting_ops_env.py | 217 ++++++++++++++++++++++++++++++++++ tests/test_meeting_ops_env.py | 52 ++++++++ 3 files changed, 442 insertions(+) create mode 100644 README.md create mode 100644 meeting_ops_env.py create mode 100644 tests/test_meeting_ops_env.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..cc64b8a --- /dev/null +++ b/README.md @@ -0,0 +1,173 @@ +# Consent-First Meeting Ops Agent RL Prototype + +This repository contains a hackathon-ready prototype for a **hireEZ-aligned Customer Success Engineer solution** built around an **RL environment + agent loop** rather than a pure prompt demo. + +## Why this concept fits the role + +The target role is closer to **implementation, integration, debugging, and production-minded automation** than to speculative "emotion AI". The proposed solution is a: + +> **Consent-First Meeting Ops Agent** + +An agent that learns to turn meeting signals into approved CRM + Slack actions while optimizing for: + +- CRM hygiene +- low-risk automation +- human approval compliance +- minimal rep disruption +- observable business outcomes + +This mirrors how a Customer Success Engineer would think about customer deployments: + +- integration adapters +- validation and error handling +- auditability +- human-in-the-loop workflows +- measurable operational outcomes + +## Hackathon framing + +This project addresses the Sundai theme: + +- **RL environments + agents** +- **agentic harnesses** +- **self-improving tool use** +- **custom reward function for a realistic workflow** + +Instead of training an agent to "detect emotions," this repo defines a small environment where the agent learns the right actions across a realistic revenue-ops workflow. 
+ +## Core idea + +A meeting arrives with structured but imperfect signals: + +- missing next step +- unclear stakeholder coverage +- weak confidence in extracted fields +- consent on/off +- risk of unnecessary automation + +The agent must choose actions such as: + +- extract structured fields +- ask for approval +- update CRM draft +- post Slack summary +- request clarification +- stay quiet in-call + +The reward function encourages behavior that is: + +- execution-oriented +- safe under uncertainty +- human-supervised when confidence is low +- useful for downstream systems + +## Environment design + +### State + +Each episode simulates one meeting/workflow state with features like: + +- `has_next_step` +- `has_stakeholder` +- `high_value_opportunity` +- `consent_granted` +- `confidence_bucket` +- `manager_requested_update` +- `rep_is_busy` +- `customer_risk_flag` + +### Actions + +The agent can choose one of these actions: + +1. `wait` +2. `extract_fields` +3. `request_clarification` +4. `ask_human_approval` +5. `write_crm` +6. `post_slack_update` +7. 
`show_quiet_nudge` + +### Reward intuition + +The reward function favors: + +- requesting clarification before automation when fields are incomplete +- asking for human approval before writing low-confidence updates +- writing to CRM only when consent and confidence make it appropriate +- Slack updates when they help coordination +- quiet nudges only when the rep is busy and a key item is missing + +The reward function penalizes: + +- writing to CRM without consent +- distracting the rep with unnecessary nudges +- posting weak or noisy updates +- skipping important workflow steps on high-value deals + +## Why this is stronger than an "emotion detection" pitch + +This is intentionally aligned with enterprise reality: + +- **safer governance posture** +- **clearer reward definition** +- **closer to deployment work** +- **better fit for hireEZ's semi-autonomous, human-supervised agent model** + +It also maps more directly to a CSE interview narrative: + +- API adapters +- validation logic +- human approval gates +- logs and retries +- measurable workflow gains + +## Files + +- `meeting_ops_env.py` — RL environment, rule-based heuristic policy, and evaluation harness +- `tests/test_meeting_ops_env.py` — lightweight correctness tests for environment transitions and reward behavior + +## How to run + +### Run the heuristic policy evaluation + +```bash +python3 meeting_ops_env.py +``` + +### Run tests + +```bash +python3 -m unittest discover -s tests -p 'test_*.py' +``` + +## Demo talking points for presentation + +### 1. Problem statement + +"I built an RL environment for a Meeting Ops Agent that learns when to extract, clarify, escalate, approve, and update systems of record like CRM and Slack." + +### 2. Why RL here + +"This is a sequential decision problem with competing objectives: speed, accuracy, consent, and rep experience. RL is a better fit than a single prompt because the agent must learn trade-offs across multiple steps." + +### 3. 
Why this fits hireEZ + +"hireEZ's product model is semi-autonomous and workflow-native. My environment trains an agent to behave that way: human-supervised, tool-using, and integration-aware." + +### 4. Why it fits a CSE role + +"I biased the design toward implementation realism: approval gates, low-confidence handling, data quality checks, and execution into operational systems." + +## Next steps if you extend this during the hackathon + +1. Add a `SalesforceAdapter` mock with idempotency keys. +2. Add a `SlackAdapter` mock with routing rules. +3. Log every transition as JSON lines for debugging. +4. Add offline replay from real or synthetic transcripts. +5. Train with Q-learning or PPO against a larger synthetic episode generator. +6. Add a front-end dashboard showing state → action → reward → human approval. + +## Suggested final demo line + +> "I did not build a surveillance demo. I built a governed execution agent that learns how to improve revenue operations safely under uncertainty." 
# =====================================================================
# meeting_ops_env.py
# =====================================================================
"""RL-style environment, heuristic baseline policy, and evaluation loop
for a consent-first meeting ops agent."""

from dataclasses import astuple, dataclass, replace
from enum import IntEnum
import random
from typing import Dict, List, Tuple


class Action(IntEnum):
    """Discrete actions the agent can take on a meeting workflow."""

    WAIT = 0
    EXTRACT_FIELDS = 1
    REQUEST_CLARIFICATION = 2
    ASK_HUMAN_APPROVAL = 3
    WRITE_CRM = 4
    POST_SLACK_UPDATE = 5
    SHOW_QUIET_NUDGE = 6


@dataclass(frozen=True)
class MeetingState:
    """Immutable snapshot of one meeting workflow.

    The first eight fields describe the meeting itself; the remaining
    flags record which actions have already been taken in the episode.
    """

    has_next_step: bool
    has_stakeholder: bool
    high_value_opportunity: bool
    consent_granted: bool
    # reset() draws this from 0..2; the rewards treat values below 2 as
    # worth a human approval and 0 as too weak to write to CRM unapproved.
    confidence_bucket: int
    manager_requested_update: bool
    rep_is_busy: bool
    customer_risk_flag: bool
    extracted_fields: bool = False
    clarification_requested: bool = False
    approval_requested: bool = False
    crm_written: bool = False
    slack_posted: bool = False
    quiet_nudge_shown: bool = False
    done: bool = False

    def as_key(self) -> Tuple[int, ...]:
        """Return the state as a hashable tuple of ints.

        Values appear in field-declaration order, so the key cannot drift
        out of sync with the field list.
        """
        return tuple(int(value) for value in astuple(self))


class MeetingOpsEnv:
    """Small RL-style environment for a consent-first meeting ops agent."""

    # Flat cost charged on every step of an unfinished episode.
    STEP_COST = -0.1
    # Ceiling on the reward for repeating an action already taken this episode.
    REPEAT_PENALTY = -0.5

    def __init__(self, seed: int = 7):
        """Create the environment and draw the first episode.

        Args:
            seed: Seed for the private random generator, so episode
                sequences are reproducible.
        """
        self._rng = random.Random(seed)
        self.state = self.reset()

    def reset(self) -> MeetingState:
        """Start a new episode with randomly drawn meeting features.

        Returns:
            The fresh state, which is also stored on ``self.state``.
        """
        # Draw order is kept fixed so a given seed yields the same episodes.
        self.state = MeetingState(
            has_next_step=self._rng.choice([True, False]),
            has_stakeholder=self._rng.choice([True, False]),
            high_value_opportunity=self._rng.choice([True, False]),
            consent_granted=self._rng.choice([True, False]),
            confidence_bucket=self._rng.randint(0, 2),
            manager_requested_update=self._rng.choice([True, False]),
            rep_is_busy=self._rng.choice([True, False]),
            customer_risk_flag=self._rng.choice([True, False]),
        )
        return self.state

    def step(self, action: Action) -> Tuple[MeetingState, float, bool, Dict[str, str]]:
        """Apply ``action`` to the current state.

        Args:
            action: The action to take (an ``Action`` or its int value).

        Returns:
            ``(next_state, reward, done, info)`` where ``info`` carries a
            human-readable ``"message"``. Stepping a finished episode
            returns the unchanged state with a reward of ``0.0``.

        Raises:
            ValueError: If ``action`` is not one of the known actions.
        """
        state = self.state
        if state.done:
            return state, 0.0, True, {"message": "Episode already completed."}

        reward = self.STEP_COST

        if action == Action.WAIT:
            reward += self._reward_wait(state)
            next_state = replace(state, done=True)
            message = "Agent waited."

        elif action == Action.EXTRACT_FIELDS:
            reward += self._first_time_only(state.extracted_fields, 1.0)
            next_state = replace(state, extracted_fields=True)
            message = "Structured extraction completed."

        elif action == Action.REQUEST_CLARIFICATION:
            fields_missing = not (state.has_next_step and state.has_stakeholder)
            reward += 1.5 if fields_missing else -0.4
            # Clarification fills in both missing fields.
            next_state = replace(
                state,
                clarification_requested=True,
                has_next_step=True,
                has_stakeholder=True,
            )
            message = "Clarification requested to complete missing fields."

        elif action == Action.ASK_HUMAN_APPROVAL:
            needs_review = state.confidence_bucket < 2 or state.customer_risk_flag
            base = 1.2 if needs_review else 0.4
            reward += self._first_time_only(state.approval_requested, base)
            next_state = replace(state, approval_requested=True)
            message = "Human approval requested."

        elif action == Action.WRITE_CRM:
            reward += self._reward_write_crm(state)
            next_state = replace(state, crm_written=True, done=True)
            message = "CRM write attempted."

        elif action == Action.POST_SLACK_UPDATE:
            reward += self._first_time_only(
                state.slack_posted, self._reward_post_slack(state)
            )
            next_state = replace(state, slack_posted=True)
            message = "Slack update posted."

        elif action == Action.SHOW_QUIET_NUDGE:
            reward += self._first_time_only(
                state.quiet_nudge_shown, self._reward_quiet_nudge(state)
            )
            next_state = replace(state, quiet_nudge_shown=True)
            message = "Quiet in-call nudge shown."

        else:
            raise ValueError(f"Unknown action: {action}")

        self.state = next_state
        return next_state, reward, next_state.done, {"message": message}

    def _first_time_only(self, already_done: bool, base: float) -> float:
        """Return ``base`` for a first use, capped at REPEAT_PENALTY on repeats.

        Without the cap, non-terminal actions could be repeated to collect
        their bonus on every step. ``min`` keeps a repeat from ever scoring
        better than the first use would have.
        """
        if already_done:
            return min(base, self.REPEAT_PENALTY)
        return base

    def _reward_wait(self, state: MeetingState) -> float:
        """Score ending the episode without further action."""
        if state.high_value_opportunity and not state.crm_written:
            return -1.0
        if state.rep_is_busy and state.has_next_step and state.has_stakeholder:
            return 0.3
        return -0.2

    def _reward_write_crm(self, state: MeetingState) -> float:
        """Score a CRM write; consent, completeness and confidence gate it."""
        if not state.consent_granted:
            return -4.0
        if not state.has_next_step or not state.has_stakeholder:
            return -2.5
        if state.confidence_bucket == 0 and not state.approval_requested:
            return -2.0
        score = 3.0
        if state.approval_requested:
            score += 1.0
        if state.manager_requested_update:
            score += 0.5
        return score

    def _reward_post_slack(self, state: MeetingState) -> float:
        """Score a first Slack update."""
        if state.manager_requested_update or state.customer_risk_flag:
            return 1.5
        if state.high_value_opportunity:
            return 0.8
        return -0.3

    def _reward_quiet_nudge(self, state: MeetingState) -> float:
        """Score a first quiet in-call nudge."""
        if state.rep_is_busy and (not state.has_next_step or not state.has_stakeholder):
            return 1.3
        if state.rep_is_busy:
            return 0.3
        return -0.8


def heuristic_policy(state: MeetingState) -> Action:
    """Pick the next action with a fixed rule order.

    Order: complete missing fields, extract, ask approval when confidence
    is low or the customer is flagged, post to Slack on manager request,
    write CRM when consent is granted, otherwise wait.

    Args:
        state: The current meeting state.

    Returns:
        The action the rule-based baseline would take.
    """
    fields_missing = not (state.has_next_step and state.has_stakeholder)
    if fields_missing and not state.clarification_requested:
        # A nudge does not fill in the fields, so show it at most once and
        # then fall through to clarification; otherwise the policy would
        # return the nudge forever.
        if state.rep_is_busy and not state.quiet_nudge_shown:
            return Action.SHOW_QUIET_NUDGE
        return Action.REQUEST_CLARIFICATION
    if not state.extracted_fields:
        return Action.EXTRACT_FIELDS
    if (state.confidence_bucket < 2 or state.customer_risk_flag) and not state.approval_requested:
        return Action.ASK_HUMAN_APPROVAL
    if state.manager_requested_update and not state.slack_posted:
        return Action.POST_SLACK_UPDATE
    if state.consent_granted and not state.crm_written:
        return Action.WRITE_CRM
    return Action.WAIT


def evaluate_policy(
    episodes: int = 100, seed: int = 7, *, max_steps: int = 8
) -> Dict[str, float]:
    """Run ``heuristic_policy`` for several episodes and aggregate metrics.

    Args:
        episodes: Number of episodes to run; must be positive.
        seed: Seed passed to the environment.
        max_steps: Per-episode cap on the number of steps taken.

    Returns:
        A dict with ``episodes``, ``avg_reward``, ``crm_write_rate`` and
        ``approval_request_rate``.

    Raises:
        ValueError: If ``episodes`` is not positive (the averages would
            otherwise divide by zero).
    """
    if episodes <= 0:
        raise ValueError("episodes must be a positive integer")

    env = MeetingOpsEnv(seed=seed)
    total_reward = 0.0
    crm_writes = 0
    approval_requests = 0

    for _ in range(episodes):
        state = env.reset()
        done = False
        steps = 0
        while not done and steps < max_steps:
            action = heuristic_policy(state)
            state, reward, done, _ = env.step(action)
            total_reward += reward
            steps += 1
        # Flags are read from the final state of the episode.
        crm_writes += int(state.crm_written)
        approval_requests += int(state.approval_requested)

    return {
        "episodes": float(episodes),
        "avg_reward": total_reward / episodes,
        "crm_write_rate": crm_writes / episodes,
        "approval_request_rate": approval_requests / episodes,
    }


if __name__ == "__main__":
    metrics = evaluate_policy(episodes=200)
    print("Consent-First Meeting Ops Agent evaluation")
    for key, value in metrics.items():
        print(f"- {key}: {value:.3f}")


# =====================================================================
# tests/test_meeting_ops_env.py
# =====================================================================
# Action, MeetingOpsEnv, MeetingState and heuristic_policy are the
# definitions above; this listing uses them directly rather than importing
# them from the meeting_ops_env module.
import unittest


class MeetingOpsEnvTest(unittest.TestCase):
    """Correctness tests for environment transitions and the heuristic."""

    def test_write_crm_without_consent_is_penalized(self):
        env = MeetingOpsEnv(seed=1)
        env.state = MeetingState(
            has_next_step=True,
            has_stakeholder=True,
            high_value_opportunity=True,
            consent_granted=False,
            confidence_bucket=2,
            manager_requested_update=False,
            rep_is_busy=False,
            customer_risk_flag=False,
        )
        _, reward, done, _ = env.step(Action.WRITE_CRM)
        self.assertTrue(done)
        self.assertLess(reward, -3.0)

    def test_heuristic_requests_clarification_for_missing_fields(self):
        state = MeetingState(
            has_next_step=False,
            has_stakeholder=True,
            high_value_opportunity=False,
            consent_granted=True,
            confidence_bucket=2,
            manager_requested_update=False,
            rep_is_busy=False,
            customer_risk_flag=False,
        )
        self.assertEqual(heuristic_policy(state), Action.REQUEST_CLARIFICATION)

    def test_heuristic_prefers_approval_under_low_confidence(self):
        state = MeetingState(
            has_next_step=True,
            has_stakeholder=True,
            high_value_opportunity=False,
            consent_granted=True,
            confidence_bucket=0,
            manager_requested_update=False,
            rep_is_busy=False,
            customer_risk_flag=False,
            extracted_fields=True,
        )
        self.assertEqual(heuristic_policy(state), Action.ASK_HUMAN_APPROVAL)

    def test_heuristic_nudges_busy_rep_only_once(self):
        env = MeetingOpsEnv(seed=1)
        env.state = MeetingState(
            has_next_step=False,
            has_stakeholder=True,
            high_value_opportunity=False,
            consent_granted=True,
            confidence_bucket=2,
            manager_requested_update=False,
            rep_is_busy=True,
            customer_risk_flag=False,
        )
        self.assertEqual(heuristic_policy(env.state), Action.SHOW_QUIET_NUDGE)
        state, _, done, _ = env.step(Action.SHOW_QUIET_NUDGE)
        self.assertFalse(done)
        self.assertEqual(heuristic_policy(state), Action.REQUEST_CLARIFICATION)

    def test_repeated_approval_request_is_penalized(self):
        env = MeetingOpsEnv(seed=1)
        env.state = MeetingState(
            has_next_step=True,
            has_stakeholder=True,
            high_value_opportunity=False,
            consent_granted=True,
            confidence_bucket=0,
            manager_requested_update=False,
            rep_is_busy=False,
            customer_risk_flag=False,
        )
        _, first, _, _ = env.step(Action.ASK_HUMAN_APPROVAL)
        _, second, _, _ = env.step(Action.ASK_HUMAN_APPROVAL)
        self.assertGreater(first, 0.0)
        self.assertLess(second, 0.0)


if __name__ == "__main__":
    unittest.main()