Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
256 changes: 148 additions & 108 deletions dist/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6581,13 +6581,65 @@ def __init__(self, *, agent: str, field: str, message: str | None = None):


_HEADER_SPLIT = re.compile(r"^#{2,}\s+(\w+)\s*$", re.MULTILINE)
_CONF_LINE = re.compile(
# Leftmost float (allows int form), optional rationale after em-dash /
# ASCII dash / hyphen separator. ``re.DOTALL`` so a multi-line rationale
# is captured wholesale.
r"^\s*(-?[0-9]*\.?[0-9]+)\s*(?:[\-\u2010\u2011\u2012\u2013\u2014\u2015]+\s*(.*))?$",
re.DOTALL,
)

# Dash separators between the confidence number and its rationale \u2014
# accept the full Pd block so gpt-oss's preferred EN DASH (\u2013) and
# the spec's EM DASH (\u2014) both parse, plus the ASCII hyphen.
_DASH_CHARS = frozenset("\u2010\u2011\u2012\u2013\u2014\u2015-")


def _parse_confidence_line(raw: str) -> tuple[float, str] | None:
"""Procedural confidence-line parser. Returns ``(value, rationale)`` on
success, ``None`` on shape mismatch.

Replaces an earlier regex implementation that Sonar's S5852 flagged
as polynomial-time backtracking-vulnerable. A linear scan over the
leading whitespace + number + optional dash-prefixed rationale has
no backtracking surface to attack.

Accepted shapes (on the first non-empty line of the section body):
* ``"0.85"``
* ``"0.85 \u2014 rationale"`` (or ASCII ``--`` / ``-`` / any Pd dash)
* ``"-0.5 - rationale"``
* ``"1"`` / ``"1."`` / ``".5"``
"""
body = raw.lstrip()
if not body:
return None

# Pull the leading number token: optional minus, then any combination
# of digits and at most one dot. Stop at the first character that
# cannot be part of a number.
pos = 0
if body[pos] == "-":
pos += 1
num_start = pos
saw_dot = False
saw_digit = False
while pos < len(body):
ch = body[pos]
if ch.isdigit():
saw_digit = True
pos += 1
continue
if ch == "." and not saw_dot:
saw_dot = True
pos += 1
continue
break
if not saw_digit:
return None
try:
value = float(body[: pos] if pos > num_start else "0")
except ValueError:
return None

# Skip whitespace + dash-cluster + whitespace before the rationale.
rest = body[pos:].lstrip()
while rest and rest[0] in _DASH_CHARS:
rest = rest[1:]
rationale = rest.lstrip()
return value, rationale


def _clamp_unit(x: float) -> float:
Expand Down Expand Up @@ -6651,26 +6703,17 @@ def parse_markdown_envelope(
),
)

m = _CONF_LINE.match(raw_conf)
if not m:
parsed = _parse_confidence_line(raw_conf)
if parsed is None:
raise EnvelopeMissingError(
agent=agent, field="confidence",
message=(
f"envelope_missing: confidence section did not parse "
f"(agent={agent!r}, raw={raw_conf!r})"
),
)
try:
conf_value = _clamp_unit(float(m.group(1)))
except (TypeError, ValueError) as exc:
raise EnvelopeMissingError(
agent=agent, field="confidence",
message=(
f"envelope_missing: confidence value not a float "
f"(agent={agent!r}, raw={raw_conf!r})"
),
) from exc
rationale = (m.group(2) or "").strip() or "(no rationale provided)"
conf_value = _clamp_unit(parsed[0])
rationale = parsed[1].strip() or "(no rationale provided)"

signal_raw = sections.get("signal", "").strip().lower() or None
if signal_raw in {"none", "null", "", "n/a"}:
Expand Down Expand Up @@ -7135,6 +7178,51 @@ def _evaluate_gate(
return decision


def _record_pending_resolution(
*,
session: Session,
pending_idx: int,
agent_name: str,
tool_name: str,
pending_args: dict,
pending_ts: str,
status: ToolStatus,
result: Any,
approver: str | None,
rationale: str | None,
store: "SessionStore | None",
) -> None:
"""Replace the ``pending_approval`` row at ``pending_idx`` with the
resolved row (``approved``/``rejected``/``timeout``) and persist
the change so the DB reflects the actual outcome.

Without the persist step, the DB row stays at ``pending_approval``
forever — the in-memory mutation is invisible to the UI's
``_render_pending_approvals_block`` predicate (which polls from
DB), so the operator keeps seeing Approve / Reject buttons after
they've already approved or rejected.

Centralised here so the three transition branches in both ``_run``
and ``_arun`` share one implementation rather than carrying six
near-identical 12-line blocks. The dedup also keeps Sonar's
"new duplicated lines" metric below the project's quality gate.
"""
session.tool_calls[pending_idx] = ToolCall(
agent=agent_name,
tool=tool_name,
args=pending_args,
result=result,
ts=pending_ts,
risk="high",
status=status,
approver=approver,
approved_at=_now_iso(),
approval_rationale=rationale,
)
if store is not None:
store.save(session)


class _GatedToolMarker(BaseTool):
"""Marker base class so ``isinstance(t, _GatedToolMarker)`` identifies
a tool that has already been wrapped by :func:`wrap_tool`. Used to
Expand Down Expand Up @@ -7414,25 +7502,15 @@ def _run(self, *args: Any, **kwargs: Any) -> Any: # noqa: D401
)
verdict_str = str(verdict).lower()
if verdict_str == "reject":
rejected_result = {"rejected": True, "rationale": rationale}
if pending_idx is not None:
session.tool_calls[pending_idx] = ToolCall(
agent=agent_name,
tool=inner.name,
args=pending_args,
result={"rejected": True, "rationale": rationale},
ts=pending_ts,
risk="high",
status="rejected",
approver=approver,
approved_at=_now_iso(),
approval_rationale=rationale,
_record_pending_resolution(
session=session, pending_idx=pending_idx,
agent_name=agent_name, tool_name=inner.name,
pending_args=pending_args, pending_ts=pending_ts,
status="rejected", result=rejected_result,
approver=approver, rationale=rationale, store=store,
)
# Persist the status transition. Without this,
# the DB row stays at ``pending_approval`` and
# the UI keeps offering the buttons forever.
if store is not None:
store.save(session)
rejected_result = {"rejected": True, "rationale": rationale}
_emit_invoked(
status="rejected", risk="high",
args_dict=pending_args, result=rejected_result,
Expand All @@ -7445,22 +7523,15 @@ def _run(self, *args: Any, **kwargs: Any) -> Any: # noqa: D401
# downstream consumers (UI, retraining) can
# distinguish operator-initiated rejections from
# automatic timeouts.
timeout_result = {"timeout": True, "rationale": rationale}
if pending_idx is not None:
session.tool_calls[pending_idx] = ToolCall(
agent=agent_name,
tool=inner.name,
args=pending_args,
result={"timeout": True, "rationale": rationale},
ts=pending_ts,
risk="high",
status="timeout",
approver=approver,
approved_at=_now_iso(),
approval_rationale=rationale,
_record_pending_resolution(
session=session, pending_idx=pending_idx,
agent_name=agent_name, tool_name=inner.name,
pending_args=pending_args, pending_ts=pending_ts,
status="timeout", result=timeout_result,
approver=approver, rationale=rationale, store=store,
)
if store is not None:
store.save(session)
timeout_result = {"timeout": True, "rationale": rationale}
_emit_invoked(
status="timeout", risk="high",
args_dict=pending_args, result=timeout_result,
Expand All @@ -7470,20 +7541,13 @@ def _run(self, *args: Any, **kwargs: Any) -> Any: # noqa: D401
# Approved -> run the tool, then update the audit row.
result = _sync_invoke_inner(kwargs if kwargs else args[0] if args else {})
if pending_idx is not None:
session.tool_calls[pending_idx] = ToolCall(
agent=agent_name,
tool=inner.name,
args=pending_args,
result=result,
ts=pending_ts,
risk="high",
status="approved",
approver=approver,
approved_at=_now_iso(),
approval_rationale=rationale,
_record_pending_resolution(
session=session, pending_idx=pending_idx,
agent_name=agent_name, tool_name=inner.name,
pending_args=pending_args, pending_ts=pending_ts,
status="approved", result=result,
approver=approver, rationale=rationale, store=store,
)
if store is not None:
store.save(session)
_emit_invoked(
status="approved", risk="high",
args_dict=pending_args, result=result,
Expand Down Expand Up @@ -7601,48 +7665,31 @@ async def _arun(self, *args: Any, **kwargs: Any) -> Any: # noqa: D401
)
verdict_str = str(verdict).lower()
if verdict_str == "reject":
rejected_result = {"rejected": True, "rationale": rationale}
if pending_idx is not None:
session.tool_calls[pending_idx] = ToolCall(
agent=agent_name,
tool=inner.name,
args=pending_args,
result={"rejected": True, "rationale": rationale},
ts=pending_ts,
risk="high",
status="rejected",
approver=approver,
approved_at=_now_iso(),
approval_rationale=rationale,
_record_pending_resolution(
session=session, pending_idx=pending_idx,
agent_name=agent_name, tool_name=inner.name,
pending_args=pending_args, pending_ts=pending_ts,
status="rejected", result=rejected_result,
approver=approver, rationale=rationale, store=store,
)
# Persist the status transition (mirror of the
# sync path) so the DB row reflects the actual
# outcome instead of staying at pending_approval.
if store is not None:
store.save(session)
rejected_result = {"rejected": True, "rationale": rationale}
_emit_invoked(
status="rejected", risk="high",
args_dict=pending_args, result=rejected_result,
latency_ms=(time.monotonic() - t0) * 1000,
)
return rejected_result
if verdict_str == "timeout":
timeout_result = {"timeout": True, "rationale": rationale}
if pending_idx is not None:
session.tool_calls[pending_idx] = ToolCall(
agent=agent_name,
tool=inner.name,
args=pending_args,
result={"timeout": True, "rationale": rationale},
ts=pending_ts,
risk="high",
status="timeout",
approver=approver,
approved_at=_now_iso(),
approval_rationale=rationale,
_record_pending_resolution(
session=session, pending_idx=pending_idx,
agent_name=agent_name, tool_name=inner.name,
pending_args=pending_args, pending_ts=pending_ts,
status="timeout", result=timeout_result,
approver=approver, rationale=rationale, store=store,
)
if store is not None:
store.save(session)
timeout_result = {"timeout": True, "rationale": rationale}
_emit_invoked(
status="timeout", risk="high",
args_dict=pending_args, result=timeout_result,
Expand All @@ -7651,20 +7698,13 @@ async def _arun(self, *args: Any, **kwargs: Any) -> Any: # noqa: D401
return timeout_result
result = await inner.ainvoke(kwargs if kwargs else args[0] if args else {})
if pending_idx is not None:
session.tool_calls[pending_idx] = ToolCall(
agent=agent_name,
tool=inner.name,
args=pending_args,
result=result,
ts=pending_ts,
risk="high",
status="approved",
approver=approver,
approved_at=_now_iso(),
approval_rationale=rationale,
_record_pending_resolution(
session=session, pending_idx=pending_idx,
agent_name=agent_name, tool_name=inner.name,
pending_args=pending_args, pending_ts=pending_ts,
status="approved", result=result,
approver=approver, rationale=rationale, store=store,
)
if store is not None:
store.save(session)
_emit_invoked(
status="approved", risk="high",
args_dict=pending_args, result=result,
Expand Down
Loading
Loading