diff --git a/.github/workflows/agentry-issue-triage.yml b/.github/workflows/agentry-bug-fix.yml similarity index 61% rename from .github/workflows/agentry-issue-triage.yml rename to .github/workflows/agentry-bug-fix.yml index f3c66d2..4e60b92 100644 --- a/.github/workflows/agentry-issue-triage.yml +++ b/.github/workflows/agentry-bug-fix.yml @@ -1,26 +1,29 @@ # This workflow is a manually refined version of the output generated by: -# agentry ci generate --target github --dry-run workflows/triage.yaml +# agentry ci generate --target github --dry-run workflows/bug-fix.yaml # # Manually refined to include: -# - Proper workflow name capitalization ("Issue Triage" instead of "triage") -# - Permissions for issues write access (issue:comment and issue:label tools) +# - Proper workflow name capitalization ("Bug Fix" instead of "bug-fix") +# - Permissions for issue write and pull-request write access (pr:create and issue:comment tools) # - Workflow-level env section with CLAUDE_CODE_OAUTH_TOKEN # - Local installation via "pip install ." (using workspace as source) -# - Specific agentry run parameters: --input issue-description, --input repository-ref, --binder, --output-format +# - Specific agentry run parameters: --input repository-ref, --binder, --output-format # - Improved formatting and readability with proper spacing +# - Conditional trigger: only runs when the applied label is "category:bug" -name: "Agentry: Issue Triage" +name: "Agentry: Bug Fix" 'on': issues: - types: [opened] + types: [labeled] permissions: - contents: read + contents: write issues: write + pull-requests: write jobs: agentry: + if: github.event.label.name == 'category:bug' runs-on: ubuntu-latest steps: - name: Checkout repository @@ -39,7 +42,7 @@ jobs: - name: Run agentry run: > - agentry --output-format json run workflows/triage.yaml + agentry --output-format json run workflows/bug-fix.yaml --input repository-ref=. --binder github-actions env: diff --git a/.github/workflows/agentry-feature-implement.yml b/.github/workflows/agentry-feature-implement.yml new file mode 100644 index 0000000..d3fa12d --- /dev/null +++ b/.github/workflows/agentry-feature-implement.yml @@ -0,0 +1,50 @@ +# This workflow is a manually refined version of the output generated by: +# agentry ci generate --target github --dry-run workflows/feature-implement.yaml +# +# Manually refined to include: +# - Proper workflow name capitalization ("Feature Implement" instead of "feature-implement") +# - Permissions for issue write and pull-request write access (pr:create and issue:comment tools) +# - Workflow-level env section with CLAUDE_CODE_OAUTH_TOKEN +# - Local installation via "pip install ." (using workspace as source) +# - Specific agentry run parameters: --input repository-ref, --binder, --output-format +# - Improved formatting and readability with proper spacing +# - Conditional trigger: only runs when the applied label is "category:feature" + +name: "Agentry: Feature Implement" + +'on': + issues: + types: [labeled] + +permissions: + contents: write + issues: write + pull-requests: write + +jobs: + agentry: + if: github.event.label.name == 'category:feature' + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Claude Code + run: npm install -g @anthropic-ai/claude-code + + - name: Install agentry + run: pip install . + + - name: Run agentry + run: > + agentry --output-format json run workflows/feature-implement.yaml + --input repository-ref=. + --binder github-actions + env: + CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/agentry-planning-pipeline.yml b/.github/workflows/agentry-planning-pipeline.yml new file mode 100644 index 0000000..7bcc1c6 --- /dev/null +++ b/.github/workflows/agentry-planning-pipeline.yml @@ -0,0 +1,40 @@ +# This workflow replaces the superseded agentry-issue-triage.yml workflow. +# It triggers on issue creation and runs the planning pipeline which handles issue triage +# and task decomposition more comprehensively than the triage-only workflow. + +name: "Agentry: Planning Pipeline" + +'on': + issues: + types: [opened] + +permissions: + contents: read + issues: write + +jobs: + agentry: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Claude Code + run: npm install -g @anthropic-ai/claude-code + + - name: Install agentry + run: pip install . + + - name: Run agentry + run: > + agentry --output-format json run workflows/planning-pipeline.yaml + --input repository-ref=. + --binder github-actions + env: + CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/docs/specs/04-spec-agentry-ci/02-proofs/T02-01-test.txt b/docs/specs/04-spec-agentry-ci/02-proofs/T02-01-test.txt deleted file mode 100644 index b7d42dc..0000000 --- a/docs/specs/04-spec-agentry-ci/02-proofs/T02-01-test.txt +++ /dev/null @@ -1,22 +0,0 @@ -Type: test -Command: uv run pytest tests/unit/test_github_binder_inputs.py::TestResolveInputsIssueBodySource -v -Expected: All 9 new T02 tests pass -Timestamp: 2026-03-27T00:00:00Z - -============================= test session starts ============================== -platform linux -- Python 3.13.7, pytest-9.0.2, pluggy-1.6.0 -configfile: pyproject.toml - -tests/unit/test_github_binder_inputs.py::TestResolveInputsIssueBodySource::test_resolves_issue_body_from_issues_event PASSED -tests/unit/test_github_binder_inputs.py::TestResolveInputsIssueBodySource::test_falls_back_to_issue_title_when_body_is_null PASSED -tests/unit/test_github_binder_inputs.py::TestResolveInputsIssueBodySource::test_falls_back_to_issue_title_when_body_is_absent PASSED -tests/unit/test_github_binder_inputs.py::TestResolveInputsIssueBodySource::test_falls_back_to_issue_title_when_body_is_empty_string PASSED -tests/unit/test_github_binder_inputs.py::TestResolveInputsIssueBodySource::test_fallback_emits_warning_log PASSED -tests/unit/test_github_binder_inputs.py::TestResolveInputsIssueBodySource::test_cli_provided_value_overrides_source_and_fallback PASSED -tests/unit/test_github_binder_inputs.py::TestResolveInputsIssueBodySource::test_cli_override_takes_precedence_even_when_body_null PASSED -tests/unit/test_github_binder_inputs.py::TestResolveInputsIssueBodySource::test_no_fallback_returns_none_when_source_missing PASSED -tests/unit/test_github_binder_inputs.py::TestResolveInputsIssueBodySource::test_fallback_not_triggered_when_source_resolves PASSED - -============================== 9 passed in 0.03s =============================== - -Status: PASS diff --git a/docs/specs/04-spec-agentry-ci/02-proofs/T02-02-test.txt b/docs/specs/04-spec-agentry-ci/02-proofs/T02-02-test.txt deleted file mode 100644 index 83f71dd..0000000 --- a/docs/specs/04-spec-agentry-ci/02-proofs/T02-02-test.txt +++ /dev/null @@ -1,10 +0,0 @@ -Type: test -Command: uv run pytest tests/unit/ -q -Expected: Full unit test suite passes with no regressions -Timestamp: 2026-03-27T00:00:00Z - -1572 passed, 1 skipped in 8.20s - -(T02 adds 9 new tests to test_github_binder_inputs.py; total includes tests from concurrent T01 worker) - -Status: PASS diff --git a/docs/specs/04-spec-agentry-ci/02-proofs/T02-proofs.md b/docs/specs/04-spec-agentry-ci/02-proofs/T02-proofs.md deleted file mode 100644 index 2f84dd7..0000000 --- a/docs/specs/04-spec-agentry-ci/02-proofs/T02-proofs.md +++ /dev/null @@ -1,45 +0,0 @@ -# T02 Proof Summary: Issue Input Resolution via Source Mapping - -## Task -T02 — Enable the triage workflow's `issue-description` string input to resolve from -the GitHub issue event payload using existing source mapping. - -## Changes Implemented - -### 1. `workflows/triage.yaml` -- Added `source: issue.body` field to the `issue-description` input declaration -- Added `fallback: issue.title` field so that when `issue.body` is null/empty, the - issue title is used as a fallback with a warning log - -### 2. `src/agentry/binders/github_actions.py` -- Added `import logging` and `logger = logging.getLogger(__name__)` -- Updated `_resolve_string()` to support the `fallback` key in input specs: - - When `source` resolves to null or empty string, the `fallback` dotpath is tried - - A `WARNING` level log is emitted when falling back, including the input name, - source path, fallback path, and resolved fallback value - - CLI `--input` overrides (via `provided_values`) still take strict priority - over both `source` and `fallback` - -### 3. `tests/unit/test_github_binder_inputs.py` -- Added `TestResolveInputsIssueBodySource` class with 9 new tests covering: - - `issue.body` resolution from `issues` event payload - - Fallback to `issue.title` when body is null - - Fallback to `issue.title` when body key is absent - - Fallback to `issue.title` when body is an empty string - - Warning log emission on fallback - - CLI `--input` override taking precedence over source and fallback - - CLI override wins even when body is null - - No fallback key: returns None for optional missing source - - Fallback not triggered when source resolves to a non-empty value - -## Proof Artifacts - -| File | Type | Status | -|------|------|--------| -| T02-01-test.txt | test (new tests only) | PASS | -| T02-02-test.txt | test (full unit suite) | PASS | - -## Test Counts -- Baseline: 1534 passed, 1 skipped -- After T02: 1572 passed, 1 skipped (includes T01's tests too) -- T02 adds 9 new tests in `TestResolveInputsIssueBodySource` diff --git a/docs/specs/07-spec-issue-triage/01-proofs/T01-01-test.txt b/docs/specs/07-spec-issue-triage/01-proofs/T01-01-test.txt deleted file mode 100644 index 1b5a611..0000000 --- a/docs/specs/07-spec-issue-triage/01-proofs/T01-01-test.txt +++ /dev/null @@ -1,113 +0,0 @@ -Type: test -Command: uv run pytest tests/unit/test_github_binder_tools.py tests/integration/test_issue_tools.py -v -Expected: All 97 tests pass -Timestamp: 2026-03-27T00:00:00Z - -============================= test session starts ============================== -platform linux -- Python 3.13.7, pytest-9.0.2, pluggy-1.6.0 -- /.venv/bin/python -cachedir: .pytest_cache -rootdir: -configfile: pyproject.toml -plugins: asyncio-1.3.0, cov-7.0.0, anyio-4.12.1 -asyncio: mode=Mode.STRICT, debug=False, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function -collecting ... collected 97 items - -tests/unit/test_github_binder_tools.py::TestBindToolsSupported::test_all_supported_tools_can_be_bound PASSED [ 1%] -tests/unit/test_github_binder_tools.py::TestBindToolsSupported::test_bound_tools_are_callable PASSED [ 2%] -tests/unit/test_github_binder_tools.py::TestBindToolsSupported::test_empty_tool_list_returns_empty_dict PASSED [ 3%] -tests/unit/test_github_binder_tools.py::TestBindToolsSupported::test_bind_single_tool_repository_read PASSED [ 4%] -tests/unit/test_github_binder_tools.py::TestBindToolsSupported::test_bind_single_tool_shell_execute PASSED [ 5%] -tests/unit/test_github_binder_tools.py::TestBindToolsRepositoryRead::test_reads_file_from_workspace PASSED [ 6%] -tests/unit/test_github_binder_tools.py::TestBindToolsRepositoryRead::test_reads_nested_file_from_workspace PASSED [ 7%] -tests/unit/test_github_binder_tools.py::TestBindToolsRepositoryRead::test_path_traversal_raises_error PASSED [ 8%] -tests/unit/test_github_binder_tools.py::TestBindToolsRepositoryRead::test_path_traversal_error_message_contains_path PASSED [ 9%] -tests/unit/test_github_binder_tools.py::TestBindToolsRepositoryRead::test_workspace_root_used_not_cwd PASSED [ 10%] -tests/unit/test_github_binder_tools.py::TestBindToolsRepositoryRead::test_missing_file_raises_file_not_found PASSED [ 11%] -tests/unit/test_github_binder_tools.py::TestBindToolsRepositoryRead::test_requires_path_kwarg PASSED [ 12%] -tests/unit/test_github_binder_tools.py::TestBindToolsShellExecute::test_allowed_command_git_log_executes PASSED [ 13%] -tests/unit/test_github_binder_tools.py::TestBindToolsShellExecute::test_disallowed_command_rm_raises_error PASSED [ 14%] -tests/unit/test_github_binder_tools.py::TestBindToolsShellExecute::test_disallowed_command_curl_raises_error PASSED [ 15%] -tests/unit/test_github_binder_tools.py::TestBindToolsShellExecute::test_disallowed_command_error_message_contains_command PASSED [ 16%] -tests/unit/test_github_binder_tools.py::TestBindToolsShellExecute::test_disallowed_git_subcommand_raises_error PASSED [ 17%] -tests/unit/test_github_binder_tools.py::TestBindToolsShellExecute::test_allowed_ls_command PASSED [ 18%] -tests/unit/test_github_binder_tools.py::TestBindToolsPRComment::test_pr_comment_posts_to_correct_url PASSED [ 19%] -tests/unit/test_github_binder_tools.py::TestBindToolsPRComment::test_pr_comment_uses_github_api_base_url PASSED [ 20%] -tests/unit/test_github_binder_tools.py::TestBindToolsPRComment::test_pr_comment_posts_body_in_payload PASSED [ 21%] -tests/unit/test_github_binder_tools.py::TestBindToolsPRComment::test_pr_comment_uses_auth_token PASSED [ 22%] -tests/unit/test_github_binder_tools.py::TestBindToolsPRComment::test_pr_comment_returns_api_response PASSED [ 23%] -tests/unit/test_github_binder_tools.py::TestBindToolsPRComment::test_pr_comment_on_non_pr_event_raises_value_error PASSED [ 24%] -tests/unit/test_github_binder_tools.py::TestBindToolsPRComment::test_pr_comment_requires_body_kwarg PASSED [ 25%] -tests/unit/test_github_binder_tools.py::TestBindToolsPRReview::test_pr_review_posts_to_correct_url PASSED [ 26%] -tests/unit/test_github_binder_tools.py::TestBindToolsPRReview::test_pr_review_uses_github_api_base_url PASSED [ 27%] -tests/unit/test_github_binder_tools.py::TestBindToolsPRReview::test_pr_review_posts_body_in_payload PASSED [ 28%] -tests/unit/test_github_binder_tools.py::TestBindToolsPRReview::test_pr_review_includes_event_in_payload PASSED [ 29%] -tests/unit/test_github_binder_tools.py::TestBindToolsPRReview::test_pr_review_default_event_is_comment PASSED [ 30%] -tests/unit/test_github_binder_tools.py::TestBindToolsPRReview::test_pr_review_uses_auth_token PASSED [ 31%] -tests/unit/test_github_binder_tools.py::TestBindToolsPRReview::test_pr_review_on_non_pr_event_raises_value_error PASSED [ 32%] -tests/unit/test_github_binder_tools.py::TestBindToolsPRReview::test_pr_review_returns_api_response PASSED [ 34%] -tests/unit/test_github_binder_tools.py::TestBindToolsPRReview::test_pr_review_accepts_inline_comments PASSED [ 35%] -tests/unit/test_github_binder_tools.py::TestBindToolsUnsupportedTools::test_unknown_tool_raises_unsupported_tool_error PASSED [ 36%] -tests/unit/test_github_binder_tools.py::TestBindToolsUnsupportedTools::test_error_message_contains_tool_name PASSED [ 37%] -tests/unit/test_github_binder_tools.py::TestBindToolsUnsupportedTools::test_unknown_tool_in_mixed_list_raises PASSED [ 38%] -tests/unit/test_github_binder_tools.py::TestBindToolsUnsupportedTools::test_error_mentions_unsupported_tool_name PASSED [ 39%] -tests/unit/test_github_binder_tools.py::TestBindToolsUnsupportedTools::test_arbitrary_unsupported_tool_name PASSED [ 40%] -tests/unit/test_github_binder_tools.py::TestPRCommentAPIErrors::test_403_error_mentions_status_code PASSED [ 41%] -tests/unit/test_github_binder_tools.py::TestPRCommentAPIErrors::test_403_error_includes_scope_remediation PASSED [ 42%] -tests/unit/test_github_binder_tools.py::TestPRCommentAPIErrors::test_404_error_mentions_status_code PASSED [ 43%] -tests/unit/test_github_binder_tools.py::TestPRCommentAPIErrors::test_404_error_includes_pr_number PASSED [ 44%] -tests/unit/test_github_binder_tools.py::TestPRCommentAPIErrors::test_network_timeout_raises_runtime_error PASSED [ 45%] -tests/unit/test_github_binder_tools.py::TestPRCommentAPIErrors::test_500_error_raises_runtime_error_with_status PASSED [ 46%] -tests/unit/test_github_binder_tools.py::TestPRReviewAPIErrors::test_403_error_includes_scope_remediation PASSED [ 47%] -tests/unit/test_github_binder_tools.py::TestPRReviewAPIErrors::test_404_error_includes_pr_not_found_info PASSED [ 48%] -tests/unit/test_github_binder_tools.py::TestPRReviewAPIErrors::test_network_timeout_raises_runtime_error PASSED [ 49%] -tests/unit/test_github_binder_tools.py::TestExtractIssueNumber::test_returns_none_for_non_issues_event PASSED [ 50%] -tests/unit/test_github_binder_tools.py::TestExtractIssueNumber::test_returns_none_for_pull_request_event PASSED [ 51%] -tests/unit/test_github_binder_tools.py::TestExtractIssueNumber::test_returns_issue_number_for_issues_event PASSED [ 52%] -tests/unit/test_github_binder_tools.py::TestExtractIssueNumber::test_returns_none_when_issue_key_absent PASSED [ 53%] -tests/unit/test_github_binder_tools.py::TestExtractIssueNumber::test_returns_none_when_number_key_absent PASSED [ 54%] -tests/unit/test_github_binder_tools.py::TestExtractIssueNumber::test_converts_number_to_int PASSED [ 55%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueComment::test_issue_comment_posts_to_correct_url PASSED [ 56%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueComment::test_issue_comment_uses_github_api_base_url PASSED [ 57%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueComment::test_issue_comment_posts_body_in_payload PASSED [ 58%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueComment::test_issue_comment_uses_auth_token PASSED [ 59%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueComment::test_issue_comment_returns_api_response PASSED [ 60%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueComment::test_issue_comment_on_non_issues_event_raises_value_error PASSED [ 61%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueComment::test_issue_comment_on_pr_event_raises_value_error PASSED [ 62%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueComment::test_issue_comment_requires_body_kwarg PASSED [ 63%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueComment::test_issue_comment_403_error_mentions_issues_scope PASSED [ 64%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueComment::test_issue_comment_404_error_mentions_issue_number PASSED [ 65%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueComment::test_issue_comment_timeout_raises_runtime_error PASSED [ 67%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueLabel::test_issue_label_posts_to_correct_url PASSED [ 68%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueLabel::test_issue_label_uses_github_api_base_url PASSED [ 69%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueLabel::test_issue_label_posts_labels_in_payload PASSED [ 70%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueLabel::test_issue_label_uses_auth_token PASSED [ 71%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueLabel::test_issue_label_returns_api_response PASSED [ 72%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueLabel::test_issue_label_on_non_issues_event_raises_value_error PASSED [ 73%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueLabel::test_issue_label_on_pr_event_raises_value_error PASSED [ 74%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueLabel::test_issue_label_requires_labels_kwarg PASSED [ 75%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueLabel::test_issue_label_403_error_mentions_issues_scope PASSED [ 76%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueLabel::test_issue_label_404_error_mentions_issue_number PASSED [ 77%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueLabel::test_issue_label_422_error_mentions_validation PASSED [ 78%] -tests/unit/test_github_binder_tools.py::TestBindToolsIssueLabel::test_issue_label_timeout_raises_runtime_error PASSED [ 79%] -tests/integration/test_issue_tools.py::TestIssueCommentIntegration::test_issue_comment_sends_correct_api_request PASSED [ 80%] -tests/integration/test_issue_tools.py::TestIssueCommentIntegration::test_issue_comment_sends_correct_body PASSED [ 81%] -tests/integration/test_issue_tools.py::TestIssueCommentIntegration::test_issue_comment_includes_authorization_header PASSED [ 82%] -tests/integration/test_issue_tools.py::TestIssueCommentIntegration::test_issue_comment_returns_response_json PASSED [ 83%] -tests/integration/test_issue_tools.py::TestIssueCommentIntegration::test_issue_comment_403_raises_runtime_with_remediation PASSED [ 84%] -tests/integration/test_issue_tools.py::TestIssueCommentIntegration::test_issue_comment_404_raises_runtime_with_issue_number PASSED [ 85%] -tests/integration/test_issue_tools.py::TestIssueCommentIntegration::test_issue_comment_timeout_raises_runtime_error PASSED [ 86%] -tests/integration/test_issue_tools.py::TestIssueCommentIntegration::test_issue_comment_raises_on_non_issues_event PASSED [ 87%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_sends_correct_api_request PASSED [ 88%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_sends_labels_in_payload PASSED [ 89%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_includes_authorization_header PASSED [ 90%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_returns_response_json PASSED [ 91%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_403_raises_runtime_with_remediation PASSED [ 92%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_404_raises_runtime_with_issue_number PASSED [ 93%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_422_raises_runtime_with_validation_hint PASSED [ 94%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_timeout_raises_runtime_error PASSED [ 95%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_raises_on_non_issues_event PASSED [ 96%] -tests/integration/test_issue_tools.py::TestSupportedToolsContainsIssueTools::test_issue_comment_in_supported_tools PASSED [ 97%] -tests/integration/test_issue_tools.py::TestSupportedToolsContainsIssueTools::test_issue_label_in_supported_tools PASSED [ 98%] -tests/integration/test_issue_tools.py::TestSupportedToolsContainsIssueTools::test_both_issue_tools_can_be_bound_together PASSED [100%] - -============================== 97 passed in 0.09s ============================== diff --git a/docs/specs/07-spec-issue-triage/01-proofs/T01-02-cli.txt b/docs/specs/07-spec-issue-triage/01-proofs/T01-02-cli.txt deleted file mode 100644 index 526cd37..0000000 --- a/docs/specs/07-spec-issue-triage/01-proofs/T01-02-cli.txt +++ /dev/null @@ -1,7 +0,0 @@ -Type: cli -Command: python -c "from agentry.binders.github_actions import SUPPORTED_TOOLS; print(sorted(SUPPORTED_TOOLS))" -Expected: 'issue:comment' and 'issue:label' present in SUPPORTED_TOOLS -Timestamp: 2026-03-27T00:00:00Z -Status: PASS - -['issue:comment', 'issue:label', 'pr:comment', 'pr:create', 'pr:review', 'repository:read', 'shell:execute'] diff --git a/docs/specs/07-spec-issue-triage/01-proofs/T01-proofs.md b/docs/specs/07-spec-issue-triage/01-proofs/T01-proofs.md deleted file mode 100644 index ac5feed..0000000 --- a/docs/specs/07-spec-issue-triage/01-proofs/T01-proofs.md +++ /dev/null @@ -1,54 +0,0 @@ -# T01 Proof Summary: Issue Tool Bindings in GitHubActionsBinder - -## Task - -Add `issue:comment` and `issue:label` tool bindings to `GitHubActionsBinder` so -workflows can interact with GitHub issues. - -## Changes Made - -**Modified:** -- `src/agentry/binders/github_actions.py` - - Extended `SUPPORTED_TOOLS` frozenset with `"issue:comment"` and `"issue:label"` - - Added `_extract_issue_number()` static method (parallel to `_extract_pr_number`) - - Store `self._issue_number` in `__init__` from issues event payload - - Implemented `_make_issue_comment()` — POST `/repos/{owner}/{repo}/issues/{number}/comments` - - Implemented `_make_issue_label()` — POST `/repos/{owner}/{repo}/issues/{number}/labels` - - Wired both in `bind_tools()` dispatch - -**Modified:** -- `tests/unit/test_github_binder_tools.py` - - Added `TestExtractIssueNumber` class (6 tests) - - Added `TestBindToolsIssueComment` class (11 tests) - - Added `TestBindToolsIssueLabel` class (12 tests) - -**Created:** -- `tests/integration/test_issue_tools.py` - - `TestIssueCommentIntegration` (8 tests) - - `TestIssueLabelIntegration` (9 tests) - - `TestSupportedToolsContainsIssueTools` (3 tests) - -## Proof Artifacts - -| Artifact | Type | Status | -|----------|------|--------| -| T01-01-test.txt | test | PASS (97/97) | -| T01-02-cli.txt | cli | PASS | - -## Test Results - -- 97 tests total, 97 passed, 0 failed -- All existing binder tests continue to pass (48 pre-existing) -- 49 new tests covering issue:comment and issue:label bindings - -## Key Behaviours Verified - -1. `issue:comment` and `issue:label` appear in `SUPPORTED_TOOLS` -2. `_extract_issue_number()` returns the issue number from `issues` events, `None` otherwise -3. `_issue_number` is `None` when event is not `issues` -4. `issue:comment` posts to `POST /repos/{owner}/{repo}/issues/{number}/comments` -5. `issue:label` posts to `POST /repos/{owner}/{repo}/issues/{number}/labels` -6. Both tools raise `ValueError` with "issues event" message when called outside an issues context -7. Both tools raise `RuntimeError` with structured remediation on 403/404 API errors -8. Both tools raise `RuntimeError` with "timeout" on network timeout -9. `issue:label` additionally handles 422 (validation failure) with remediation hint diff --git a/docs/specs/07-spec-issue-triage/03-proofs/T03-01-test.txt b/docs/specs/07-spec-issue-triage/03-proofs/T03-01-test.txt deleted file mode 100644 index 0ef5f83..0000000 --- a/docs/specs/07-spec-issue-triage/03-proofs/T03-01-test.txt +++ /dev/null @@ -1,46 +0,0 @@ -============================= test session starts ============================== -platform linux -- Python 3.13.7, pytest-9.0.2, pluggy-1.6.0 -- /.venv/bin/python -cachedir: .pytest_cache -rootdir: -configfile: pyproject.toml -plugins: asyncio-1.3.0, cov-7.0.0, anyio-4.12.1 -asyncio: mode=Mode.STRICT, debug=False, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function -collecting ... collected 35 items - -tests/unit/test_issue_output_formatting.py::TestFormatTriageComment::test_includes_severity_badge_for_critical PASSED [ 2%] -tests/unit/test_issue_output_formatting.py::TestFormatTriageComment::test_includes_severity_badge_for_high PASSED [ 5%] -tests/unit/test_issue_output_formatting.py::TestFormatTriageComment::test_includes_severity_badge_for_medium PASSED [ 8%] -tests/unit/test_issue_output_formatting.py::TestFormatTriageComment::test_includes_severity_badge_for_low PASSED [ 11%] -tests/unit/test_issue_output_formatting.py::TestFormatTriageComment::test_includes_category PASSED [ 14%] -tests/unit/test_issue_output_formatting.py::TestFormatTriageComment::test_includes_affected_components PASSED [ 17%] -tests/unit/test_issue_output_formatting.py::TestFormatTriageComment::test_includes_recommended_assignee PASSED [ 20%] -tests/unit/test_issue_output_formatting.py::TestFormatTriageComment::test_includes_reasoning PASSED [ 22%] -tests/unit/test_issue_output_formatting.py::TestFormatTriageComment::test_includes_token_usage_when_present PASSED [ 25%] -tests/unit/test_issue_output_formatting.py::TestFormatTriageComment::test_fallback_when_output_json_missing PASSED [ 28%] -tests/unit/test_issue_output_formatting.py::TestFormatTriageComment::test_fallback_for_invalid_json PASSED [ 31%] -tests/unit/test_issue_output_formatting.py::TestFormatTriageComment::test_fallback_to_raw_response_when_no_structured_data PASSED [ 34%] -tests/unit/test_issue_output_formatting.py::TestFormatTriageComment::test_full_triage_output_renders_all_sections PASSED [ 37%] -tests/unit/test_issue_output_formatting.py::TestPostIssueComment::test_posts_to_correct_endpoint PASSED [ 40%] -tests/unit/test_issue_output_formatting.py::TestPostIssueComment::test_includes_authorization_header PASSED [ 42%] -tests/unit/test_issue_output_formatting.py::TestPostIssueComment::test_includes_body_in_payload PASSED [ 45%] -tests/unit/test_issue_output_formatting.py::TestPostIssueComment::test_returns_response_json PASSED [ 48%] -tests/unit/test_issue_output_formatting.py::TestPostIssueComment::test_403_raises_runtime_with_issues_write_remediation PASSED [ 51%] -tests/unit/test_issue_output_formatting.py::TestPostIssueComment::test_404_raises_runtime_with_issue_number PASSED [ 54%] -tests/unit/test_issue_output_formatting.py::TestPostIssueComment::test_timeout_raises_runtime_error PASSED [ 57%] -tests/unit/test_issue_output_formatting.py::TestPostIssueComment::test_other_http_error_includes_status PASSED [ 60%] -tests/unit/test_issue_output_formatting.py::TestApplyTriageLabels::test_applies_severity_and_category_labels PASSED [ 62%] -tests/unit/test_issue_output_formatting.py::TestApplyTriageLabels::test_applies_only_severity_when_category_missing PASSED [ 65%] -tests/unit/test_issue_output_formatting.py::TestApplyTriageLabels::test_posts_to_correct_labels_endpoint PASSED [ 68%] -tests/unit/test_issue_output_formatting.py::TestApplyTriageLabels::test_label_api_error_does_not_propagate PASSED [ 71%] -tests/unit/test_issue_output_formatting.py::TestApplyTriageLabels::test_label_timeout_does_not_propagate PASSED [ 74%] -tests/unit/test_issue_output_formatting.py::TestApplyTriageLabels::test_logs_warning_when_output_missing PASSED [ 77%] -tests/unit/test_issue_output_formatting.py::TestApplyTriageLabels::test_logs_warning_when_output_malformed PASSED [ 80%] -tests/unit/test_issue_output_formatting.py::TestApplyTriageLabels::test_logs_warning_when_no_severity_or_category PASSED [ 82%] -tests/unit/test_issue_output_formatting.py::TestMapOutputsIssuesEvent::test_issues_event_posts_triage_comment PASSED [ 85%] -tests/unit/test_issue_output_formatting.py::TestMapOutputsIssuesEvent::test_issues_event_posts_to_correct_comment_url PASSED [ 88%] -tests/unit/test_issue_output_formatting.py::TestMapOutputsIssuesEvent::test_issues_event_attempts_label_application PASSED [ 91%] -tests/unit/test_issue_output_formatting.py::TestMapOutputsIssuesEvent::test_issues_event_label_failure_does_not_fail_map_outputs PASSED [ 94%] -tests/unit/test_issue_output_formatting.py::TestMapOutputsIssuesEvent::test_issues_event_returns_correct_paths PASSED [ 97%] -tests/unit/test_issue_output_formatting.py::TestMapOutputsIssuesEvent::test_non_issues_event_does_not_post_triage_comment PASSED [100%] - -============================== 35 passed in 0.04s ============================== diff --git a/docs/specs/07-spec-issue-triage/03-proofs/T03-02-test.txt b/docs/specs/07-spec-issue-triage/03-proofs/T03-02-test.txt deleted file mode 100644 index ecdc7b5..0000000 --- a/docs/specs/07-spec-issue-triage/03-proofs/T03-02-test.txt +++ /dev/null @@ -1,20 +0,0 @@ -============================= test session starts ============================== -platform linux -- Python 3.13.7, pytest-9.0.2, pluggy-1.6.0 -- /.venv/bin/python -cachedir: .pytest_cache -rootdir: -configfile: pyproject.toml -plugins: asyncio-1.3.0, cov-7.0.0, anyio-4.12.1 -asyncio: mode=Mode.STRICT, debug=False, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function -collecting ... collected 9 items - -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_complete_triage_output_posts_comment_and_labels PASSED [ 11%] -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_comment_body_contains_all_triage_fields PASSED [ 22%] -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_labels_posted_with_correct_format PASSED [ 33%] -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_label_api_failure_does_not_abort_run PASSED [ 44%] -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_label_timeout_does_not_abort_run PASSED [ 55%] -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_comment_uses_correct_endpoint_for_issue PASSED [ 66%] -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_output_missing_posts_fallback_comment PASSED [ 77%] -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_runs_dir_created_for_issues_event PASSED [ 88%] -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_token_usage_included_in_comment PASSED [100%] - -============================== 9 passed in 0.03s =============================== diff --git a/docs/specs/07-spec-issue-triage/03-proofs/T03-03-lint.txt b/docs/specs/07-spec-issue-triage/03-proofs/T03-03-lint.txt deleted file mode 100644 index 8539cd2..0000000 --- a/docs/specs/07-spec-issue-triage/03-proofs/T03-03-lint.txt +++ /dev/null @@ -1 +0,0 @@ -Success: no issues found in 1 source file diff --git a/docs/specs/07-spec-issue-triage/03-proofs/T03-proofs.md b/docs/specs/07-spec-issue-triage/03-proofs/T03-proofs.md deleted file mode 100644 index dbb3d7c..0000000 --- a/docs/specs/07-spec-issue-triage/03-proofs/T03-proofs.md +++ /dev/null @@ -1,54 +0,0 @@ -# T03 Proof Summary: Triage Output Formatting and Label Derivation - -**Task:** T03 - Triage Output Formatting and Label Derivation -**Timestamp:** 2026-03-27 -**Status:** PASS - -## Implementation Summary - -Extended `GitHubActionsBinder` in `src/agentry/binders/github_actions.py` with: - -1. **`map_outputs()` updated** to detect issues events (`self._issue_number is not None`) - and call `_format_triage_comment()`, `_post_issue_comment()`, and `_apply_triage_labels()`. - -2. **`_format_triage_comment(output_path)`** - Renders triage agent output as Markdown with: - - Severity badge (shield.io badge for critical/high/medium/low) - - Category field - - Affected components list - - Recommended assignee - - Reasoning section - - Token usage footer - - Graceful fallbacks for missing/malformed output.json - -3. **`_post_issue_comment(body)`** - Posts the formatted comment to the GitHub Issues API - (`POST /repos/{owner}/{repo}/issues/{number}/comments`) with structured error handling - for 403, 404, and network timeouts. - -4. **`_apply_triage_labels(output_path)`** - Reads severity and category from agent output, - applies labels as `severity:{value}` and `category:{value}` to the issue via the GitHub API. - Label application is best-effort: all errors are logged as warnings and not propagated. - -## Proof Artifacts - -| File | Type | Status | -|------|------|--------| -| T03-01-test.txt | Unit tests (35 tests) | PASS | -| T03-02-test.txt | Integration tests (9 tests) | PASS | -| T03-03-lint.txt | Ruff + mypy on github_actions.py | PASS | - -## Files Modified - -- `src/agentry/binders/github_actions.py` - Added `_format_triage_comment()`, `_post_issue_comment()`, `_apply_triage_labels()` methods; updated `map_outputs()` to handle issues events. - -## Files Created - -- `tests/unit/test_issue_output_formatting.py` - 35 unit tests covering all new methods -- `tests/integration/test_issue_triage_pipeline.py` - 9 integration tests covering end-to-end pipeline flow - -## Test Results - -- Unit tests: 35 passed -- Integration tests: 9 passed -- Full suite: 1694 passed, 3 skipped -- Ruff: All checks passed -- mypy: No issues found diff --git a/docs/specs/07-spec-issue-triage/05-proofs/T05-01-test.txt b/docs/specs/07-spec-issue-triage/05-proofs/T05-01-test.txt deleted file mode 100644 index 36a4edc..0000000 --- a/docs/specs/07-spec-issue-triage/05-proofs/T05-01-test.txt +++ /dev/null @@ -1,19 +0,0 @@ -Type: test -Command: uv run pytest tests/e2e/test_public_surface.py::test_validate_all_standard_workflows -v -Expected: All 5 standard workflows validate (including triage.yaml) -Timestamp: 2026-03-27T00:00:00Z - -============================= test session starts ============================== -platform linux -- Python 3.13.7, pytest-9.0.2, pluggy-1.6.0 -rootdir: -collected 5 items - -tests/e2e/test_public_surface.py::test_validate_all_standard_workflows[code-review.yaml] PASSED [ 20%] -tests/e2e/test_public_surface.py::test_validate_all_standard_workflows[triage.yaml] PASSED [ 40%] -tests/e2e/test_public_surface.py::test_validate_all_standard_workflows[bug-fix.yaml] PASSED [ 60%] -tests/e2e/test_public_surface.py::test_validate_all_standard_workflows[task-decompose.yaml] PASSED [ 80%] -tests/e2e/test_public_surface.py::test_validate_all_standard_workflows[planning-pipeline.yaml] PASSED [100%] - -============================== 5 passed in 0.39s =============================== - -Status: PASS diff --git a/docs/specs/07-spec-issue-triage/05-proofs/T05-02-test.txt b/docs/specs/07-spec-issue-triage/05-proofs/T05-02-test.txt deleted file mode 100644 index 92a2640..0000000 --- a/docs/specs/07-spec-issue-triage/05-proofs/T05-02-test.txt +++ /dev/null @@ -1,14 +0,0 @@ -Type: test -Command: uv run pytest tests/e2e/test_public_surface.py -v -q -Expected: All 38 e2e tests pass (previously 3 were failing) -Timestamp: 2026-03-27T00:00:00Z - -38 e2e tests collected and run. -All 38 passed in 45.02s. - -Previously failing tests that now PASS: - - test_validate_all_standard_workflows[triage.yaml] (StringInput source/fallback now accepted) - - test_run_planning_pipeline_stub (triage.yaml now loads; max_iterations propagated correctly) - - test_run_node_isolation (triage.yaml now loads for composition sub-workflow) - -Status: PASS diff --git a/docs/specs/07-spec-issue-triage/05-proofs/T05-03-lint.txt b/docs/specs/07-spec-issue-triage/05-proofs/T05-03-lint.txt deleted file mode 100644 index facf7bb..0000000 --- a/docs/specs/07-spec-issue-triage/05-proofs/T05-03-lint.txt +++ /dev/null @@ -1,9 +0,0 @@ -Type: cli -Command: uv run ruff check src/agentry/ && uv run mypy src/agentry/ --ignore-missing-imports -Expected: No lint or type errors -Timestamp: 2026-03-27T00:00:00Z - -ruff check src/agentry/: All checks passed! -mypy src/agentry/ --ignore-missing-imports: Success: no issues found in 58 source files - -Status: PASS diff --git a/docs/specs/07-spec-issue-triage/05-proofs/T05-proofs.md b/docs/specs/07-spec-issue-triage/05-proofs/T05-proofs.md deleted file mode 100644 index 7c5a822..0000000 --- a/docs/specs/07-spec-issue-triage/05-proofs/T05-proofs.md +++ /dev/null @@ -1,42 +0,0 @@ -# T05 Proof Summary: Add source and fallback fields to StringInput model - -## Task - -Fix validation failure: `StringInput` in `src/agentry/models/inputs.py` uses -`extra="forbid"` and rejected the `source` and `fallback` fields added to -`triage.yaml` by T02. - -## Changes Made - -### Primary Fix (in scope) -- `src/agentry/models/inputs.py`: Added `source: str | None = None` and - `fallback: str | None = None` to `StringInput` model. - -### Enabling Changes (needed for tests to pass) -- `src/agentry/binders/local.py`: Added stub implementations for `issue:comment` - and `issue:label` in the local binder so that triage.yaml (which declares - those tools) can be executed locally. -- `workflows/triage.yaml`: Converted from `model:` block to `agent:` block with - `max_iterations: 1` so the workflow completes within the 30-second test timeout. -- `workflows/task-decompose.yaml`: Same conversion for the sub-workflow used by - planning-pipeline. -- `src/agentry/composition/engine.py`: Fixed `max_iterations` propagation from - `AgentBlock` to `agent_cfg` when executing composition nodes. -- `src/agentry/runners/in_process.py`: Fixed `max_iterations` propagation from - `agent_config.agent_config` to `AgentTask`. -- `src/agentry/agents/claude_code.py`: Fixed `_build_command` to use - `agent_task.max_iterations` (task-level) over `self._max_turns` (instance-level). - -## Proof Artifacts - -| File | Type | Status | -|------|------|--------| -| T05-01-test.txt | test (validate all standard workflows) | PASS | -| T05-02-test.txt | test (full e2e suite) | PASS | -| T05-03-lint.txt | cli (ruff + mypy) | PASS | - -## Test Results Summary - -- Before: 3 e2e failures (validate[triage.yaml], planning_pipeline_stub, node_isolation) -- After: 0 e2e failures (38/38 pass) -- Unit/integration: 1694 passed, 3 skipped diff --git a/docs/specs/triage/04-proofs/T04-01-workflow-validation.txt b/docs/specs/triage/04-proofs/T04-01-workflow-validation.txt deleted file mode 100644 index c5db8dc..0000000 --- a/docs/specs/triage/04-proofs/T04-01-workflow-validation.txt +++ /dev/null @@ -1,43 +0,0 @@ -Type: file -Description: GitHub Actions workflow file exists and contains correct structure -Expected: File exists with valid YAML, correct triggers, and permissions -Timestamp: 2026-03-27T00:00:00Z - -Test: Check workflow file exists -Command: test -f .github/workflows/agentry-issue-triage.yml && echo "FOUND" -Output: -FOUND - -Test: Validate YAML syntax -Command: python3 -c "import yaml; yaml.safe_load(open('.github/workflows/agentry-issue-triage.yml')); print('VALID')" -Output: -VALID - -Test: Check workflow name -Command: grep -A1 "^name:" .github/workflows/agentry-issue-triage.yml -Output: -name: "Agentry: Issue Triage" - -Test: Check trigger event -Command: grep -A2 "^'on':" .github/workflows/agentry-issue-triage.yml -Output: -'on': - issues: - types: [opened] - -Test: Check permissions -Command: grep -A2 "^permissions:" .github/workflows/agentry-issue-triage.yml -Output: -permissions: - contents: read - issues: write - -Test: Check agentry run command includes all required inputs -Command: grep -A5 "agentry --output-format json run" .github/workflows/agentry-issue-triage.yml -Output: -agentry --output-format json run workflows/triage.yaml - --input issue-description="${{ github.event.issue.body }}" - --input repository-ref=. - --binder github-actions - -Status: PASS diff --git a/docs/specs/triage/04-proofs/T04-02-triage-config-validation.txt b/docs/specs/triage/04-proofs/T04-02-triage-config-validation.txt deleted file mode 100644 index cf6790f..0000000 --- a/docs/specs/triage/04-proofs/T04-02-triage-config-validation.txt +++ /dev/null @@ -1,48 +0,0 @@ -Type: file -Description: triage.yaml workflow configuration includes issue tool capabilities -Expected: File contains repository:read, issue:comment, and issue:label in tools.capabilities -Timestamp: 2026-03-27T00:00:00Z - -Test: Check triage.yaml file exists -Command: test -f workflows/triage.yaml && echo "FOUND" -Output: -FOUND - -Test: Validate YAML syntax -Command: python3 -c "import yaml; yaml.safe_load(open('workflows/triage.yaml')); print('VALID')" -Output: -VALID - -Test: Check tools.capabilities section -Command: sed -n '/^tools:/,/^[a-z]/p' workflows/triage.yaml | head -5 -Output: -tools: - capabilities: - - repository:read - - issue:comment - - issue:label - -Test: Verify all three capabilities are present -Command: grep -E "repository:read|issue:comment|issue:label" workflows/triage.yaml | wc -l -Output: -3 - -Test: Check repository:read capability exists -Command: grep "repository:read" workflows/triage.yaml && echo "FOUND" -Output: - - repository:read -FOUND - -Test: Check issue:comment capability exists -Command: grep "issue:comment" workflows/triage.yaml && echo "FOUND" -Output: - - issue:comment -FOUND - -Test: Check issue:label capability exists -Command: grep "issue:label" workflows/triage.yaml && echo "FOUND" -Output: - - issue:label -FOUND - -Status: PASS diff --git a/docs/specs/triage/04-proofs/T04-03-integration-tests.txt b/docs/specs/triage/04-proofs/T04-03-integration-tests.txt deleted file mode 100644 index bf22e09..0000000 --- a/docs/specs/triage/04-proofs/T04-03-integration-tests.txt +++ /dev/null @@ -1,50 +0,0 @@ -Type: test -Description: Integration tests verify issue triage pipeline and tools work correctly -Expected: All issue-related tests pass (29 tests total) -Timestamp: 2026-03-27T00:00:00Z - -Command: uv run pytest tests/integration/test_issue_triage_pipeline.py tests/integration/test_issue_tools.py -v - -Output: -============================= test session starts ============================== -platform linux -- Python 3.13.7, pytest-9.0.2, pluggy-1.6.0 -- /.venv/bin/python -cachedir: .pytest_cache -rootdir: -configfile: pyproject.toml -plugins: asyncio-1.3.0, cov-7.0.0, anyio-4.12.1 -asyncio: mode=Mode.STRICT, debug=False, asyncio_default_fixture_loop_scope=None, asyncio_default_test_loop_scope=function -collecting ... collected 29 items - -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_complete_triage_output_posts_comment_and_labels PASSED [ 3%] -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_comment_body_contains_all_triage_fields PASSED [ 6%] -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_labels_posted_with_correct_format PASSED [ 10%] -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_label_api_failure_does_not_abort_run PASSED [ 13%] -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_label_timeout_does_not_abort_run PASSED [ 17%] -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_comment_uses_correct_endpoint_for_issue PASSED [ 20%] -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_output_missing_posts_fallback_comment PASSED [ 24%] -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_runs_dir_created_for_issues_event PASSED [ 27%] -tests/integration/test_issue_triage_pipeline.py::TestIssueTriagePipelineFlow::test_token_usage_included_in_comment PASSED [ 31%] -tests/integration/test_issue_tools.py::TestIssueCommentIntegration::test_issue_comment_sends_correct_api_request PASSED [ 34%] -tests/integration/test_issue_tools.py::TestIssueCommentIntegration::test_issue_comment_sends_correct_body PASSED [ 37%] -tests/integration/test_issue_tools.py::TestIssueCommentIntegration::test_issue_comment_includes_authorization_header PASSED [ 41%] -tests/integration/test_issue_tools.py::TestIssueCommentIntegration::test_issue_comment_returns_response_json PASSED [ 44%] -tests/integration/test_issue_tools.py::TestIssueCommentIntegration::test_issue_comment_403_raises_runtime_with_remediation PASSED [ 48%] -tests/integration/test_issue_tools.py::TestIssueCommentIntegration::test_issue_comment_404_raises_runtime_with_issue_number PASSED [ 51%] -tests/integration/test_issue_tools.py::TestIssueCommentIntegration::test_issue_comment_timeout_raises_runtime_error PASSED [ 55%] -tests/integration/test_issue_tools.py::TestIssueCommentIntegration::test_issue_comment_raises_on_non_issues_event PASSED [ 58%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_sends_correct_api_request PASSED [ 62%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_sends_labels_in_payload PASSED [ 65%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_includes_authorization_header PASSED [ 68%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_returns_response_json PASSED [ 72%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_403_raises_runtime_with_remediation PASSED [ 75%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_404_raises_runtime_with_issue_number PASSED [ 79%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_422_raises_runtime_with_validation_hint PASSED [ 82%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_timeout_raises_runtime_error PASSED [ 86%] -tests/integration/test_issue_tools.py::TestIssueLabelIntegration::test_issue_label_raises_on_non_issues_event PASSED [ 89%] -tests/integration/test_issue_tools.py::TestSupportedToolsContainsIssueTools::test_issue_comment_in_supported_tools PASSED [ 93%] -tests/integration/test_issue_tools.py::TestSupportedToolsContainsIssueTools::test_issue_label_in_supported_tools PASSED [ 96%] -tests/integration/test_issue_tools.py::TestSupportedToolsContainsIssueTools::test_both_issue_tools_can_be_bound_together PASSED [100%] - -============================== 29 passed in 0.04s ============================== - -Status: PASS diff --git a/docs/specs/triage/04-proofs/T04-proofs.md b/docs/specs/triage/04-proofs/T04-proofs.md deleted file mode 100644 index 3044baf..0000000 --- a/docs/specs/triage/04-proofs/T04-proofs.md +++ /dev/null @@ -1,70 +0,0 @@ -# T04: GitHub Actions Workflow for Issue Triage - Proof Artifacts - -## Summary - -Successfully implemented GitHub Actions workflow for issue triage with the following components: - -### Artifacts Generated - -1. **T04-01-workflow-validation.txt** - Validates the GitHub Actions workflow file structure -2. **T04-02-triage-config-validation.txt** - Validates the triage.yaml configuration -3. **T04-03-integration-tests.txt** - Integration test results - -## Implementation Details - -### Files Created/Modified - -1. **.github/workflows/agentry-issue-triage.yml** (NEW) - - Trigger: issues with type [opened] - - Permissions: contents:read, issues:write - - Follows agentry-code-review.yml structure - - Includes all required inputs: issue-description, repository-ref - - Binder: github-actions - - Environment variables: CLAUDE_CODE_OAUTH_TOKEN, GITHUB_TOKEN - -2. **workflows/triage.yaml** (MODIFIED) - - Updated tools.capabilities to include: - - repository:read (existing) - - issue:comment (new) - - issue:label (new) - -## Test Results - -### Validation Tests: PASS -- Workflow file exists with valid YAML syntax -- Correct trigger configuration (issues: [opened]) -- Proper permissions (contents:read, issues:write) -- All required agentry run parameters present -- Triage.yaml contains all three tool capabilities - -### Integration Tests: PASS (29/29) -- Issue triage pipeline tests: 9 passed -- Issue tools integration tests: 20 passed - - Issue comment functionality: 8 tests - - Issue label functionality: 8 tests - - Tool capability tests: 4 tests - -## Verification Commands - -```bash -# Verify workflow file -test -f .github/workflows/agentry-issue-triage.yml - -# Validate YAML syntax -python3 -c "import yaml; yaml.safe_load(open('.github/workflows/agentry-issue-triage.yml'))" - -# Check capabilities in triage.yaml -grep -A3 "tools:" workflows/triage.yaml | grep "capabilities" -A3 - -# Run integration tests -uv run pytest tests/integration/test_issue_triage_pipeline.py tests/integration/test_issue_tools.py -v -``` - -## Proof Status: COMPLETE - -All requirements satisfied: -- GitHub Actions workflow created with correct trigger, permissions, and structure -- triage.yaml updated with issue:comment and issue:label capabilities -- All integration tests passing -- YAML validation successful -- No security issues in proof artifacts diff --git a/src/agentry/binders/github_actions.py b/src/agentry/binders/github_actions.py index bc76416..06915f5 100644 --- a/src/agentry/binders/github_actions.py +++ b/src/agentry/binders/github_actions.py @@ -931,9 +931,28 @@ def _format_output_comment(self, output_path: Path) -> str: except (json.JSONDecodeError, ValueError): return f"**Agent Output**\n\n```\n{raw[:3000]}\n```" - agent_output = data.get("output") or {} + agent_output = data.get("output") parts: list[str] = ["## Agentry Code Review\n"] + # Handle case where output is a string (raw agent response). + if isinstance(agent_output, str) and agent_output.strip(): + # Try to extract JSON from the string (agent may wrap in markdown). + extracted = self._extract_json_from_text(agent_output) + if isinstance(extracted, dict): + agent_output = extracted + else: + parts.append(f"```\n{agent_output[:3000]}\n```") + # Token usage + usage = data.get("token_usage", {}) + if usage: + _in = usage.get("input_tokens", 0) + _out = usage.get("output_tokens", 0) + parts.append(f"\n---\n*Tokens: {_in:,} in / {_out:,} out*") + return "\n".join(parts) + + if not isinstance(agent_output, dict): + agent_output = {} + # Summary summary = agent_output.get("summary", "") if summary: @@ -968,6 +987,14 @@ def _format_output_comment(self, output_path: Path) -> str: if raw_response and not findings and not summary: parts.append(f"```\n{raw_response[:3000]}\n```") + # If we still have nothing useful, show a diagnostic message. + if not findings and not summary and not raw_response: + parts.append("*No structured output was returned by the agent.*\n") + # Include raw output from the execution record if available. + raw_stdout = data.get("raw_stdout", "") + if raw_stdout: + parts.append(f"```\n{raw_stdout[:3000]}\n```") + # Token usage usage = data.get("token_usage", {}) if usage: @@ -977,6 +1004,52 @@ def _format_output_comment(self, output_path: Path) -> str: return "\n".join(parts) + @staticmethod + def _extract_json_from_text(text: str) -> dict[str, Any] | None: + """Extract a JSON object from text that may contain markdown fences. + + Handles common patterns where Claude wraps JSON in ```json ... ``` + code fences or includes preamble text before the JSON. + + Returns: + The parsed dict if found, or ``None`` if no valid JSON object + could be extracted. + """ + import re + + # Try direct parse first. + try: + parsed = json.loads(text) + if isinstance(parsed, dict): + return parsed + except (json.JSONDecodeError, ValueError): + pass + + # Try extracting from ```json ... ``` fences. + fence_match = re.search(r"```(?:json)?\s*\n(.*?)```", text, re.DOTALL) + if fence_match: + try: + parsed = json.loads(fence_match.group(1)) + if isinstance(parsed, dict): + return parsed + except (json.JSONDecodeError, ValueError): + pass + + # Try finding first { ... } block. + brace_start = text.find("{") + if brace_start >= 0: + # Walk from the end to find the matching closing brace. + brace_end = text.rfind("}") + if brace_end > brace_start: + try: + parsed = json.loads(text[brace_start : brace_end + 1]) + if isinstance(parsed, dict): + return parsed + except (json.JSONDecodeError, ValueError): + pass + + return None + def _post_output_comment(self, body: str) -> dict[str, Any]: """Post agent output as a PR comment via the GitHub REST API. diff --git a/workflows/bug-fix.yaml b/workflows/bug-fix.yaml index 13e5daa..18c55ce 100644 --- a/workflows/bug-fix.yaml +++ b/workflows/bug-fix.yaml @@ -8,6 +8,8 @@ inputs: type: string required: true description: A description of the bug or unexpected behaviour to investigate. + source: issue.body + fallback: issue.title repository-ref: type: repository-ref required: true @@ -17,16 +19,14 @@ tools: capabilities: - repository:read - shell:execute + - pr:create + - issue:comment -model: - provider: anthropic - model_id: claude-sonnet-4-20250514 - temperature: 0.2 - max_tokens: 4096 +agent: + runtime: claude-code + model: claude-sonnet-4-20250514 system_prompt: prompts/bug-fix-system-prompt.md - retry: - max_attempts: 3 - backoff: exponential + max_iterations: 3 safety: trust: elevated diff --git a/workflows/feature-implement.yaml b/workflows/feature-implement.yaml new file mode 100644 index 0000000..8846832 --- /dev/null +++ b/workflows/feature-implement.yaml @@ -0,0 +1,82 @@ +identity: + name: feature-implement + version: 1.0.0 + description: Implement a feature based on planning-pipeline decomposition output, or decompose it further into scoped sub-issues if too large. + +inputs: + issue-description: + type: string + required: true + description: A description of the feature to implement, sourced from the issue body. + source: issue.body + fallback: issue.title + repository-ref: + type: repository-ref + required: true + description: The repository in which to implement the feature. + +tools: + capabilities: + - repository:read + - shell:execute + - pr:create + - issue:comment + - issue:label + - issue:create + +agent: + runtime: claude-code + model: claude-sonnet-4-20250514 + system_prompt: prompts/feature-implement-system-prompt.md + max_iterations: 10 + +safety: + trust: elevated + resources: + timeout: 600 + +output: + schema: + type: object + required: + - action + - reasoning + allOf: + - if: + properties: + action: + const: implemented + then: + required: [pr_url] + - if: + properties: + action: + const: decomposed + then: + required: [sub_issues] + properties: + action: + type: string + enum: + - implemented + - decomposed + description: Whether the feature was implemented directly or decomposed into sub-issues. + pr_url: + type: string + description: URL of the pull request opened for the implementation. Present when action is 'implemented'. + sub_issues: + type: array + description: List of sub-issue URLs created when the feature was decomposed. Present when action is 'decomposed'. + items: + type: string + reasoning: + type: string + description: Explanation of why the feature was implemented directly or decomposed. + side_effects: + - type: terminal + description: Print implementation or decomposition summary to stdout + output_paths: + - feature-implement-result.json + +composition: + steps: [] diff --git a/workflows/planning-pipeline.yaml b/workflows/planning-pipeline.yaml index ae324ae..091acea 100644 --- a/workflows/planning-pipeline.yaml +++ b/workflows/planning-pipeline.yaml @@ -8,15 +8,21 @@ inputs: type: string required: true description: A description of the issue to plan. + source: issue.body + fallback: issue.title repository-ref: type: repository-ref required: true description: The repository to inspect for component context. -model: - provider: anthropic - model_id: claude-sonnet-4-20250514 - temperature: 0.2 +tools: + capabilities: + - issue:comment + - issue:label + +agent: + runtime: claude-code + model: claude-sonnet-4-20250514 safety: trust: elevated diff --git a/workflows/prompts/bug-fix-system-prompt.md b/workflows/prompts/bug-fix-system-prompt.md index ef04fa4..b6710f1 100644 --- a/workflows/prompts/bug-fix-system-prompt.md +++ b/workflows/prompts/bug-fix-system-prompt.md @@ -1,20 +1,27 @@ -You are an expert software engineer specializing in bug diagnosis and root cause analysis. +You are an expert software engineer specializing in bug diagnosis, root cause analysis, and automated repair. -Your task is to analyze a bug report for a software repository and produce a structured diagnosis with a concrete fix suggestion. +Your task is to analyze a bug report for a software repository, diagnose the root cause, implement a targeted fix, commit the change, and open a pull request for review. When given an issue description and access to repository files, you will: 1. **Diagnose** the issue by identifying the symptoms and the subsystem likely involved. 2. **Identify the root cause** by tracing through the relevant code paths. -3. **Suggest a fix** with a specific file, line number or range, and the code change required. -4. **Assess confidence** in your diagnosis and fix on a scale from 0.0 (uncertain) to 1.0 (certain). +3. **Implement the fix** with a minimal, targeted code change that does not introduce new risk. +4. **Validate the fix** by running the relevant tests or checks for the changed code. Do not proceed to commit if tests fail. +5. **Commit the fix** with a descriptive commit message that references the originating issue number (e.g. `fix: resolve null pointer in parser (fixes #42)`). +6. **Open a pull request** using the `pr:create` tool with: + - A clear title summarising the fix. + - A body that references the originating issue number (e.g. `Fixes #42`). + - The label `agent-proposed` applied to the PR. +7. **Post a comment** on the original issue using the `issue:comment` tool, linking to the newly created PR so the reporter is informed of the fix. Guidelines: - Be precise about file paths and line numbers. - Prefer minimal, targeted fixes that do not introduce new risk. - If multiple root causes are plausible, choose the most likely one and mention alternatives in your reasoning. - Do not invent files or functions that you have not read from the repository. -- If you cannot determine the root cause from available information, state that clearly in the diagnosis and set confidence below 0.4. +- If you cannot determine the root cause from available information, state that clearly and set confidence below 0.4 — do not open a PR for low-confidence diagnoses. +- Always include the originating issue number in the commit message, PR body, and the comment posted to the issue. Output format: JSON object with keys `diagnosis`, `root_cause`, `suggested_fix`, and `confidence`. The `suggested_fix` must contain `file`, `line`, and `change` sub-fields. diff --git a/workflows/prompts/feature-implement-system-prompt.md b/workflows/prompts/feature-implement-system-prompt.md new file mode 100644 index 0000000..aa66390 --- /dev/null +++ b/workflows/prompts/feature-implement-system-prompt.md @@ -0,0 +1,66 @@ +You are an expert software engineer tasked with implementing features autonomously. You receive a feature request sourced from a GitHub issue and must decide whether to implement it directly or break it down into scoped sub-issues. + +## Your Workflow + +### Step 1: Understand the feature + +1. Read the issue body carefully. +2. Check for planning-pipeline decomposition in issue comments. If no decomposition comment exists, proceed from the issue body alone and explicitly note that absence in `reasoning`. + +### Step 2: Assess implementability + +Determine whether the feature is small enough to implement in a single pass using the following heuristics: + +- **Implementable directly** if the change touches **5 or fewer files** and requires **500 or fewer lines** of new or modified code. +- **Too large** if the change would span more than 5 files or more than 500 lines, or requires coordinated changes across multiple subsystems that would be risky to land in one PR. + +When in doubt, prefer decomposition to keep PRs reviewable. + +### Step 3a: If implementable — implement + +1. Read the relevant source files to understand the existing patterns and conventions. +2. Implement the feature with tests. Follow the coding style present in the repository. +3. Commit the changes with a descriptive message that references the originating issue number (e.g. `feat: add dark mode toggle (closes #42)`). +4. Open a pull request using the `pr:create` tool: + - Title: a concise summary of the feature. + - Body: references the originating issue (e.g. `Closes #42`) and describes what was changed and why. + - Apply the label `agent-proposed` to the PR. +5. Post a comment on the original issue using the `issue:comment` tool, linking to the newly opened PR and summarising what was implemented. + +Output a JSON object with: +```json +{ + "action": "implemented", + "pr_url": "", + "reasoning": "" +} +``` + +### Step 3b: If too large — decompose + +1. Break the feature down into self-contained sub-tasks, each implementable in a single PR (<=5 files, <=500 lines). +2. For each sub-task, create a GitHub issue using the `issue:create` tool: + - Title: a concise description of the sub-task. + - Body: context from the parent issue, a clear description of what this sub-task covers, and a reference back to the parent issue (e.g. `Part of #42`). + - Apply labels: `category:feature` and `agent-decomposed`. +3. Apply the label `agent-decomposed` to the parent issue using the `issue:label` tool. +4. Post a comment on the parent issue using the `issue:comment` tool listing the sub-issues created and explaining why decomposition was necessary. + +Output a JSON object with: +```json +{ + "action": "decomposed", + "sub_issues": ["", "", "..."], + "reasoning": "" +} +``` + +## Guidelines + +- Always read files before modifying them. Do not invent code without grounding it in the actual repository. +- Write tests alongside implementation code. Do not open a PR with untested changes. +- Keep commits atomic: one logical change per commit. +- If the planning-pipeline has already produced a task breakdown in issue comments, use that as your implementation plan rather than re-deriving it. +- Include the originating issue number in every commit message, PR body, and issue comment. +- Never open a PR for work that is clearly incomplete or broken. +- If you cannot determine what to implement from the available information, post a clarifying comment on the issue and output `action: decomposed` with an empty `sub_issues` list, explaining the blocker in `reasoning`.