Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ standards_cache.sqlite

### Docs
*.md
!AGENTS.md

### Dev DBDumps
*.sql
Expand Down
41 changes: 41 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# OpenCRE Agent Instructions

Cursor agents working in this repo must follow the rules in `.cursor/rules/`.

## Quick start

1. **Requirements gate** — If goal, success criteria, `@` file refs, or constraints are missing, stop and ask (`requirements-gate.mdc`).
2. **Plan first** — Non-trivial or multi-file work requires a plan and user approval before coding (`plan-first-workflow.mdc`, `multi-agent-workflow.mdc`).
3. **Verify** — After code changes, run checks and iterate until green (`verifiable-goals.mdc`):
- `make lint`
- `make mypy`
- `make test`
- `make frontend` (if frontend touched)
4. **Review** — Substantive work needs independent judge/subagent review (`multi-agent-workflow.mdc`).
5. **Production gap analysis** — On Heroku/opencreorg, gap analysis is cache-only: serve precomputed rows from Postgres; do not compute GA on production (cache miss → 404).

## Rule index

| Rule | Purpose |
|------|---------|
| `requirements-gate.mdc` | Clarifying questions + requirements template |
| `complete-ticket.mdc` | Ticket gate for `.md`/`.txt` files; uses `requirements-gate` template + coding standards |
| `plan-first-workflow.mdc` | Plan Mode before non-trivial edits |
| `multi-agent-workflow.mdc` | Big changes, approval gates, builder ≠ judge |
| `verifiable-goals.mdc` | Lint, mypy, test, CI — show evidence |
| `never-assume.mdc` | No guessing; complete code; minimal scope |
| `tdd-workflow.mdc` | Test-first for new behavior and importers |
| `autonomous-workflow.mdc` | Execute after approval; no unsolicited commits |
| `context-management.mdc` | `/clear`, `@` refs, stale context recovery |
| `production-db-ops-safety.mdc` | Destructive prod DB confirmation |
| `alembic-deploy-guardrail.mdc` | Pre-deploy migration guardrail |

## OpenCRE commands

```bash
make lint # black + frontend prettier
make mypy # Python typecheck
make test # Python unittest suite
make frontend # yarn build (when TS/TSX changed)
make alembic-guardrail # before deploy/migration ops
```
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,12 @@ verify-ga-complete-prod:
--base-url "https://opencre.org" \
--output-json "$(CURDIR)/tmp/prod-ga-completeness.json"

monitor-ga-health-prod:
@[ -d "./.venv" ] && . ./.venv/bin/activate || ([ -d "./venv" ] && . ./venv/bin/activate); \
python scripts/monitor_ga_health.py \
--base-url "https://opencre.org" \
--output-json "$(CURDIR)/tmp/prod-ga-health.json"

verify-ga-parity-local:
@[ -d "./.venv" ] && . ./.venv/bin/activate || ([ -d "./venv" ] && . ./venv/bin/activate); \
export CRE_CACHE_FILE="$${CRE_CACHE_FILE:-postgresql://cre:password@127.0.0.1:5432/cre}"; \
Expand Down
17 changes: 17 additions & 0 deletions application/database/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
make_resources_key,
make_subresources_key,
primary_gap_analysis_payload_is_material,
should_persist_primary_gap_analysis_cache,
)


Expand Down Expand Up @@ -2428,6 +2429,22 @@ def add_gap_analysis_result(self, cache_key: str, ga_object: str):
.filter(GapAnalysisResults.cache_key == cache_key)
.first()
)
if gap_analysis_cache_key_is_primary(cache_key):
existing_payload = existing.ga_object if existing else None
if not should_persist_primary_gap_analysis_cache(
ga_object, existing_payload
):
if existing is None:
logger.info(
"Skipping empty primary gap analysis cache insert for %s",
cache_key,
)
else:
logger.warning(
"Refusing non-material primary gap analysis update for %s",
cache_key,
)
return
if existing:
existing.ga_object = ga_object
self.session.add(existing)
Expand Down
62 changes: 60 additions & 2 deletions application/tests/db_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1327,15 +1327,73 @@ def test_gap_analysis_paths_without_base_standard_nodes(self, gap_mock):
collection.gap_analysis_exists(make_resources_key(["788-788", "788-789"])),
)

def test_add_gap_analysis_result_skips_empty_primary_insert(self):
collection = db.Node_collection()
key = make_resources_key(["788-788", "b"])
collection.add_gap_analysis_result(key, '{"result": {}}')
self.assertIsNone(
collection.session.query(db.GapAnalysisResults)
.filter(db.GapAnalysisResults.cache_key == key)
.first()
)
self.assertFalse(collection.gap_analysis_exists(key))

def test_add_gap_analysis_result_does_not_clobber_material_primary(self):
collection = db.Node_collection()
key = make_resources_key(["788-788", "b"])
material = '{"result": {"111-111": {"paths": {}}}}'
collection.add_gap_analysis_result(key, material)
collection.add_gap_analysis_result(key, '{"result": {}}')
row = (
collection.session.query(db.GapAnalysisResults)
.filter(db.GapAnalysisResults.cache_key == key)
.first()
)
self.assertEqual(row.ga_object, material)
self.assertTrue(collection.gap_analysis_exists(key))

def test_add_gap_analysis_result_allows_material_over_empty_primary(self):
collection = db.Node_collection()
key = make_resources_key(["788-788", "b"])
collection.session.add(
db.GapAnalysisResults(cache_key=key, ga_object='{"result": {}}')
)
collection.session.commit()
material = '{"result": {"111-111": {"paths": {}}}}'
collection.add_gap_analysis_result(key, material)
row = (
collection.session.query(db.GapAnalysisResults)
.filter(db.GapAnalysisResults.cache_key == key)
.first()
)
self.assertEqual(row.ga_object, material)

def test_add_gap_analysis_result_allows_empty_subresource(self):
collection = db.Node_collection()
sub = make_subresources_key(["788-788", "b"], "111-111")
collection.add_gap_analysis_result(sub, '{"result": {}}')
row = (
collection.session.query(db.GapAnalysisResults)
.filter(db.GapAnalysisResults.cache_key == sub)
.first()
)
self.assertIsNotNone(row)
self.assertEqual(row.ga_object, '{"result": {}}')

@patch.object(db.NEO_DB, "gap_analysis")
def test_gap_analysis_removes_stale_empty_primary_when_neo_empty(self, gap_mock):
"""Placeholder ``{"result":{}}`` rows must not survive a recompute with no Neo paths."""
collection = db.Node_collection()
collection.neo_db.connected = True
key = make_resources_key(["788-788", "b"])
collection.add_gap_analysis_result(key, '{"result": {}}')
collection.session.add(
db.GapAnalysisResults(cache_key=key, ga_object='{"result": {}}')
)
sub = make_subresources_key(["788-788", "b"], "111-111")
collection.add_gap_analysis_result(sub, '{"result": {}}')
collection.session.add(
db.GapAnalysisResults(cache_key=sub, ga_object='{"result": {}}')
)
collection.session.commit()
self.assertFalse(collection.gap_analysis_exists(key))

gap_mock.return_value = ([], [])
Expand Down
19 changes: 19 additions & 0 deletions application/tests/gap_analysis_pair_job_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,25 @@ def get_gap_analysis_result(self, cache_key: str):


class TestGapAnalysisPairJob(unittest.TestCase):
def test_should_persist_primary_gap_analysis_cache(self):
g = gap_analysis
self.assertFalse(
g.should_persist_primary_gap_analysis_cache('{"result":{}}', None)
)
self.assertFalse(
g.should_persist_primary_gap_analysis_cache(
'{"result":{}}', '{"result":{"k":1}}'
)
)
self.assertTrue(
g.should_persist_primary_gap_analysis_cache(
'{"result":{"k":1}}', '{"result":{}}'
)
)
self.assertTrue(
g.should_persist_primary_gap_analysis_cache('{"result":{"k":1}}', None)
)

def test_primary_gap_analysis_payload_is_material(self):
g = gap_analysis
self.assertFalse(g.primary_gap_analysis_payload_is_material(None))
Expand Down
18 changes: 18 additions & 0 deletions application/tests/monitor_ga_health_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import unittest

from scripts import monitor_ga_health as monitor


class MonitorGaHealthTest(unittest.TestCase):
def test_material_result_detection(self) -> None:
self.assertTrue(monitor._http_gap_result_is_material({"a": 1}))
self.assertFalse(monitor._http_gap_result_is_material({}))
self.assertFalse(monitor._http_gap_result_is_material(None))

def test_503_bucket_is_regression_marker(self) -> None:
bucket = "http_503_regression"
self.assertEqual(bucket, "http_503_regression")
Comment on lines +12 to +14

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Test is ineffective: validates only a local variable, not the production code.

This test assigns a local variable and asserts it equals itself, providing no validation of the monitor_ga_health module. Given that detecting HTTP 503 regressions is a core objective of this PR, the test should verify that _check_pair actually returns "http_503_regression" as the bucket when a 503 status is encountered.

🧪 Proposed fix to test the actual code
-    def test_503_bucket_is_regression_marker(self) -> None:
-        bucket = "http_503_regression"
-        self.assertEqual(bucket, "http_503_regression")
+    def test_503_bucket_is_regression_marker(self) -> None:
+        """Verify that HTTP 503 responses are bucketed as regression markers."""
+        # Mock a 503 response by testing _check_pair's classification logic
+        # This would require mocking urllib or testing the bucket assignment directly
+        # At minimum, verify the constant used in the code:
+        from unittest.mock import patch, Mock
+        
+        mock_resp = Mock()
+        mock_resp.status = 503
+        mock_resp.read.return_value = b'{"error": "Service Unavailable"}'
+        
+        with patch('urllib.request.urlopen', return_value=mock_resp):
+            result = monitor._check_pair("https://test.example.com/rest/v1", "ASVS", "NIST", 10)
+            self.assertIsNotNone(result)
+            self.assertEqual(result["bucket"], "http_503_regression")
+            self.assertEqual(result["status_code"], 503)

Alternative minimal fix (if mocking is too complex for this test suite):

-    def test_503_bucket_is_regression_marker(self) -> None:
-        bucket = "http_503_regression"
-        self.assertEqual(bucket, "http_503_regression")
+    def test_503_bucket_is_regression_marker(self) -> None:
+        """Verify the 503 regression bucket constant is correctly defined."""
+        # Test that the expected bucket name matches what's used in _check_pair
+        # by inspecting the module's source or a reference constant
+        import inspect
+        source = inspect.getsource(monitor._check_pair)
+        self.assertIn('"http_503_regression"', source, 
+                      "503 responses must be bucketed as http_503_regression")
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@application/tests/monitor_ga_health_test.py` around lines 12 - 14, The test
currently asserts a local variable and should instead invoke
monitor_ga_health._check_pair with a simulated input that represents a 503
response and assert the returned bucket equals "http_503_regression"; update
test_503_bucket_is_regression_marker to construct the same Pair/record shape
used by monitor_ga_health (or mock the dependency that provides the status) so
_check_pair sees status 503, call monitor_ga_health._check_pair(...), and assert
the result is "http_503_regression".



if __name__ == "__main__":
unittest.main()
38 changes: 34 additions & 4 deletions application/tests/web_main_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,23 +743,42 @@ def test_gap_analysis_fallback_backend_failure_returns_503(
redis_conn_mock.side_effect = RuntimeError("redis down")
db_gap_analysis_mock.side_effect = RuntimeError("neo unavailable")
db_mock.return_value.get_gap_analysis_result.return_value = None
db_mock.return_value.gap_analysis_exists.return_value = False
with self.app.test_client() as client:
with patch.dict(os.environ, {}, clear=False):
os.environ.pop("DYNO", None)
os.environ.pop("HEROKU", None)
response = client.get(
"/rest/v1/map_analysis?standard=aaa&standard=bbb",
headers={"Content-Type": "application/json"},
)
self.assertEqual(503, response.status_code)
db_gap_analysis_mock.assert_called_once()

@patch.dict(os.environ, {"HEROKU": "True"}, clear=False)
@patch.object(db, "Node_collection")
@patch.object(redis, "from_url")
def test_gap_analysis_heroku_cache_miss_returns_404(
self, redis_conn_mock, db_mock
) -> None:
db_mock.return_value.get_gap_analysis_result.return_value = None
db_mock.return_value.gap_analysis_exists.return_value = False
with self.app.test_client() as client:
response = client.get(
"/rest/v1/map_analysis?standard=aaa&standard=bbb",
headers={"Content-Type": "application/json"},
)
self.assertEqual(503, response.status_code)
db_gap_analysis_mock.assert_called_once()
self.assertEqual(404, response.status_code)
redis_conn_mock.assert_not_called()

@patch.dict(os.environ, {"DYNO": "web.1"}, clear=False)
@patch.object(db, "Node_collection")
@patch.object(redis, "from_url")
def test_gap_analysis_dyno_missing_standard_returns_404(
def test_gap_analysis_dyno_cache_miss_returns_404(
self, redis_conn_mock, db_mock
) -> None:
db_mock.return_value.get_gap_analysis_result.return_value = None
db_mock.return_value.gap_analysis_exists.return_value = False
db_mock.return_value.standards.return_value = ["aaa"]
with self.app.test_client() as client:
response = client.get(
"/rest/v1/map_analysis?standard=aaa&standard=bbb",
Expand All @@ -768,6 +787,17 @@ def test_gap_analysis_dyno_missing_standard_returns_404(
self.assertEqual(404, response.status_code)
redis_conn_mock.assert_not_called()

@patch.dict(os.environ, {"HEROKU": "True"}, clear=False)
@patch.object(db, "Node_collection")
def test_map_analysis_opencre_heroku_cache_miss_returns_404(self, db_mock) -> None:
db_mock.return_value.gap_analysis_exists.return_value = False
with self.app.test_client() as client:
response = client.get(
"/rest/v1/map_analysis?standard=OpenCRE&standard=bbb",
headers={"Content-Type": "application/json"},
)
self.assertEqual(404, response.status_code)

@patch.object(redis, "from_url")
@patch.object(db, "Node_collection")
def test_standards_from_db(self, node_mock, redis_conn_mock) -> None:
Expand Down
19 changes: 19 additions & 0 deletions application/utils/gap_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,25 @@ def primary_gap_analysis_payload_is_material(ga_object: Optional[str]) -> bool:
return bool(res)


def should_persist_primary_gap_analysis_cache(
ga_object: str,
existing_ga_object: Optional[str] = None,
) -> bool:
"""
True when a primary GA SQL cache write should be applied.

Non-material empty ``{"result": {}}`` payloads must not be inserted and must
not overwrite an existing material row.
"""
if primary_gap_analysis_payload_is_material(ga_object):
return True
if existing_ga_object is None:
return False
if primary_gap_analysis_payload_is_material(existing_ga_object):
return False
return False


def get_path_score(path):
score = 0
previous_id = path["start"].id
Expand Down
Loading
Loading