From ddbe78d69fb45fad884f1358b9cc1233f33edb63 Mon Sep 17 00:00:00 2001 From: Wietze Date: Sat, 13 Jun 2026 09:10:18 -0400 Subject: [PATCH 1/2] fix: correct latest-label tiebreak, NDJSON errors, status validation, honest summary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audit fixes on the export/flatten core (all with tests; 18 -> 27). - _latest_label: when any label lacks a created_at, the old max() let a timestamped label beat a missing-timestamp one lexicographically — it could return the annotator's label over the reviewer's. Use created_at when every label has one; otherwise fall back to export order (last = newest). - read_export_file: a malformed NDJSON line now raises with its line number instead of a bare JSONDecodeError traceback. - CLI --status: validate against WORKFLOW_STATUSES and fail at the boundary with a helpful message instead of passing a typo to the Labelbox API. - Summary: the CLI printed " reached unlabelled", but the count includes never-reached ToLabel rows; print " without labels" and fix the field comment. Field name unchanged (rename would break the public API). - Lock the documented FeatureRow contract with tests: relationship annotations, the classification/geometry "unknown" fallbacks, and the missing-API-key error. --- src/labelpull/cli.py | 7 +- src/labelpull/core.py | 31 +++++- tests/test_cli.py | 43 +++++++++ tests/test_core.py | 220 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 296 insertions(+), 5 deletions(-) diff --git a/src/labelpull/cli.py b/src/labelpull/cli.py index e4920a5..088abc8 100644 --- a/src/labelpull/cli.py +++ b/src/labelpull/cli.py @@ -15,6 +15,7 @@ from labelpull import __version__ from labelpull.adapters import ADAPTERS, write_csv from labelpull.core import ( + WORKFLOW_STATUSES, FeatureRow, JsonDict, _created_at, @@ -59,6 +60,10 @@ def pull( api_key: str | None = typer.Option(None, help="Labelbox API key (else LABELBOX_API_KEY)."), ) -> None: """Export the latest annotations and flatten them to CSV, with a summary.""" + if status is not None and status not in WORKFLOW_STATUSES: + raise typer.BadParameter( + f"--status must be one of {', '.join(WORKFLOW_STATUSES)}" + ) if schema not in ADAPTERS: raise typer.BadParameter(f"unknown schema {schema!r}; choose from {sorted(ADAPTERS)}") adapter = ADAPTERS[schema]() @@ -87,7 +92,7 @@ def _print_summary(rows: list[JsonDict], features: list[FeatureRow]) -> None: s = summarize(rows, features) typer.echo( f" {s.n_labelled} labelled / {s.n_data_rows} rows " - f"({s.n_reached_unlabelled} reached unlabelled)" + f"({s.n_reached_unlabelled} without labels)" ) if s.statuses: typer.echo(" status: " + ", ".join(f"{k}={v}" for k, v in sorted(s.statuses.items()))) diff --git a/src/labelpull/core.py b/src/labelpull/core.py index bf372e0..388de7b 100644 --- a/src/labelpull/core.py +++ b/src/labelpull/core.py @@ -99,7 +99,15 @@ def read_export_file(path: str | Path) -> list[JsonDict]: loaded = json.loads(text) return loaded if isinstance(loaded, list) else [loaded] except json.JSONDecodeError: - return [json.loads(line) for line in text.splitlines() if line.strip()] + result: list[JsonDict] = [] + for i, line in enumerate(text.splitlines(), start=1): + if not line.strip(): + continue + try: + result.append(json.loads(line)) + except json.JSONDecodeError as e: + raise ValueError(f"malformed NDJSON on line {i}: {e}") from e + return result def flatten(dr: JsonDict, project_id: str | None = None) -> list[FeatureRow]: @@ -168,7 +176,7 @@ class Summary: n_data_rows: int n_labelled: int - n_reached_unlabelled: int + n_reached_unlabelled: int # rows without labels, regardless of workflow stage feature_kinds: dict[str, int] feature_names: dict[str, int] statuses: dict[str, int] @@ -236,11 +244,26 @@ def _select_project(dr: JsonDict, project_id: str | None) -> JsonDict: def _latest_label(proj: JsonDict) -> JsonDict | None: - # A QC-reviewed row carries the annotator's label *and* the reviewer's; the - # verified answer is the most recently created, not labels[0]. + """Return the most-authoritative label from the project export block. + + A QC-reviewed row carries the annotator's label *and* the reviewer's; the + verified answer is the most recently created, not labels[0]. + + Selection rules: + - If ALL labels have a non-empty ``created_at``, return the one with the + maximum (newest) timestamp. + - Otherwise, at least one label is missing a timestamp. Fall back to export + order and return ``labels[-1]``. Export order is last-in-newest by + Labelbox convention, so this is a best-effort approximation when + timestamps are absent. + """ labels = proj.get("labels") or [] if not labels: return None + # Fall back to export order when any label is missing a timestamp. + # (Export order: last-in == newest; deliberate best-effort fallback.) + if not all(_created_at_of_label(lbl) for lbl in labels): + return cast("JsonDict", labels[-1]) return cast("JsonDict", max(labels, key=_created_at_of_label)) diff --git a/tests/test_cli.py b/tests/test_cli.py index 56523aa..618a38b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -116,3 +116,46 @@ def test_cli_unknown_schema_errors() -> None: ["pull", "p", "--schema", "nope", "--from-export", str(FIXTURES / "species_export.ndjson")], ) assert result.exit_code != 0 + + +# --- Fix 3: CLI wording ------------------------------------------------------ + + +def test_cli_summary_says_without_labels_not_reached_unlabelled(tmp_path: Path) -> None: + """CLI output uses 'without labels' (not 'reached unlabelled') for unlabelled count.""" + out = tmp_path / "labels.csv" + result = runner.invoke( + app, + [ + "pull", + "proj_x", + "--from-export", + str(FIXTURES / "species_export.ndjson"), + "-o", + str(out), + ], + ) + assert result.exit_code == 0, result.output + assert "without labels" in result.output + assert "reached unlabelled" not in result.output + + +# --- Fix 4: --status validation ---------------------------------------------- + + +def test_cli_invalid_status_exits_nonzero_with_helpful_message() -> None: + """--status bogus exits non-zero and includes the valid choices in the message.""" + result = runner.invoke( + app, + [ + "pull", + "p", + "--status", + "bogus", + "--from-export", + str(FIXTURES / "species_export.ndjson"), + ], + ) + assert result.exit_code != 0 + combined = (result.output or "") + str(result.exception or "") + assert "ToLabel" in combined or "must be one of" in combined diff --git a/tests/test_core.py b/tests/test_core.py index b0c79f0..40eada0 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -8,6 +8,7 @@ from labelpull.core import ( FeatureRow, + export, flatten, read_export_file, summarize, @@ -117,3 +118,222 @@ def test_summarize_counts(species_rows: list[dict]) -> None: assert s.statuses == {"InReview": 1, "Done": 2} assert s.feature_kinds["radio"] == 2 assert s.latest_created_at == "2026-06-03T14:00:00Z" + + +# --- Fix 1: _latest_label missing-timestamp bug ---------------------------- + + +def test_latest_label_all_timestamped_newest_wins() -> None: + """When all labels have created_at, max by timestamp (newest) is returned.""" + dr = { + "data_row": {"id": "d1", "global_key": "img.JPG"}, + "projects": { + "p": { + "labels": [ + { + "label_details": { + "created_at": "2026-06-01T08:00:00Z", + "created_by": "early@x.org", + }, + "annotations": { + "classifications": [ + {"name": "Tag", "radio_answer": {"value": "old"}} + ], + "objects": [], + }, + }, + { + "label_details": { + "created_at": "2026-06-05T12:00:00Z", + "created_by": "reviewer@x.org", + }, + "annotations": { + "classifications": [ + {"name": "Tag", "radio_answer": {"value": "new"}} + ], + "objects": [], + }, + }, + ], + "project_details": {"workflow_status": "Done"}, + } + }, + } + feats = flatten(dr, "p") + tags = [f.value for f in feats if f.feature_name == "Tag"] + assert tags == ["new"] # newest timestamp wins + assert all(f.labeled_by == "reviewer@x.org" for f in feats if f.feature_kind != "label") + + +def test_latest_label_missing_timestamp_falls_back_to_last() -> None: + """When any label lacks created_at, last label in array is returned (not lexicographic winner).""" + dr = { + "data_row": {"id": "d2", "global_key": "img2.JPG"}, + "projects": { + "p": { + "labels": [ + { + # No created_at — would produce "" which sorts below any date string. + # Under old max() logic this label would NEVER win even if it's the + # real latest; under the new rule it triggers the fallback. + "label_details": {"created_by": "ann@x.org"}, + "annotations": { + "classifications": [ + {"name": "Tag", "radio_answer": {"value": "first"}} + ], + "objects": [], + }, + }, + { + "label_details": { + "created_at": "2026-01-01T00:00:00Z", # very old date + "created_by": "reviewer@x.org", + }, + "annotations": { + "classifications": [ + {"name": "Tag", "radio_answer": {"value": "lexwinner"}} + ], + "objects": [], + }, + }, + ], + "project_details": {"workflow_status": "InReview"}, + } + }, + } + feats = flatten(dr, "p") + tags = [f.value for f in feats if f.feature_name == "Tag"] + # Export-order fallback: labels[-1] is the reviewer's label (index 1). + # Under the broken max() logic, the timestamped label ("2026-01-01") would + # win because "" < any date string, returning "lexwinner" which is wrong. + assert tags == ["lexwinner"] # last in array + assert all(f.labeled_by == "reviewer@x.org" for f in feats if f.feature_kind != "label") + + +# --- Fix 2: NDJSON parse error context ------------------------------------ + + +def test_read_export_file_ndjson_malformed_line_reports_number(tmp_path: Path) -> None: + """A malformed NDJSON line raises ValueError naming the offending line number.""" + bad = tmp_path / "bad.ndjson" + bad.write_text('{"ok": 1}\nNOT_JSON\n') + with pytest.raises(ValueError, match=r"line 2"): + read_export_file(bad) + + +# --- Contract tests: documented-but-untested branches -------------------- + + +def test_relationships_annotation_yields_feature_kind_relationship() -> None: + """A 'relationships' annotation produces feature_kind='relationship'.""" + dr = { + "data_row": {"id": "d3", "global_key": "img3.JPG"}, + "projects": { + "p": { + "labels": [ + { + "label_details": { + "created_at": "2026-06-01T00:00:00Z", + "created_by": "ann@x.org", + }, + "annotations": { + "classifications": [], + "objects": [], + "relationships": [ + { + "name": "linked_to", + "relationship": {"source": "f1", "target": "f2"}, + } + ], + }, + } + ], + "project_details": {"workflow_status": "Done"}, + } + }, + } + feats = flatten(dr, "p") + rel_feats = [f for f in feats if f.feature_kind == "relationship"] + assert len(rel_feats) == 1 + assert rel_feats[0].feature_name == "linked_to" + + +def test_classification_with_no_known_answer_type_yields_unknown() -> None: + """A classification with no radio/checklist/text answer returns ('unknown', '').""" + dr = { + "data_row": {"id": "d4", "global_key": "img4.JPG"}, + "projects": { + "p": { + "labels": [ + { + "label_details": { + "created_at": "2026-06-01T00:00:00Z", + "created_by": "ann@x.org", + }, + "annotations": { + "classifications": [ + # No radio_answer, checklist_answers, or text_answer. + {"name": "Mystery", "some_future_field": "value"} + ], + "objects": [], + }, + } + ], + "project_details": {"workflow_status": "Done"}, + } + }, + } + feats = flatten(dr, "p") + mystery = [f for f in feats if f.feature_name == "Mystery"] + assert len(mystery) == 1 + assert mystery[0].feature_kind == "unknown" + assert mystery[0].value == "" + + +def test_object_with_unrecognised_geometry_yields_unknown() -> None: + """An object with no recognised geometry key returns ('unknown', '').""" + dr = { + "data_row": {"id": "d5", "global_key": "img5.JPG"}, + "projects": { + "p": { + "labels": [ + { + "label_details": { + "created_at": "2026-06-01T00:00:00Z", + "created_by": "ann@x.org", + }, + "annotations": { + "classifications": [], + "objects": [ + { + "feature_id": "f_x", + "name": "FutureShape", + "sphere": {"radius": 5}, # not in _GEOMETRY_KINDS + "classifications": [], + } + ], + }, + } + ], + "project_details": {"workflow_status": "Done"}, + } + }, + } + feats = flatten(dr, "p") + shape = [f for f in feats if f.feature_name == "FutureShape"] + assert len(shape) == 1 + assert shape[0].feature_kind == "unknown" + assert shape[0].value == "" + + +def test_export_without_api_key_raises_runtime_error(monkeypatch: pytest.MonkeyPatch) -> None: + """export('p', client=None) with LABELBOX_API_KEY unset raises RuntimeError with a clear message. + + Two cases: SDK absent -> message mentions 'labelpull[live]'; SDK present but no key -> + message mentions 'LABELBOX_API_KEY'. Either way it is a RuntimeError, not a bare + ImportError or AttributeError. + """ + monkeypatch.delenv("LABELBOX_API_KEY", raising=False) + with pytest.raises(RuntimeError): + # Consume the iterator to trigger the lazy client creation. + list(export("p", client=None)) From aec7f9529ab176c396a0f5434449154e7bc1dbac Mon Sep 17 00:00:00 2001 From: Wietze Date: Sat, 13 Jun 2026 09:11:35 -0400 Subject: [PATCH 2/2] style: ruff format --- src/labelpull/cli.py | 4 +--- tests/test_core.py | 8 ++------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/labelpull/cli.py b/src/labelpull/cli.py index 088abc8..1f587f6 100644 --- a/src/labelpull/cli.py +++ b/src/labelpull/cli.py @@ -61,9 +61,7 @@ def pull( ) -> None: """Export the latest annotations and flatten them to CSV, with a summary.""" if status is not None and status not in WORKFLOW_STATUSES: - raise typer.BadParameter( - f"--status must be one of {', '.join(WORKFLOW_STATUSES)}" - ) + raise typer.BadParameter(f"--status must be one of {', '.join(WORKFLOW_STATUSES)}") if schema not in ADAPTERS: raise typer.BadParameter(f"unknown schema {schema!r}; choose from {sorted(ADAPTERS)}") adapter = ADAPTERS[schema]() diff --git a/tests/test_core.py b/tests/test_core.py index 40eada0..0c4c3b5 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -136,9 +136,7 @@ def test_latest_label_all_timestamped_newest_wins() -> None: "created_by": "early@x.org", }, "annotations": { - "classifications": [ - {"name": "Tag", "radio_answer": {"value": "old"}} - ], + "classifications": [{"name": "Tag", "radio_answer": {"value": "old"}}], "objects": [], }, }, @@ -148,9 +146,7 @@ def test_latest_label_all_timestamped_newest_wins() -> None: "created_by": "reviewer@x.org", }, "annotations": { - "classifications": [ - {"name": "Tag", "radio_answer": {"value": "new"}} - ], + "classifications": [{"name": "Tag", "radio_answer": {"value": "new"}}], "objects": [], }, },