From ddbe78d69fb45fad884f1358b9cc1233f33edb63 Mon Sep 17 00:00:00 2001
From: Wietze <wietze.suijker@gmail.com>
Date: Sat, 13 Jun 2026 09:10:18 -0400
Subject: [PATCH 1/2] fix: correct latest-label tiebreak, NDJSON errors, status
 validation, honest summary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Audit fixes on the export/flatten core (all with tests; 18 -> 27).

- _latest_label: when any label lacks a created_at, the old max() let a
  timestamped label beat a missing-timestamp one lexicographically — it could
  return the annotator's label over the reviewer's. Use created_at when every
  label has one; otherwise fall back to export order (last = newest).
- read_export_file: a malformed NDJSON line now raises with its line number
  instead of a bare JSONDecodeError traceback.
- CLI --status: validate against WORKFLOW_STATUSES and fail at the boundary with
  a helpful message instead of passing a typo to the Labelbox API.
- Summary: the CLI printed "<n> reached unlabelled", but the count includes
  never-reached ToLabel rows; print "<n> without labels" and fix the field
  comment. Field name unchanged (rename would break the public API).
- Lock the documented FeatureRow contract with tests: relationship annotations,
  the classification/geometry "unknown" fallbacks, and the missing-API-key error.
---
 src/labelpull/cli.py  |   7 +-
 src/labelpull/core.py |  31 +++++-
 tests/test_cli.py     |  43 +++++++++
 tests/test_core.py    | 220 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 296 insertions(+), 5 deletions(-)
diff --git a/src/labelpull/cli.py b/src/labelpull/cli.py
index e4920a5..088abc8 100644
--- a/src/labelpull/cli.py
+++ b/src/labelpull/cli.py
@@ -15,6 +15,7 @@
 from labelpull import __version__
 from labelpull.adapters import ADAPTERS, write_csv
 from labelpull.core import (
+    WORKFLOW_STATUSES,
     FeatureRow,
     JsonDict,
     _created_at,
@@ -59,6 +60,10 @@ def pull(
     api_key: str | None = typer.Option(None, help="Labelbox API key (else LABELBOX_API_KEY)."),
 ) -> None:
     """Export the latest annotations and flatten them to CSV, with a summary."""
+    if status is not None and status not in WORKFLOW_STATUSES:
+        raise typer.BadParameter(
+            f"--status must be one of {', '.join(WORKFLOW_STATUSES)}"
+        )
     if schema not in ADAPTERS:
         raise typer.BadParameter(f"unknown schema {schema!r}; choose from {sorted(ADAPTERS)}")
     adapter = ADAPTERS[schema]()
@@ -87,7 +92,7 @@ def _print_summary(rows: list[JsonDict], features: list[FeatureRow]) -> None:
     s = summarize(rows, features)
     typer.echo(
         f"  {s.n_labelled} labelled / {s.n_data_rows} rows "
-        f"({s.n_reached_unlabelled} reached unlabelled)"
+        f"({s.n_reached_unlabelled} without labels)"
     )
     if s.statuses:
         typer.echo("  status: " + ", ".join(f"{k}={v}" for k, v in sorted(s.statuses.items())))
diff --git a/src/labelpull/core.py b/src/labelpull/core.py
index bf372e0..388de7b 100644
--- a/src/labelpull/core.py
+++ b/src/labelpull/core.py
@@ -99,7 +99,15 @@ def read_export_file(path: str | Path) -> list[JsonDict]:
         loaded = json.loads(text)
         return loaded if isinstance(loaded, list) else [loaded]
     except json.JSONDecodeError:
-        return [json.loads(line) for line in text.splitlines() if line.strip()]
+        result: list[JsonDict] = []
+        for i, line in enumerate(text.splitlines(), start=1):
+            if not line.strip():
+                continue
+            try:
+                result.append(json.loads(line))
+            except json.JSONDecodeError as e:
+                raise ValueError(f"malformed NDJSON on line {i}: {e}") from e
+        return result
 
 
 def flatten(dr: JsonDict, project_id: str | None = None) -> list[FeatureRow]:
@@ -168,7 +176,7 @@ class Summary:
 
     n_data_rows: int
     n_labelled: int
-    n_reached_unlabelled: int
+    n_reached_unlabelled: int  # rows without labels, regardless of workflow stage
     feature_kinds: dict[str, int]
     feature_names: dict[str, int]
     statuses: dict[str, int]
@@ -236,11 +244,26 @@ def _select_project(dr: JsonDict, project_id: str | None) -> JsonDict:
 
 
 def _latest_label(proj: JsonDict) -> JsonDict | None:
-    # A QC-reviewed row carries the annotator's label *and* the reviewer's; the
-    # verified answer is the most recently created, not labels[0].
+    """Return the most-authoritative label from the project export block.
+
+    A QC-reviewed row carries the annotator's label *and* the reviewer's; the
+    verified answer is the most recently created, not labels[0].
+
+    Selection rules:
+    - If ALL labels have a non-empty ``created_at``, return the one with the
+      maximum (newest) timestamp.
+    - Otherwise, at least one label is missing a timestamp. Fall back to export
+      order and return ``labels[-1]``. Export order is last-in-newest by
+      Labelbox convention, so this is a best-effort approximation when
+      timestamps are absent.
+    """
     labels = proj.get("labels") or []
     if not labels:
         return None
+    # Fall back to export order when any label is missing a timestamp.
+    # (Export order: last-in == newest; deliberate best-effort fallback.)
+    if not all(_created_at_of_label(lbl) for lbl in labels):
+        return cast("JsonDict", labels[-1])
     return cast("JsonDict", max(labels, key=_created_at_of_label))
 
 
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 56523aa..618a38b 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -116,3 +116,46 @@ def test_cli_unknown_schema_errors() -> None:
         ["pull", "p", "--schema", "nope", "--from-export", str(FIXTURES / "species_export.ndjson")],
     )
     assert result.exit_code != 0
+
+
+# --- Fix 3: CLI wording ------------------------------------------------------
+
+
+def test_cli_summary_says_without_labels_not_reached_unlabelled(tmp_path: Path) -> None:
+    """CLI output uses 'without labels' (not 'reached unlabelled') for unlabelled count."""
+    out = tmp_path / "labels.csv"
+    result = runner.invoke(
+        app,
+        [
+            "pull",
+            "proj_x",
+            "--from-export",
+            str(FIXTURES / "species_export.ndjson"),
+            "-o",
+            str(out),
+        ],
+    )
+    assert result.exit_code == 0, result.output
+    assert "without labels" in result.output
+    assert "reached unlabelled" not in result.output
+
+
+# --- Fix 4: --status validation ----------------------------------------------
+
+
+def test_cli_invalid_status_exits_nonzero_with_helpful_message() -> None:
+    """--status bogus exits non-zero and includes the valid choices in the message."""
+    result = runner.invoke(
+        app,
+        [
+            "pull",
+            "p",
+            "--status",
+            "bogus",
+            "--from-export",
+            str(FIXTURES / "species_export.ndjson"),
+        ],
+    )
+    assert result.exit_code != 0
+    combined = (result.output or "") + str(result.exception or "")
+    assert "ToLabel" in combined or "must be one of" in combined
diff --git a/tests/test_core.py b/tests/test_core.py
index b0c79f0..40eada0 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -8,6 +8,7 @@
 
 from labelpull.core import (
     FeatureRow,
+    export,
     flatten,
     read_export_file,
     summarize,
@@ -117,3 +118,222 @@ def test_summarize_counts(species_rows: list[dict]) -> None:
     assert s.statuses == {"InReview": 1, "Done": 2}
     assert s.feature_kinds["radio"] == 2
     assert s.latest_created_at == "2026-06-03T14:00:00Z"
+
+
+# --- Fix 1: _latest_label missing-timestamp bug ----------------------------
+
+
+def test_latest_label_all_timestamped_newest_wins() -> None:
+    """When all labels have created_at, max by timestamp (newest) is returned."""
+    dr = {
+        "data_row": {"id": "d1", "global_key": "img.JPG"},
+        "projects": {
+            "p": {
+                "labels": [
+                    {
+                        "label_details": {
+                            "created_at": "2026-06-01T08:00:00Z",
+                            "created_by": "early@x.org",
+                        },
+                        "annotations": {
+                            "classifications": [
+                                {"name": "Tag", "radio_answer": {"value": "old"}}
+                            ],
+                            "objects": [],
+                        },
+                    },
+                    {
+                        "label_details": {
+                            "created_at": "2026-06-05T12:00:00Z",
+                            "created_by": "reviewer@x.org",
+                        },
+                        "annotations": {
+                            "classifications": [
+                                {"name": "Tag", "radio_answer": {"value": "new"}}
+                            ],
+                            "objects": [],
+                        },
+                    },
+                ],
+                "project_details": {"workflow_status": "Done"},
+            }
+        },
+    }
+    feats = flatten(dr, "p")
+    tags = [f.value for f in feats if f.feature_name == "Tag"]
+    assert tags == ["new"]  # newest timestamp wins
+    assert all(f.labeled_by == "reviewer@x.org" for f in feats if f.feature_kind != "label")
+
+
+def test_latest_label_missing_timestamp_falls_back_to_last() -> None:
+    """When any label lacks created_at, last label in array is returned (not lexicographic winner)."""
+    dr = {
+        "data_row": {"id": "d2", "global_key": "img2.JPG"},
+        "projects": {
+            "p": {
+                "labels": [
+                    {
+                        # No created_at — would produce "" which sorts below any date string.
+                        # Under old max() logic this label would NEVER win even if it's the
+                        # real latest; under the new rule it triggers the fallback.
+                        "label_details": {"created_by": "ann@x.org"},
+                        "annotations": {
+                            "classifications": [
+                                {"name": "Tag", "radio_answer": {"value": "first"}}
+                            ],
+                            "objects": [],
+                        },
+                    },
+                    {
+                        "label_details": {
+                            "created_at": "2026-01-01T00:00:00Z",  # very old date
+                            "created_by": "reviewer@x.org",
+                        },
+                        "annotations": {
+                            "classifications": [
+                                {"name": "Tag", "radio_answer": {"value": "lexwinner"}}
+                            ],
+                            "objects": [],
+                        },
+                    },
+                ],
+                "project_details": {"workflow_status": "InReview"},
+            }
+        },
+    }
+    feats = flatten(dr, "p")
+    tags = [f.value for f in feats if f.feature_name == "Tag"]
+    # Export-order fallback: labels[-1] is the reviewer's label (index 1).
+    # Under the broken max() logic, the timestamped label ("2026-01-01") would
+    # win because "" < any date string, returning "lexwinner" which is wrong.
+    assert tags == ["lexwinner"]  # last in array
+    assert all(f.labeled_by == "reviewer@x.org" for f in feats if f.feature_kind != "label")
+
+
+# --- Fix 2: NDJSON parse error context ------------------------------------
+
+
+def test_read_export_file_ndjson_malformed_line_reports_number(tmp_path: Path) -> None:
+    """A malformed NDJSON line raises ValueError naming the offending line number."""
+    bad = tmp_path / "bad.ndjson"
+    bad.write_text('{"ok": 1}\nNOT_JSON\n')
+    with pytest.raises(ValueError, match=r"line 2"):
+        read_export_file(bad)
+
+
+# --- Contract tests: documented-but-untested branches --------------------
+
+
+def test_relationships_annotation_yields_feature_kind_relationship() -> None:
+    """A 'relationships' annotation produces feature_kind='relationship'."""
+    dr = {
+        "data_row": {"id": "d3", "global_key": "img3.JPG"},
+        "projects": {
+            "p": {
+                "labels": [
+                    {
+                        "label_details": {
+                            "created_at": "2026-06-01T00:00:00Z",
+                            "created_by": "ann@x.org",
+                        },
+                        "annotations": {
+                            "classifications": [],
+                            "objects": [],
+                            "relationships": [
+                                {
+                                    "name": "linked_to",
+                                    "relationship": {"source": "f1", "target": "f2"},
+                                }
+                            ],
+                        },
+                    }
+                ],
+                "project_details": {"workflow_status": "Done"},
+            }
+        },
+    }
+    feats = flatten(dr, "p")
+    rel_feats = [f for f in feats if f.feature_kind == "relationship"]
+    assert len(rel_feats) == 1
+    assert rel_feats[0].feature_name == "linked_to"
+
+
+def test_classification_with_no_known_answer_type_yields_unknown() -> None:
+    """A classification with no radio/checklist/text answer returns ('unknown', '')."""
+    dr = {
+        "data_row": {"id": "d4", "global_key": "img4.JPG"},
+        "projects": {
+            "p": {
+                "labels": [
+                    {
+                        "label_details": {
+                            "created_at": "2026-06-01T00:00:00Z",
+                            "created_by": "ann@x.org",
+                        },
+                        "annotations": {
+                            "classifications": [
+                                # No radio_answer, checklist_answers, or text_answer.
+                                {"name": "Mystery", "some_future_field": "value"}
+                            ],
+                            "objects": [],
+                        },
+                    }
+                ],
+                "project_details": {"workflow_status": "Done"},
+            }
+        },
+    }
+    feats = flatten(dr, "p")
+    mystery = [f for f in feats if f.feature_name == "Mystery"]
+    assert len(mystery) == 1
+    assert mystery[0].feature_kind == "unknown"
+    assert mystery[0].value == ""
+
+
+def test_object_with_unrecognised_geometry_yields_unknown() -> None:
+    """An object with no recognised geometry key returns ('unknown', '')."""
+    dr = {
+        "data_row": {"id": "d5", "global_key": "img5.JPG"},
+        "projects": {
+            "p": {
+                "labels": [
+                    {
+                        "label_details": {
+                            "created_at": "2026-06-01T00:00:00Z",
+                            "created_by": "ann@x.org",
+                        },
+                        "annotations": {
+                            "classifications": [],
+                            "objects": [
+                                {
+                                    "feature_id": "f_x",
+                                    "name": "FutureShape",
+                                    "sphere": {"radius": 5},  # not in _GEOMETRY_KINDS
+                                    "classifications": [],
+                                }
+                            ],
+                        },
+                    }
+                ],
+                "project_details": {"workflow_status": "Done"},
+            }
+        },
+    }
+    feats = flatten(dr, "p")
+    shape = [f for f in feats if f.feature_name == "FutureShape"]
+    assert len(shape) == 1
+    assert shape[0].feature_kind == "unknown"
+    assert shape[0].value == ""
+
+
+def test_export_without_api_key_raises_runtime_error(monkeypatch: pytest.MonkeyPatch) -> None:
+    """export('p', client=None) with LABELBOX_API_KEY unset raises RuntimeError with a clear message.
+
+    Two cases: SDK absent -> message mentions 'labelpull[live]'; SDK present but no key ->
+    message mentions 'LABELBOX_API_KEY'. Either way it is a RuntimeError, not a bare
+    ImportError or AttributeError.
+    """
+    monkeypatch.delenv("LABELBOX_API_KEY", raising=False)
+    with pytest.raises(RuntimeError):
+        # Consume the iterator to trigger the lazy client creation.
+        list(export("p", client=None))

From aec7f9529ab176c396a0f5434449154e7bc1dbac Mon Sep 17 00:00:00 2001
From: Wietze <wietze.suijker@gmail.com>
Date: Sat, 13 Jun 2026 09:11:35 -0400
Subject: [PATCH 2/2] style: ruff format

---
 src/labelpull/cli.py | 4 +---
 tests/test_core.py   | 8 ++------
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/src/labelpull/cli.py b/src/labelpull/cli.py
index 088abc8..1f587f6 100644
--- a/src/labelpull/cli.py
+++ b/src/labelpull/cli.py
@@ -61,9 +61,7 @@ def pull(
 ) -> None:
     """Export the latest annotations and flatten them to CSV, with a summary."""
     if status is not None and status not in WORKFLOW_STATUSES:
-        raise typer.BadParameter(
-            f"--status must be one of {', '.join(WORKFLOW_STATUSES)}"
-        )
+        raise typer.BadParameter(f"--status must be one of {', '.join(WORKFLOW_STATUSES)}")
     if schema not in ADAPTERS:
         raise typer.BadParameter(f"unknown schema {schema!r}; choose from {sorted(ADAPTERS)}")
     adapter = ADAPTERS[schema]()
diff --git a/tests/test_core.py b/tests/test_core.py
index 40eada0..0c4c3b5 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -136,9 +136,7 @@ def test_latest_label_all_timestamped_newest_wins() -> None:
                             "created_by": "early@x.org",
                         },
                         "annotations": {
-                            "classifications": [
-                                {"name": "Tag", "radio_answer": {"value": "old"}}
-                            ],
+                            "classifications": [{"name": "Tag", "radio_answer": {"value": "old"}}],
                             "objects": [],
                         },
                     },
@@ -148,9 +146,7 @@ def test_latest_label_all_timestamped_newest_wins() -> None:
                             "created_by": "reviewer@x.org",
                         },
                         "annotations": {
-                            "classifications": [
-                                {"name": "Tag", "radio_answer": {"value": "new"}}
-                            ],
+                            "classifications": [{"name": "Tag", "radio_answer": {"value": "new"}}],
                             "objects": [],
                         },
                     },