-
Notifications
You must be signed in to change notification settings - Fork 7
cli: add option to not get the all-altloc selection string from find_altloc_selections.py #221
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,170 @@ | ||
| """ | ||
| Integration tests for ``scripts/eval/find_altloc_selections.py``. | ||
| """ | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import argparse | ||
| import importlib.util | ||
| from pathlib import Path | ||
|
|
||
| import pandas as pd | ||
| import pytest | ||
|
|
||
|
|
||
# Absolute location of the script under test: start from this file's resolved
# path, take its third ancestor directory (``parents[2]``), and descend into
# ``scripts/eval``.
_SCRIPT_PATH = Path(__file__).resolve().parents[2].joinpath(
    "scripts", "eval", "find_altloc_selections.py"
)
|
|
||
|
|
||
def _load_script():
    """Load the script at ``_SCRIPT_PATH`` as a module and return it.

    The module is built directly from the file location, so the script does
    not need to be reachable through ``sys.path``. The module body is
    executed on every call.
    """
    module_spec = importlib.util.spec_from_file_location(
        "find_altloc_selections_script", _SCRIPT_PATH
    )
    # Both the spec and its loader must be available before the module can be
    # created and executed.
    assert module_spec is not None
    assert module_spec.loader is not None
    script_module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(script_module)
    return script_module
|
|
||
|
|
||
@pytest.fixture
def find_altloc_script():
    """Fixture returning the module object produced by ``_load_script()``."""
    script_module = _load_script()
    return script_module
|
|
||
|
|
||
def _altloc_input_csv(tmp_path: Path, resources_dir: Path, *, n_rows: int = 1) -> Path:
    """Create ``tmp_path / "input.csv"`` with ``n_rows`` rows for the 1vme altloc CIF.

    Every row points at the same structure and density files under
    ``resources_dir / "1vme"`` and carries a resolution of 1.8; only the
    ``name`` column varies (``1VME_0``, ``1VME_1``, ...).

    The calling test is skipped when the CIF file does not exist.

    Returns the path of the CSV that was written.
    """
    case_dir = resources_dir / "1vme"
    cif_path = case_dir / "1vme_final_carved_edited_0.5occA_0.5occB.cif"
    if not cif_path.exists():
        pytest.skip(f"Test resource not found: {cif_path}")
    map_path = case_dir / "1vme_final_carved_edited_0.5occA_0.5occB_1.80A.ccp4"

    # Fields that are identical for every row; ``name`` is placed first so the
    # CSV column order is name, structure, density, resolution.
    shared_fields = {
        "structure": str(cif_path),
        "density": str(map_path),
        "resolution": 1.8,
    }
    records = [{"name": f"1VME_{idx}", **shared_fields} for idx in range(n_rows)]

    csv_path = tmp_path / "input.csv"
    pd.DataFrame(records).to_csv(csv_path, index=False)
    return csv_path
|
|
||
|
|
||
def _make_args(
    *,
    input_csv: Path,
    output_file: Path,
    min_span: int = 5,
    altloc_label: str = "label_alt_id",
    include_all_altlocs: bool = True,
) -> argparse.Namespace:
    """Bundle the given options into an ``argparse.Namespace``.

    All parameters are keyword-only and are stored on the namespace under
    their own names, so the result can be passed to the script's ``main``
    without going through command-line parsing.
    """
    options = {
        "input_csv": input_csv,
        "output_file": output_file,
        "min_span": min_span,
        "altloc_label": altloc_label,
        "include_all_altlocs": include_all_altlocs,
    }
    return argparse.Namespace(**options)
|
|
||
|
|
||
# Exact set of column names the tests expect in the CSV written by the script.
_EXPECTED_COLS = {
    "protein", "selection", "resolution",
    "structure_pattern", "map_pattern", "base_map_dir",
}
|
|
||
|
|
||
@pytest.mark.slow
def test_main_writes_expected_output_columns_and_derived_paths(
    tmp_path: Path, resources_dir: Path, find_altloc_script
):
    """Happy path with two input rows and default options.

    Checks the output column set, the row count, the per-row derived path
    columns, and that each row's ``selection`` contains at least one
    old-style and at least one new-style entry.
    """
    input_csv = _altloc_input_csv(tmp_path, resources_dir, n_rows=2)
    output_file = tmp_path / "output.csv"
    args = _make_args(input_csv=input_csv, output_file=output_file)

    find_altloc_script.main(args)

    assert output_file.exists()
    df = pd.read_csv(output_file)

    assert set(df.columns) == _EXPECTED_COLS
    assert len(df) == 2

    for i, row in df.iterrows():
        assert row["protein"] == f"1VME_{i}"
        assert row["structure_pattern"] == "1vme_final_carved_edited_0.5occA_0.5occB.cif"
        assert row["map_pattern"] == "1vme_final_carved_edited_0.5occA_0.5occB_1.80A.ccp4"
        assert row["base_map_dir"] == "1vme"
        assert row["resolution"] == pytest.approx(1.8)

        # Selections are stored as one ";"-separated string per row.
        entries = row["selection"].split(";")
        has_old_style = any(
            s.startswith("chain ") and " and resi " in s for s in entries
        )
        assert has_old_style, f"expected at least one old-style selection in {entries}"
        has_new_style = any("chain_id == " in s for s in entries)
        assert has_new_style, (
            f"expected at least one new-style selection in {entries} "
            "(include_all_altlocs default is True)"
        )
|
|
||
|
|
||
@pytest.mark.slow
def test_no_all_altlocs_flag_omits_per_chain_selection(
    tmp_path: Path, resources_dir: Path, find_altloc_script
):
    """With ``include_all_altlocs=False`` (CLI ``--no-all-altlocs``) no
    new-style per-chain selection may appear; any entries that are present
    must all be old-style."""
    output_file = tmp_path / "output.csv"
    args = _make_args(
        input_csv=_altloc_input_csv(tmp_path, resources_dir, n_rows=1),
        output_file=output_file,
        include_all_altlocs=False,
    )

    find_altloc_script.main(args)

    raw_selection = pd.read_csv(output_file).iloc[0]["selection"]
    # An empty CSV cell is read back as NaN; treat it as an empty string.
    selection_str = raw_selection if not pd.isna(raw_selection) else ""
    assert "chain_id == " not in selection_str
    if not selection_str:
        return

    entries = selection_str.split(";")
    only_old_style = all(
        s.startswith("chain ") and " and resi " in s for s in entries
    )
    assert only_old_style, f"only old-style entries should remain; got {entries}"
|
|
||
|
|
||
@pytest.mark.slow
def test_large_min_span_with_no_all_altlocs_yields_empty_selection(
    tmp_path: Path, resources_dir: Path, find_altloc_script
):
    """A very large ``min_span`` combined with ``include_all_altlocs=False``
    is expected to leave no selections at all. The single output row must
    still exist, with an empty ``selection`` cell and the other checked
    columns filled in."""
    input_csv = _altloc_input_csv(tmp_path, resources_dir, n_rows=1)
    output_file = tmp_path / "output.csv"
    args = _make_args(
        input_csv=input_csv,
        output_file=output_file,
        min_span=10**6,
        include_all_altlocs=False,
    )

    find_altloc_script.main(args)

    df = pd.read_csv(output_file)
    assert len(df) == 1
    row = df.iloc[0]

    # The cell may come back as NaN or as an empty string; accept both.
    selection = row["selection"]
    assert pd.isna(selection) or selection == ""

    expected_values = {
        "protein": "1VME_0",
        "structure_pattern": "1vme_final_carved_edited_0.5occA_0.5occB.cif",
        "map_pattern": "1vme_final_carved_edited_0.5occA_0.5occB_1.80A.ccp4",
        "base_map_dir": "1vme",
    }
    for column, expected in expected_values.items():
        assert row[column] == expected
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should update this too.