Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion surface_morphometrics/accept_refinement.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,16 @@ def accept_one(work_dir, basename, step, radius_hit, dry_run):
continue
_remove(path, dry_run)

# 4. Warn if the promoted surface still needs a curvature graph.
# 4. Record which iteration was accepted (the per-iteration files are gone now,
# so this one-line marker is how `morphometrics status` reports the step).
if not dry_run:
with open(f"{work_dir}{basename}.accepted_iter", "w") as handle:
handle.write(f"{step}\n")
else:
print(f" [dry-run] would record accepted iteration {step} "
f"in {basename}.accepted_iter")

# 5. Warn if the promoted surface still needs a curvature graph.
if not has_avv:
print(f" WARNING: iteration {step} was a lightweight (xcorr) iteration with no "
f"curvature graph.")
Expand Down
7 changes: 6 additions & 1 deletion surface_morphometrics/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
_LAZY_COMMANDS = {
"fetch_example": ("surface_morphometrics.fetch_example:fetch_example_cli",
"Download example data for testing (small Zenodo set or full EMPIAR data)."),
"validate": ("surface_morphometrics.validate:validate_cli",
"Check a config and its seg/tomo folders are consistent before running."),
"status": ("surface_morphometrics.status:status_cli",
"Summarize what has been computed for each segmentation in the dataset."),
"make_meshes": ("surface_morphometrics.segmentation_to_meshes:make_meshes_cli",
"Convert segmentation MRCs into membrane meshes (step 1)."),
"pycurv": ("surface_morphometrics.run_pycurv:run_pycurv_cli",
Expand Down Expand Up @@ -60,7 +64,7 @@
# so the standard pipeline order is obvious. `new_config` is registered eagerly
# but listed here for display.
_SECTIONS = [
("Setup", ["new_config", "fetch_example"]),
("Setup", ["new_config", "validate", "fetch_example", "status"]),
("Pipeline (run in this order)",
["make_meshes", "pycurv", "distances_orientations", "sample_density", "measure_thickness"]),
("Optional mesh refinement (after pycurv, before distances)",
Expand All @@ -75,6 +79,7 @@
_NEXT_HINTS = {
"new_config": "Edit the generated config.yml, then run: morphometrics make_meshes config.yml",
"fetch_example": "Next: cd into the example folder, then run: morphometrics make_meshes config.yml",
"validate": "Next: if validation passed, run: morphometrics make_meshes config.yml",
"make_meshes": "Next: morphometrics pycurv config.yml <name>.surface.vtp (run per-surface, in parallel on a cluster if you can)",
"pycurv": "Next: morphometrics distances_orientations config.yml (or first refine: morphometrics refine_mesh config.yml)",
"refine_mesh": "Next: inspect the convergence plots, then commit one iteration: morphometrics accept_refinement config.yml <step>",
Expand Down
141 changes: 141 additions & 0 deletions surface_morphometrics/status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#! /usr/bin/env python
"""Summarize what has been computed for each segmentation in a dataset.

``status config.yml`` scans the work_dir and reports, per segmentation and per
membrane surface, which steps have run: meshing, curvature, refinement (and the
accepted iteration), and which distance / orientation / thickness measurements
exist. Everything is *derived* from the files on disk (and the per-surface CSV
column headers), so the report always reflects the current state -- nothing is
maintained separately and there is no state to get stale.

What each item is read from:
* mesh -- {surface}.surface.vtp exists
* curvature -- {surface}.AVV_rh{rh}.gt exists
* refinement -- the {surface}.accepted_iter marker (written by accept_refinement),
or leftover _refined_iter* files (run but not yet accepted)
* self-dist / verticality / inter / thickness -- columns in {surface}.AVV_rh{rh}.csv

Usage:
morphometrics status config.yml
morphometrics status config.yml --output status.txt
"""

__author__ = "Benjamin Barad"
__email__ = "benjamin.barad@gmail.com"
__license__ = "GPLv3"

import datetime
import os
import re
from glob import glob

import click

from .config_utils import load_config

DONE = "✓" # check mark
NONE = "—" # em dash


def _csv_columns(path):
"""Column names from a CSV header (reads only the first line)."""
try:
with open(path) as handle:
return set(handle.readline().rstrip("\n").split(","))
except OSError:
return set()


def _refine_status(work_dir, surface, radius_hit):
"""Human-readable refinement state for one surface."""
marker = f"{work_dir}{surface}.accepted_iter"
if os.path.exists(marker):
try:
return f"accepted@iter{open(marker).read().strip()}"
except OSError:
return "accepted"
iters = glob(f"{work_dir}{surface}_refined_iter*.surface.vtp")
if iters:
steps = sorted(int(m.group(1)) for m in
(re.search(r"_refined_iter(\d+)\.surface\.vtp$", p) for p in iters) if m)
span = f"iter{steps[0]}-{steps[-1]}" if len(steps) > 1 else f"iter{steps[0]}"
return f"run ({span}, not accepted)"
if os.path.exists(f"{work_dir}{surface}.AVV_rh{radius_hit}.gt.orig.bak"):
return "accepted (step unknown)"
return NONE


@click.command()
@click.argument("configfile", type=click.Path(exists=True))
@click.option("-o", "--output", "output_path", default=None, type=click.Path(),
help="Also write the summary to this text file.")
def status_cli(configfile, output_path):
"""Report what has been computed for each segmentation (derived from work_dir)."""
config = load_config(configfile, require=("seg_dir", "work_dir", "segmentation_values"))
seg_dir = config["seg_dir"]
work_dir = config["work_dir"]
tomo_dir = config.get("tomo_dir")
labels = list(config["segmentation_values"])
radius_hit = config["curvature_measurements"]["radius_hit"]

lines = []

def emit(text=""):
lines.append(text)

emit(f"Status {NONE} {work_dir} (radius_hit {radius_hit}) "
f"{datetime.date.today().isoformat()}")
emit()

seg_files = sorted(glob(f"{seg_dir}*.mrc"))
tomo_bases = ([os.path.basename(t)[:-4] for t in sorted(glob(f"{tomo_dir}*.mrc"))]
if tomo_dir and os.path.isdir(tomo_dir) else [])

if not seg_files:
emit(f"(no segmentations found in {seg_dir})")

for seg in seg_files:
seg_base = os.path.basename(seg)[:-4]
tnote = ""
if tomo_bases:
tnote = (f" [tomogram {DONE}]" if any(seg_base.startswith(t) for t in tomo_bases)
else " [tomogram missing]")
emit(f"{seg_base}{tnote}")

meshed = []
for label in labels:
surface = f"{seg_base}_{label}"
if not os.path.exists(f"{work_dir}{surface}.surface.vtp"):
continue
meshed.append(label)
curv = os.path.exists(f"{work_dir}{surface}.AVV_rh{radius_hit}.gt")
cols = _csv_columns(f"{work_dir}{surface}.AVV_rh{radius_hit}.csv") if curv else set()
refine = _refine_status(work_dir, surface, radius_hit)
partners = [o for o in labels if o != label and f"{o}_dist" in cols]
emit(f" {label:<8} mesh {DONE} curv {DONE if curv else NONE} "
f"refine {refine} self-dist {DONE if 'self_dist_min' in cols else NONE} "
f"vert {DONE if 'verticality' in cols else NONE} "
f"inter {', '.join(partners) if partners else NONE} "
f"thickness {DONE if 'thickness' in cols else NONE}")

if not meshed:
emit(" (no meshes yet)")
else:
missing = [label for label in labels if label not in meshed]
if missing:
emit(f" (no mesh for: {', '.join(missing)})")
emit()

emit(f"legend: {DONE} done {NONE} not done inter: partner surfaces with "
"distance/orientation measured")

report = "\n".join(lines)
click.echo(report)
if output_path:
with open(output_path, "w") as handle:
handle.write(report + "\n")
click.echo(f"\nWrote {output_path}")


if __name__ == "__main__":
status_cli()
189 changes: 189 additions & 0 deletions surface_morphometrics/validate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
#! /usr/bin/env python
"""Validate that a config.yml and its input folders are set up consistently.

Run this before the pipeline to catch the common setup mistakes up front:

* ``seg_dir`` exists and holds segmentation MRCs (an error if not),
* ``tomo_dir`` exists (a *warning* only -- tomograms are needed for density
sampling, thickness, and refinement, but not for the curvature-only pipeline),
* each segmentation has a matching tomogram under the prefix convention the rest
of the pipeline uses (the tomogram name is a prefix of the segmentation name),
so a name mismatch surfaces here instead of as a silently skipped tomogram,
* which of the configured ``segmentation_values`` labels actually appear in each
segmentation (flagging segmentations where none of them appear, or where a label
value is present that the config does not map).

Usage:
morphometrics validate config.yml
"""

__author__ = "Benjamin Barad"
__email__ = "benjamin.barad@gmail.com"
__license__ = "GPLv3"

import os
import sys
from glob import glob

import click
import numpy as np

from .config_utils import load_config


def _present_label_values(path):
"""Set of integer label values present in a segmentation MRC (0/background dropped).

Reads slice-by-slice via mmap so a tomogram-sized volume does not have to be held
in memory at once. Returns None if the data could not be read.
"""
import mrcfile

present = set()
try:
with mrcfile.mmap(path, permissive=True, mode="r") as mrc:
data = mrc.data
if data is None:
return None
for plane in (data if data.ndim == 3 else [data]):
for value in np.unique(plane):
rounded = round(float(value))
if rounded != 0 and abs(float(value) - rounded) < 1e-3:
present.add(int(rounded))
except Exception:
return None
return present


@click.command()
@click.argument("configfile", type=click.Path(exists=True))
def validate_cli(configfile):
"""Check that CONFIGFILE and its seg/tomo folders are set up consistently.

Verifies the input directories exist, that every segmentation has a matching
tomogram, and reports which configured labels are present in each segmentation.
Exits non-zero if there are errors (warnings do not fail).
"""
config = load_config(configfile, require=("seg_dir", "segmentation_values"))
seg_dir = config["seg_dir"]
tomo_dir = config.get("tomo_dir")
seg_values = config["segmentation_values"]
value_to_name = {int(v): name for name, v in seg_values.items()}

errors = 0
warnings = 0

def error(msg):
nonlocal errors
errors += 1
click.echo(f" ERROR: {msg}")

def warn(msg):
nonlocal warnings
warnings += 1
click.echo(f" WARNING: {msg}")

def ok(msg):
click.echo(f" OK: {msg}")

click.echo(f"Validating {configfile}\n")

# --- input folders ---
click.echo("Folders:")
seg_dir_ok = os.path.isdir(seg_dir)
if seg_dir_ok:
ok(f"seg_dir exists: {seg_dir}")
else:
error(f"seg_dir does not exist: {seg_dir}")

tomo_note = "(needed only for density sampling, thickness, and refinement)"
tomo_present = bool(tomo_dir) and os.path.isdir(tomo_dir)
if not tomo_dir:
warn(f"tomo_dir is not set {tomo_note}")
elif tomo_present:
ok(f"tomo_dir exists: {tomo_dir}")
else:
warn(f"tomo_dir does not exist: {tomo_dir} {tomo_note}")

if not seg_dir_ok:
return _finish(errors, warnings)

seg_files = sorted(glob(seg_dir + "*.mrc"))
if not seg_files:
error(f"no .mrc segmentations found in seg_dir: {seg_dir}")
return _finish(errors, warnings)

tomo_bases = ([os.path.basename(t)[:-4] for t in sorted(glob(tomo_dir + "*.mrc"))]
if tomo_present else [])

click.echo("\nConfigured labels: "
+ ", ".join(f"{name}={value}" for name, value in seg_values.items()))
click.echo(f"\nSegmentations ({len(seg_files)} found):")
configured = set(value_to_name)

for seg in seg_files:
seg_base = os.path.basename(seg)[:-4]
click.echo(f"\n {os.path.basename(seg)}")

# Matching tomogram: its basename is a prefix of the segmentation basename.
if tomo_present:
matches = sorted((t for t in tomo_bases if seg_base.startswith(t)),
key=len, reverse=True)
if matches:
ok(f"matching tomogram: {matches[0]}.mrc")
else:
warn(f"no matching tomogram in {tomo_dir} (expected one whose name is a "
f"prefix of '{seg_base}'); density/thickness/refinement will skip "
"this segmentation -- check for a name mismatch")

# Which configured labels actually appear in the volume.
present = _present_label_values(seg)
if present is None:
warn("could not read MRC data to check label values")
continue
present_configured = sorted(configured & present)
if present_configured:
ok("labels present: "
+ ", ".join(f"{value_to_name[v]}({v})" for v in present_configured))
else:
warn("none of the configured label values appear in this segmentation -- "
"check segmentation_values")
absent = sorted(configured - present)
if absent:
click.echo(" configured but absent here: "
+ ", ".join(f"{value_to_name[v]}({v})" for v in absent))
extra = sorted(present - configured)
if extra:
warn(f"label value(s) present but not in segmentation_values: {extra}")

# The other direction: tomograms that match no segmentation. A tomogram is "used"
# if some segmentation name starts with it (the same prefix rule); an unmatched one
# is usually a name mismatch and will simply be skipped by the pipeline.
if tomo_present:
seg_bases = [os.path.basename(s)[:-4] for s in seg_files]
click.echo(f"\nTomograms ({len(tomo_bases)} found):")
orphans = [t for t in tomo_bases if not any(s.startswith(t) for s in seg_bases)]
if orphans:
for t in orphans:
warn(f"tomogram {t}.mrc matches no segmentation in {seg_dir} (no "
"segmentation name starts with it); it will not be used -- check "
"for a name mismatch")
else:
ok("every tomogram matches a segmentation")

return _finish(errors, warnings)


def _finish(errors, warnings):
click.echo("\n" + "-" * 60)
if errors:
click.echo(f"FAILED: {errors} error(s), {warnings} warning(s).")
sys.exit(1)
elif warnings:
click.echo(f"OK with {warnings} warning(s).")
else:
click.echo("All checks passed.")


if __name__ == "__main__":
validate_cli()
Loading
Loading