From 5fcae83c70d56108eefc903d3b9e7d123c7b8703 Mon Sep 17 00:00:00 2001 From: Benjamin Barad Date: Thu, 18 Jun 2026 20:08:47 -0700 Subject: [PATCH 01/11] WIP: re-parameterize bilayer fit by center + half-separation Mesh refinement sometimes mis-assigns the membrane center and the surface diverges. The dual-Gaussian thickness fit previously let the two leaflet peak positions float independently, so both could collapse onto the same leaflet and yield a bogus center/thickness. Re-parameterize the fit by (center, half_sep): the two leaflet peaks sit at center +/- half_sep, with half_sep bounded to a physical 2-7 nm range, so one Gaussian is forced into each leaflet. Adds _dual_gaussian_centered and a find_peaks-based _seed_bilayer_center seeder in _thickness_worker, promotes the fit-quality thresholds to module-level constants, restricts per-triangle fits to a window around the seeded center, and applies the same parameterization to the global average fit in refine_mesh. Work in progress: divergence issue still under investigation. Co-Authored-By: Claude Opus 4.8 --- surface_morphometrics/_thickness_worker.py | 93 +++++++++++++++++----- surface_morphometrics/refine_mesh.py | 32 +++++--- 2 files changed, 93 insertions(+), 32 deletions(-) diff --git a/surface_morphometrics/_thickness_worker.py b/surface_morphometrics/_thickness_worker.py index ac5ee55..c5df293 100644 --- a/surface_morphometrics/_thickness_worker.py +++ b/surface_morphometrics/_thickness_worker.py @@ -8,6 +8,11 @@ import numpy as np import scipy.optimize as opt +# Bilayer fit-quality thresholds, shared by the per-triangle and global fits. +R2_THRESHOLD = 0.6 +MIN_THICKNESS = 2.0 # nm (minimum accepted peak-to-peak leaflet separation) +MAX_THICKNESS = 7.0 # nm (maximum accepted peak-to-peak leaflet separation) + def _monogaussian(x, h, c, w): """Single gaussian function.""" @@ -24,6 +29,46 @@ def _monogaussian_with_offset(x, h, c, w, o): return _monogaussian(x, h, c, w) + o +def _dual_gaussian_centered(x, h1, w1, h2, w2, center, half_sep, o): + """Dual Gaussian parameterized by the bilayer center and half-separation. + + The two leaflet peaks sit at ``center - half_sep`` and ``center + half_sep``, + so bounding ``half_sep`` to a physical range forces one Gaussian into each + leaflet -- the fit can never collapse both peaks into the same leaflet (the + failure mode of seeding both Gaussians at the global-max peak). ``center`` is + the membrane center (the refinement offset); ``2 * half_sep`` is the thickness. + """ + return (_monogaussian(x, h1, center - half_sep, w1) + + _monogaussian(x, h2, center + half_sep, w2) + o) + + +def _seed_bilayer_center(a, b, default_center=0.0): + """Estimate (center, half_sep) of a bilayer profile region for fit seeding. + + Locates the two tallest peaks in the profile (the two leaflets) and seeds the + center at their midpoint and half_sep at half their separation. Falls back to + ``default_center`` if fewer than two peaks are found. ``half_sep`` is clamped + to the physical thickness range. + """ + from scipy.signal import find_peaks + + a = np.asarray(a) + b = np.asarray(b) + peaks, _ = find_peaks(b) + if len(peaks) >= 2: + # the two tallest peaks are the leaflets + tallest = peaks[np.argsort(b[peaks])[-2:]] + p_left, p_right = sorted(a[tallest]) + center = 0.5 * (p_left + p_right) + half = 0.5 * (p_right - p_left) + elif len(peaks) == 1: + center, half = float(a[peaks[0]]), MIN_THICKNESS / 2.0 + else: + center, half = default_center, MIN_THICKNESS / 2.0 + half = float(np.clip(half, MIN_THICKNESS / 2.0, MAX_THICKNESS / 2.0)) + return float(center), half + + def _compute_r_squared(y_true, y_pred): """Compute R² (coefficient of determination).""" ss_res = np.sum((y_true - y_pred) ** 2) @@ -394,10 +439,8 @@ def fit_triangle_chunk_offsets(indices): [(offset, sigma1, sigma2, fit_method), ...] for each triangle in chunk. fit_method: 0=failed, 1=dual_gaussian, 2=single_gaussian, 3=centroid, 4=xcorr """ - # Fit quality thresholds - R2_THRESHOLD = 0.6 - MIN_THICKNESS = 2.0 # nm - MAX_THICKNESS = 7.0 # nm + # Fit-quality thresholds are module-level constants (R2_THRESHOLD, + # MIN_THICKNESS, MAX_THICKNESS). # Sample spacing — constant across all triangles, computed once per chunk sample_spacing = _worker_x[1] - _worker_x[0] if len(_worker_x) > 1 else 1.0 @@ -479,30 +522,42 @@ def fit_triangle_chunk_offsets(indices): best_sigma2 = np.nan best_method = 0 - # Step 1: Try dual Gaussian (unless monolayer mode) + # Step 1: Try dual Gaussian (unless monolayer mode). Parameterized by the + # bilayer center + half-separation so the two leaflet peaks are forced to + # opposite sides and cannot collapse into the same leaflet. if not _worker_monolayer: try: if _worker_global_fit_params is not None: - # Use global fit parameters for tighter, better-informed initialization + # Seed from the (more reliable) global average fit. c1_g, w1_g, c2_g, w2_g = _worker_global_fit_params - p0 = [0.02, c1_g, w1_g, 0.02, c2_g, w2_g, 0] - bounds = ([0.005, c1_g - 2, max(0.6, w1_g * 0.5), 0.005, c2_g - 2, max(0.6, w2_g * 0.5), -1], - [0.04, c1_g + 2, min(2.5, w1_g * 2.0), 0.04, c2_g + 2, min(2.5, w2_g * 2.0), 1]) + center_seed = 0.5 * (c1_g + c2_g) + half_seed = float(np.clip(0.5 * (c2_g - c1_g), + MIN_THICKNESS / 2.0, MAX_THICKNESS / 2.0)) + w1_seed, w2_seed = w1_g, w2_g else: - p0 = [0.02, ipk-0.5, 1.5, 0.02, ipk+0.5, 1.5, 0] - bounds = ([0.005, ipk-6, 0.8, 0.005, ipk-1.5, 0.8, -1], - [0.04, ipk+1.5, 2.2, 0.04, ipk+6, 2.2, 1]) - p3, _ = opt.curve_fit(_dual_gaussian, a, b, p0, bounds=bounds) + center_seed, half_seed = _seed_bilayer_center(a, b) + w1_seed = w2_seed = 1.5 + # Restrict the fit to a window around the seeded bilayer so distant + # density (adjacent membranes, CTF ringing) cannot pull the fit out. + fit_window = MAX_THICKNESS / 2.0 + 2.0 + wmask = (a >= center_seed - fit_window) & (a <= center_seed + fit_window) + af, bf = (a[wmask], b[wmask]) if int(wmask.sum()) >= 5 else (a, b) + + # params: h1, w1, h2, w2, center, half_sep, offset + p0 = [0.02, w1_seed, 0.02, w2_seed, center_seed, half_seed, 0.0] + bounds = ([0.005, 0.8, 0.005, 0.8, center_seed - 3.0, MIN_THICKNESS / 2.0, -1], + [0.04, 2.2, 0.04, 2.2, center_seed + 3.0, MAX_THICKNESS / 2.0, 1]) + p3, _ = opt.curve_fit(_dual_gaussian_centered, af, bf, p0, bounds=bounds) # Validate fit - y_pred = _dual_gaussian(a, *p3) - r2 = _compute_r_squared(b, y_pred) - thickness = np.abs(p3[4] - p3[1]) + y_pred = _dual_gaussian_centered(af, *p3) + r2 = _compute_r_squared(bf, y_pred) + thickness = 2.0 * p3[5] if r2 > R2_THRESHOLD and MIN_THICKNESS <= thickness <= MAX_THICKNESS: - best_offset = (p3[1] + p3[4]) / 2 - best_sigma1 = p3[2] - best_sigma2 = p3[5] + best_offset = p3[4] # bilayer center + best_sigma1 = p3[1] + best_sigma2 = p3[3] best_method = 1 except Exception: pass diff --git a/surface_morphometrics/refine_mesh.py b/surface_morphometrics/refine_mesh.py index 9b0ec1b..270cdf5 100644 --- a/surface_morphometrics/refine_mesh.py +++ b/surface_morphometrics/refine_mesh.py @@ -45,7 +45,9 @@ from .sample_density import load_mrc, sample_density_single from .measure_thickness import find_mins, dual_gaussian, monogaussian from ._thickness_worker import (init_worker, fit_triangle_chunk_offsets, - init_local_thickness_worker, compute_thickness_chunk) + init_local_thickness_worker, compute_thickness_chunk, + _dual_gaussian_centered, _seed_bilayer_center, + MIN_THICKNESS, MAX_THICKNESS) from . import curvature @@ -838,24 +840,28 @@ def refine_mesh_iteration(graph_file, vtp_file, mrc_file, output_base, pixel_siz pre_disp_avg = np.mean(all_profiles_raw[valid_raw], axis=0) try: - ipk_g = x_positions[np.argmax(pre_disp_avg)] mid_g = len(pre_disp_avg) // 2 lm_g = np.argmin(pre_disp_avg[:mid_g]) rm_g = np.argmin(pre_disp_avg[mid_g:]) + mid_g ag = x_positions[lm_g + 2:rm_g - 2] bg = pre_disp_avg[lm_g + 2:rm_g - 2] if len(ag) >= 7: - p0g = [0.02, ipk_g - 0.5, 1.5, 0.02, ipk_g + 0.5, 1.5, 0] - bounds_g = ([0.005, ipk_g - 6, 0.8, 0.005, ipk_g - 1.5, 0.8, -1], - [0.04, ipk_g + 1.5, 2.2, 0.04, ipk_g + 6, 2.2, 1]) - popt_g, _ = opt.curve_fit(dual_gaussian, ag, bg, p0g, bounds=bounds_g) - global_center_offset = (popt_g[1] + popt_g[4]) / 2 - global_sigma1 = popt_g[2] - global_sigma2 = popt_g[5] - global_fit_params = (popt_g[1], popt_g[2], popt_g[4], popt_g[5]) - print(f" Global avg profile: c1={popt_g[1]:.3f} nm, c2={popt_g[4]:.3f} nm, " - f"midpoint={global_center_offset:+.3f} nm, " - f"thickness={abs(popt_g[4] - popt_g[1]):.3f} nm") + # Center + half-separation parameterization keeps the two leaflet + # peaks apart so the global fit cannot collapse into one leaflet. + center_seed, half_seed = _seed_bilayer_center(ag, bg) + p0g = [0.02, 1.5, 0.02, 1.5, center_seed, half_seed, 0] + bounds_g = ([0.005, 0.8, 0.005, 0.8, center_seed - 3.0, MIN_THICKNESS / 2.0, -1], + [0.04, 2.2, 0.04, 2.2, center_seed + 3.0, MAX_THICKNESS / 2.0, 1]) + popt_g, _ = opt.curve_fit(_dual_gaussian_centered, ag, bg, p0g, bounds=bounds_g) + center_g, half_g = popt_g[4], popt_g[5] + c1_g, c2_g = center_g - half_g, center_g + half_g + global_center_offset = center_g + global_sigma1 = popt_g[1] + global_sigma2 = popt_g[3] + global_fit_params = (c1_g, popt_g[1], c2_g, popt_g[3]) + print(f" Global avg profile: c1={c1_g:.3f} nm, c2={c2_g:.3f} nm, " + f"center={global_center_offset:+.3f} nm, " + f"thickness={2.0 * half_g:.3f} nm") else: print(" Not enough points in fitting window for global avg dual Gaussian") except Exception as e: From 60893774a1186558e1dd5f19f653117734d67cb9 Mon Sep 17 00:00:00 2001 From: Benjamin Barad Date: Sat, 20 Jun 2026 11:09:50 -0700 Subject: [PATCH 02/11] refine_mesh: window the global dual-Gaussian fit; add centering tests - Restrict the global-average dual-Gaussian fit to a window around the seeded bilayer center (like the per-triangle fit), so adjacent membranes / CTF ringing can't pull the global midpoint (the poor-fit fallback) off-center. - Add tests covering the center+half-separation model, the two-leaflet seeder, and that the windowed fit recovers the true bilayer center on an asymmetric bilayer + confounder (the "both Gaussians in one leaflet" failure) and leaves a well-centered bilayer unmoved. Co-Authored-By: Claude Opus 4.8 --- surface_morphometrics/refine_mesh.py | 7 +++- tests/test_thickness_worker.py | 57 ++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/surface_morphometrics/refine_mesh.py b/surface_morphometrics/refine_mesh.py index 270cdf5..2260a5a 100644 --- a/surface_morphometrics/refine_mesh.py +++ b/surface_morphometrics/refine_mesh.py @@ -849,10 +849,15 @@ def refine_mesh_iteration(graph_file, vtp_file, mrc_file, output_base, pixel_siz # Center + half-separation parameterization keeps the two leaflet # peaks apart so the global fit cannot collapse into one leaflet. center_seed, half_seed = _seed_bilayer_center(ag, bg) + # Restrict the fit to a window around the seeded bilayer so distant + # density (adjacent membranes, CTF ringing) cannot pull the fit out. + fit_window = MAX_THICKNESS / 2.0 + 2.0 + gmask = (ag >= center_seed - fit_window) & (ag <= center_seed + fit_window) + agf, bgf = (ag[gmask], bg[gmask]) if int(gmask.sum()) >= 7 else (ag, bg) p0g = [0.02, 1.5, 0.02, 1.5, center_seed, half_seed, 0] bounds_g = ([0.005, 0.8, 0.005, 0.8, center_seed - 3.0, MIN_THICKNESS / 2.0, -1], [0.04, 2.2, 0.04, 2.2, center_seed + 3.0, MAX_THICKNESS / 2.0, 1]) - popt_g, _ = opt.curve_fit(_dual_gaussian_centered, ag, bg, p0g, bounds=bounds_g) + popt_g, _ = opt.curve_fit(_dual_gaussian_centered, agf, bgf, p0g, bounds=bounds_g) center_g, half_g = popt_g[4], popt_g[5] c1_g, c2_g = center_g - half_g, center_g + half_g global_center_offset = center_g diff --git a/tests/test_thickness_worker.py b/tests/test_thickness_worker.py index 8e2ca2d..cd3ab6f 100644 --- a/tests/test_thickness_worker.py +++ b/tests/test_thickness_worker.py @@ -1,9 +1,66 @@ """Tests for the pure fitting/alignment helpers in _thickness_worker (numpy/scipy).""" import numpy as np +import scipy.optimize as opt from surface_morphometrics import _thickness_worker as tw +def _fit_bilayer(dat, x): + """Mirror the dual-Gaussian offset fit used in fit_triangle_chunk_offsets.""" + mid = len(dat) // 2 + lm = np.argmin(dat[:mid]) + rm = np.argmin(dat[mid:]) + mid + a, b = x[lm + 2:rm - 2], dat[lm + 2:rm - 2] + center_seed, half_seed = tw._seed_bilayer_center(a, b) + window = tw.MAX_THICKNESS / 2.0 + 2.0 + m = (a >= center_seed - window) & (a <= center_seed + window) + af, bf = (a[m], b[m]) if m.sum() >= 5 else (a, b) + p0 = [0.02, 1.5, 0.02, 1.5, center_seed, half_seed, 0.0] + bounds = ([0.005, 0.8, 0.005, 0.8, center_seed - 3.0, tw.MIN_THICKNESS / 2.0, -1], + [0.04, 2.2, 0.04, 2.2, center_seed + 3.0, tw.MAX_THICKNESS / 2.0, 1]) + p, _ = opt.curve_fit(tw._dual_gaussian_centered, af, bf, p0, bounds=bounds) + return p[4], p[5] # center, half_sep + + +def test_dual_gaussian_centered_is_two_separated_peaks(): + x = np.linspace(-8, 8, 200) + y = tw._dual_gaussian_centered(x, 1.0, 1.0, 1.0, 1.0, center=0.5, half_sep=2.0, o=0.1) + # equals two Gaussians at center -/+ half_sep, plus offset + expected = (tw._monogaussian(x, 1, 0.5 - 2.0, 1) + + tw._monogaussian(x, 1, 0.5 + 2.0, 1) + 0.1) + assert np.allclose(y, expected) + + +def test_seed_bilayer_center_finds_two_leaflets(): + x = np.linspace(-10, 10, 201) + # leaflets at -0.6 and +3.0 -> center +1.2, half 1.8 + b = tw._monogaussian(x, 1.0, -0.6, 1.0) + tw._monogaussian(x, 0.8, 3.0, 1.0) + center, half = tw._seed_bilayer_center(x, b) + assert abs(center - 1.2) < 0.2 and abs(half - 1.8) < 0.2 + + +def test_fit_recovers_center_of_asymmetric_bilayer_with_confounder(): + # The reported failure: taller leaflet + nearby density made the old fit put + # both Gaussians in one leaflet. The centered+windowed fit must recover the + # true bilayer center (+1.2 nm), not a single leaflet position. + x = np.linspace(-10, 10, 161) + dat = (0.011 + + tw._monogaussian(x, 0.018, 1.2 - 1.8, 1.0) # taller left leaflet + + tw._monogaussian(x, 0.015, 1.2 + 1.8, 1.0) # shorter right leaflet + + tw._monogaussian(x, 0.014, 7.0, 1.0)) # confounding nearby density + center, half = _fit_bilayer(dat, x) + assert abs(center - 1.2) < 0.3 # bilayer center, not a leaflet + assert tw.MIN_THICKNESS <= 2 * half <= tw.MAX_THICKNESS # peaks stay separated + assert half >= tw.MIN_THICKNESS / 2.0 + + +def test_fit_keeps_well_centered_bilayer_centered(): + x = np.linspace(-10, 10, 161) + dat = 0.011 + tw._monogaussian(x, 0.017, -2.0, 1.1) + tw._monogaussian(x, 0.017, 2.0, 1.1) + center, half = _fit_bilayer(dat, x) + assert abs(center) < 0.2 and abs(2 * half - 4.0) < 0.5 # no spurious move + + def test_monogaussian_peak_and_symmetry(): x = np.linspace(-10, 10, 401) y = tw._monogaussian(x, h=2.0, c=1.0, w=1.5) From e4e28ba80c3d7d559588e54f3d81bd5c22fc506f Mon Sep 17 00:00:00 2001 From: Benjamin Barad Date: Sat, 20 Jun 2026 11:30:28 -0700 Subject: [PATCH 03/11] refine_mesh: fall back to single Gaussian when only one leaflet is resolved Forcing the center+half_sep dual Gaussian (half_sep >= 1 nm) onto a single, poorly-resolved peak still centered correctly but reported a spurious ~2 nm bilayer. _seed_bilayer_center now counts prominent peaks; the per-triangle and global fits only attempt the dual Gaussian when two leaflets are actually resolved (n_peaks >= 2). One-peak regions fall through to the single-Gaussian step (method 2), which centers on the peak without a fake thickness. Tests cover the single-peak fallback. Co-Authored-By: Claude Opus 4.8 --- surface_morphometrics/_thickness_worker.py | 54 +++++++++++++--------- surface_morphometrics/refine_mesh.py | 7 +-- tests/test_thickness_worker.py | 31 ++++++++++++- 3 files changed, 65 insertions(+), 27 deletions(-) diff --git a/surface_morphometrics/_thickness_worker.py b/surface_morphometrics/_thickness_worker.py index c5df293..9c02707 100644 --- a/surface_morphometrics/_thickness_worker.py +++ b/surface_morphometrics/_thickness_worker.py @@ -43,30 +43,35 @@ def _dual_gaussian_centered(x, h1, w1, h2, w2, center, half_sep, o): def _seed_bilayer_center(a, b, default_center=0.0): - """Estimate (center, half_sep) of a bilayer profile region for fit seeding. - - Locates the two tallest peaks in the profile (the two leaflets) and seeds the - center at their midpoint and half_sep at half their separation. Falls back to - ``default_center`` if fewer than two peaks are found. ``half_sep`` is clamped - to the physical thickness range. + """Estimate (center, half_sep, n_peaks) of a bilayer profile region. + + Locates prominent peaks (the leaflets) and seeds the center at the midpoint of + the two tallest and half_sep at half their separation. ``n_peaks`` is the count + of prominent peaks: callers use it to decide whether a bilayer is actually + resolved (>= 2) or whether to fall back to a single Gaussian (poorly resolved / + high-defocus regions). Falls back to ``default_center`` if no peak is found; + ``half_sep`` is clamped to the physical thickness range. """ from scipy.signal import find_peaks a = np.asarray(a) b = np.asarray(b) - peaks, _ = find_peaks(b) - if len(peaks) >= 2: - # the two tallest peaks are the leaflets + rng = float(b.max() - b.min()) + # Require each peak to rise at least 5% of the profile range above its + # surroundings, so noise bumps are not mistaken for a second leaflet. + peaks, _ = find_peaks(b, prominence=(0.05 * rng if rng > 0 else None)) + n_peaks = len(peaks) + if n_peaks >= 2: tallest = peaks[np.argsort(b[peaks])[-2:]] p_left, p_right = sorted(a[tallest]) center = 0.5 * (p_left + p_right) half = 0.5 * (p_right - p_left) - elif len(peaks) == 1: + elif n_peaks == 1: center, half = float(a[peaks[0]]), MIN_THICKNESS / 2.0 else: center, half = default_center, MIN_THICKNESS / 2.0 half = float(np.clip(half, MIN_THICKNESS / 2.0, MAX_THICKNESS / 2.0)) - return float(center), half + return float(center), half, n_peaks def _compute_r_squared(y_true, y_pred): @@ -525,18 +530,23 @@ def fit_triangle_chunk_offsets(indices): # Step 1: Try dual Gaussian (unless monolayer mode). Parameterized by the # bilayer center + half-separation so the two leaflet peaks are forced to # opposite sides and cannot collapse into the same leaflet. - if not _worker_monolayer: + if _worker_global_fit_params is not None: + # Seed from the (more reliable) global average fit, which already + # confirmed a resolved bilayer. + c1_g, w1_g, c2_g, w2_g = _worker_global_fit_params + center_seed = 0.5 * (c1_g + c2_g) + half_seed = float(np.clip(0.5 * (c2_g - c1_g), + MIN_THICKNESS / 2.0, MAX_THICKNESS / 2.0)) + w1_seed, w2_seed = w1_g, w2_g + n_resolved = 2 + else: + center_seed, half_seed, n_resolved = _seed_bilayer_center(a, b) + w1_seed = w2_seed = 1.5 + + # Only attempt the dual Gaussian when two leaflets are actually resolved; + # poorly-resolved regions (one peak) fall through to the single Gaussian. + if not _worker_monolayer and n_resolved >= 2: try: - if _worker_global_fit_params is not None: - # Seed from the (more reliable) global average fit. - c1_g, w1_g, c2_g, w2_g = _worker_global_fit_params - center_seed = 0.5 * (c1_g + c2_g) - half_seed = float(np.clip(0.5 * (c2_g - c1_g), - MIN_THICKNESS / 2.0, MAX_THICKNESS / 2.0)) - w1_seed, w2_seed = w1_g, w2_g - else: - center_seed, half_seed = _seed_bilayer_center(a, b) - w1_seed = w2_seed = 1.5 # Restrict the fit to a window around the seeded bilayer so distant # density (adjacent membranes, CTF ringing) cannot pull the fit out. fit_window = MAX_THICKNESS / 2.0 + 2.0 diff --git a/surface_morphometrics/refine_mesh.py b/surface_morphometrics/refine_mesh.py index 2260a5a..9767704 100644 --- a/surface_morphometrics/refine_mesh.py +++ b/surface_morphometrics/refine_mesh.py @@ -845,10 +845,10 @@ def refine_mesh_iteration(graph_file, vtp_file, mrc_file, output_base, pixel_siz rm_g = np.argmin(pre_disp_avg[mid_g:]) + mid_g ag = x_positions[lm_g + 2:rm_g - 2] bg = pre_disp_avg[lm_g + 2:rm_g - 2] - if len(ag) >= 7: + center_seed, half_seed, n_resolved = _seed_bilayer_center(ag, bg) + if len(ag) >= 7 and n_resolved >= 2: # Center + half-separation parameterization keeps the two leaflet # peaks apart so the global fit cannot collapse into one leaflet. - center_seed, half_seed = _seed_bilayer_center(ag, bg) # Restrict the fit to a window around the seeded bilayer so distant # density (adjacent membranes, CTF ringing) cannot pull the fit out. fit_window = MAX_THICKNESS / 2.0 + 2.0 @@ -868,7 +868,8 @@ def refine_mesh_iteration(graph_file, vtp_file, mrc_file, output_base, pixel_siz f"center={global_center_offset:+.3f} nm, " f"thickness={2.0 * half_g:.3f} nm") else: - print(" Not enough points in fitting window for global avg dual Gaussian") + print(f" Global average bilayer not resolved (n_peaks={n_resolved}) " + "or too few points; per-triangle fits will self-seed.") except Exception as e: print(f" Global avg profile dual Gaussian fit failed: {e}") diff --git a/tests/test_thickness_worker.py b/tests/test_thickness_worker.py index cd3ab6f..d36f683 100644 --- a/tests/test_thickness_worker.py +++ b/tests/test_thickness_worker.py @@ -11,7 +11,7 @@ def _fit_bilayer(dat, x): lm = np.argmin(dat[:mid]) rm = np.argmin(dat[mid:]) + mid a, b = x[lm + 2:rm - 2], dat[lm + 2:rm - 2] - center_seed, half_seed = tw._seed_bilayer_center(a, b) + center_seed, half_seed, _ = tw._seed_bilayer_center(a, b) window = tw.MAX_THICKNESS / 2.0 + 2.0 m = (a >= center_seed - window) & (a <= center_seed + window) af, bf = (a[m], b[m]) if m.sum() >= 5 else (a, b) @@ -35,10 +35,37 @@ def test_seed_bilayer_center_finds_two_leaflets(): x = np.linspace(-10, 10, 201) # leaflets at -0.6 and +3.0 -> center +1.2, half 1.8 b = tw._monogaussian(x, 1.0, -0.6, 1.0) + tw._monogaussian(x, 0.8, 3.0, 1.0) - center, half = tw._seed_bilayer_center(x, b) + center, half, n_peaks = tw._seed_bilayer_center(x, b) + assert n_peaks == 2 assert abs(center - 1.2) < 0.2 and abs(half - 1.8) < 0.2 +def test_seed_bilayer_center_reports_single_unresolved_peak(): + x = np.linspace(-10, 10, 201) + b = tw._monogaussian(x, 1.0, 1.0, 2.5) # one broad, unresolved peak + center, half, n_peaks = tw._seed_bilayer_center(x, b) + assert n_peaks == 1 # only one leaflet resolved + assert abs(center - 1.0) < 0.2 # center seeded at the single peak + + +def test_single_resolved_peak_falls_back_to_single_gaussian(): + # A poorly-resolved region (one broad peak): the dual fit must NOT be forced; + # n_resolved < 2 routes it to the single-Gaussian step, which still centers on + # the peak (no spurious ~2 nm bilayer reported). + x = np.linspace(-10, 10, 161) + dat = 0.011 + tw._monogaussian(x, 0.02, 1.0, 2.5) + mid = len(dat) // 2 + lm = np.argmin(dat[:mid]); rm = np.argmin(dat[mid:]) + mid + a, b = x[lm + 2:rm - 2], dat[lm + 2:rm - 2] + _, _, n_resolved = tw._seed_bilayer_center(a, b) + assert n_resolved < 2 # dual Gaussian is skipped + # the single-Gaussian fallback recovers the peak center + p_mono, _ = opt.curve_fit(tw._monogaussian_with_offset, a, b, + [0.03, 1.0, 2.5, 0], + bounds=([0.005, -6, 1.0, -1], [0.06, 6, 5.0, 1])) + assert abs(p_mono[1] - 1.0) < 0.3 + + def test_fit_recovers_center_of_asymmetric_bilayer_with_confounder(): # The reported failure: taller leaflet + nearby density made the old fit put # both Gaussians in one leaflet. The centered+windowed fit must recover the From 4475d46e8e449015aaf17bc98c76e6acaeae0b24 Mon Sep 17 00:00:00 2001 From: Benjamin Barad Date: Sat, 20 Jun 2026 11:35:12 -0700 Subject: [PATCH 04/11] refine_mesh: robust leaflet seeding (reject shoulders and distant confounders) Seed the bilayer by anchoring on the most prominent peak (the membrane) and pairing it with a partner peak that is a physical bilayer thickness (2-7 nm) away and prominent enough (>= 30% of the anchor). This: - rejects small flanking shoulders (neighboring protein density) by prominence, - rejects distant confounders (other membranes/protein > 7 nm) by separation, - keeps genuinely asymmetric bilayers as two leaflets. Regions with no qualifying partner resolve as a single peak -> single-Gaussian fallback. Tests cover the shoulder and asymmetric+confounder cases. Co-Authored-By: Claude Opus 4.8 --- surface_morphometrics/_thickness_worker.py | 63 ++++++++++++++-------- tests/test_thickness_worker.py | 22 ++++++++ 2 files changed, 64 insertions(+), 21 deletions(-) diff --git a/surface_morphometrics/_thickness_worker.py b/surface_morphometrics/_thickness_worker.py index 9c02707..7a80aed 100644 --- a/surface_morphometrics/_thickness_worker.py +++ b/surface_morphometrics/_thickness_worker.py @@ -12,6 +12,10 @@ R2_THRESHOLD = 0.6 MIN_THICKNESS = 2.0 # nm (minimum accepted peak-to-peak leaflet separation) MAX_THICKNESS = 7.0 # nm (maximum accepted peak-to-peak leaflet separation) +# A second peak only counts as a real leaflet if its prominence is at least this +# fraction of the strongest peak's; this rejects small flanking shoulders (e.g. +# neighboring protein density) that would otherwise be mistaken for a leaflet. +MIN_LEAFLET_PROMINENCE_RATIO = 0.3 def _monogaussian(x, h, c, w): @@ -43,14 +47,16 @@ def _dual_gaussian_centered(x, h1, w1, h2, w2, center, half_sep, o): def _seed_bilayer_center(a, b, default_center=0.0): - """Estimate (center, half_sep, n_peaks) of a bilayer profile region. - - Locates prominent peaks (the leaflets) and seeds the center at the midpoint of - the two tallest and half_sep at half their separation. ``n_peaks`` is the count - of prominent peaks: callers use it to decide whether a bilayer is actually - resolved (>= 2) or whether to fall back to a single Gaussian (poorly resolved / - high-defocus regions). Falls back to ``default_center`` if no peak is found; - ``half_sep`` is clamped to the physical thickness range. + """Estimate (center, half_sep, n_resolved) of a bilayer profile region. + + Finds peaks by prominence (so a peak must genuinely rise above its surroundings, + not just be elevated by a neighbor's tail), then takes the two **most prominent** + as the leaflets. A second leaflet only counts if its prominence is at least + ``MIN_LEAFLET_PROMINENCE_RATIO`` of the strongest peak's, which rejects small + flanking shoulders (e.g. neighboring protein density). ``n_resolved`` is the + number of real leaflets (2 -> fit a dual Gaussian; 1 -> a single central peak, + fall back to a single Gaussian; 0 -> nothing resolved). ``half_sep`` is clamped + to the physical thickness range. """ from scipy.signal import find_peaks @@ -58,20 +64,35 @@ def _seed_bilayer_center(a, b, default_center=0.0): b = np.asarray(b) rng = float(b.max() - b.min()) # Require each peak to rise at least 5% of the profile range above its - # surroundings, so noise bumps are not mistaken for a second leaflet. - peaks, _ = find_peaks(b, prominence=(0.05 * rng if rng > 0 else None)) - n_peaks = len(peaks) - if n_peaks >= 2: - tallest = peaks[np.argsort(b[peaks])[-2:]] - p_left, p_right = sorted(a[tallest]) + # surroundings, so noise bumps are not picked up at all. + peaks, props = find_peaks(b, prominence=(0.05 * rng if rng > 0 else None)) + if len(peaks) == 0: + return default_center, MIN_THICKNESS / 2.0, 0 + + positions = a[peaks] + proms = props["prominences"] + # Anchor on the strongest peak (the dominant membrane density), then look for + # the partner leaflet: a peak a physical bilayer thickness (MIN..MAX) away that + # is prominent enough. This excludes distant confounders (other membranes / + # protein further than MAX_THICKNESS) and rejects weak flanking shoulders. + primary = int(np.argmax(proms)) + partner = None + for j in range(len(peaks)): + if j == primary: + continue + sep = abs(positions[j] - positions[primary]) + if (MIN_THICKNESS <= sep <= MAX_THICKNESS + and proms[j] >= MIN_LEAFLET_PROMINENCE_RATIO * proms[primary]): + if partner is None or proms[j] > proms[partner]: + partner = j + if partner is not None: + p_left, p_right = sorted([positions[primary], positions[partner]]) center = 0.5 * (p_left + p_right) - half = 0.5 * (p_right - p_left) - elif n_peaks == 1: - center, half = float(a[peaks[0]]), MIN_THICKNESS / 2.0 - else: - center, half = default_center, MIN_THICKNESS / 2.0 - half = float(np.clip(half, MIN_THICKNESS / 2.0, MAX_THICKNESS / 2.0)) - return float(center), half, n_peaks + half = float(np.clip(0.5 * (p_right - p_left), + MIN_THICKNESS / 2.0, MAX_THICKNESS / 2.0)) + return float(center), half, 2 + # One real peak (a single, possibly unresolved, membrane with weak shoulders). + return float(positions[primary]), MIN_THICKNESS / 2.0, 1 def _compute_r_squared(y_true, y_pred): diff --git a/tests/test_thickness_worker.py b/tests/test_thickness_worker.py index d36f683..c67e8c5 100644 --- a/tests/test_thickness_worker.py +++ b/tests/test_thickness_worker.py @@ -48,6 +48,28 @@ def test_seed_bilayer_center_reports_single_unresolved_peak(): assert abs(center - 1.0) < 0.2 # center seeded at the single peak +def test_seed_rejects_small_shoulders_as_single_peak(): + # A single central membrane peak flanked by two smaller protein-density + # shoulders. The shoulders are local maxima but have tiny prominence, so they + # must NOT be taken as leaflets -- the region is one (unresolved) membrane. + x = np.linspace(-10, 10, 161) + b = (tw._monogaussian(x, 0.020, 0.0, 1.5) # central membrane + + tw._monogaussian(x, 0.008, -4.0, 1.0) # protein shoulder + + tw._monogaussian(x, 0.008, 4.0, 1.0)) # protein shoulder + center, half, n_resolved = tw._seed_bilayer_center(x, b) + assert n_resolved == 1 # shoulders rejected, only the membrane counts + assert abs(center) < 0.3 # centered on the membrane, not a shoulder/midpoint + + +def test_seed_keeps_asymmetric_bilayer_as_two_leaflets(): + # Genuinely asymmetric bilayer (one leaflet taller) must still resolve as two. + x = np.linspace(-10, 10, 161) + b = tw._monogaussian(x, 0.018, -0.6, 1.0) + tw._monogaussian(x, 0.015, 3.0, 1.0) + center, half, n_resolved = tw._seed_bilayer_center(x, b) + assert n_resolved == 2 + assert abs(center - 1.2) < 0.3 + + def test_single_resolved_peak_falls_back_to_single_gaussian(): # A poorly-resolved region (one broad peak): the dual fit must NOT be forced; # n_resolved < 2 routes it to the single-Gaussian step, which still centers on From 5b05b711cb358e7778da749480aed800abb1f256 Mon Sep 17 00:00:00 2001 From: Benjamin Barad Date: Sat, 20 Jun 2026 11:39:03 -0700 Subject: [PATCH 05/11] Test partial-mixing bilayer: dual fit recovers center through a shallow dip Co-Authored-By: Claude Opus 4.8 --- tests/test_thickness_worker.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/test_thickness_worker.py b/tests/test_thickness_worker.py index c67e8c5..6a4a0e5 100644 --- a/tests/test_thickness_worker.py +++ b/tests/test_thickness_worker.py @@ -70,6 +70,19 @@ def test_seed_keeps_asymmetric_bilayer_as_two_leaflets(): assert abs(center - 1.2) < 0.3 +def test_partial_mixing_with_shallow_dip_still_fits_dual(): + # Overlapping leaflets that still leave a shallow saddle (partial mixing) must + # resolve as two and the dual fit must recover the center exactly. + x = np.linspace(-10, 10, 201) + dat = 0.011 + tw._monogaussian(x, 0.017, -1.75, 1.0) + tw._monogaussian(x, 0.017, 1.75, 1.0) + mid = len(dat) // 2 + lm = np.argmin(dat[:mid]); rm = np.argmin(dat[mid:]) + mid + _, _, n_resolved = tw._seed_bilayer_center(x[lm + 2:rm - 2], dat[lm + 2:rm - 2]) + assert n_resolved == 2 + center, half = _fit_bilayer(dat, x) + assert abs(center) < 0.1 and abs(2 * half - 3.5) < 0.4 + + def test_single_resolved_peak_falls_back_to_single_gaussian(): # A poorly-resolved region (one broad peak): the dual fit must NOT be forced; # n_resolved < 2 routes it to the single-Gaussian step, which still centers on From 14c02f0798bb859077d7efe7221280990c26dccc Mon Sep 17 00:00:00 2001 From: Benjamin Barad Date: Mon, 22 Jun 2026 21:55:11 -0700 Subject: [PATCH 06/11] Make dual-Gaussian bilayer fitting robust for merged/partially-mixed membranes The density-profile bilayer fit (used by both mesh refinement and thickness measurement) had three problems that hurt real, well-behaved surfaces: * Leaflet detection used find_peaks prominence, which is asymmetric between the two halves of a bilayer: a hair-taller leaflet gets prominence measured to the solvent base while the other only reaches the inter-leaflet saddle, so their ratio is ~0.2 even for a symmetric bilayer. This made essentially every clean bilayer fall back to a single Gaussian. Detect leaflets by height above the shared solvent base instead (and enumerate all local maxima, no prominence floor), which also keeps a thin/merged bilayer's shallow-saddle second leaflet. * The dual model used independent leaflet widths, letting the fit absorb an asymmetric outer flank into unequal widths; because both peaks share one center, that width tilt shifted the fitted bilayer center ~0.1 nm to one side. Switch to a shared-width, centered (center + half-separation) model fit over a window symmetric about the seed, so the leaflets stay symmetric and the center sits on the true midpoint. * On a surface whose global average clearly resolves a bilayer, a locally-merged triangle was either force-fit into a spurious ~2 nm bilayer (mis-centering skewed peaks) or dropped entirely. Add a two-tier gate: strictly-resolved triangles fit and validate on R^2 + physical thickness; merged triangles are recovered from the global prior but accepted only when non-degenerate and center-stable (_dual_recovery_ok), so genuine single/skewed peaks are still rejected to the single Gaussian. measure_thickness now computes the global bilayer prior, applies the recovery tier, and reports a continuous per-triangle bilayer_resolution score (the height-above-base leaflet ratio) in the exported CSV -- a reliability flag that separates strictly-resolved (>= 0.5) from prior-recovered (slightly thin, < 0.5) measurements, since R^2 cannot. The whole-surface average fit and its _fit.svg plot use the same shared-width model so the published fit matches what drives the measurements. Co-Authored-By: Claude Opus 4.8 --- surface_morphometrics/_thickness_worker.py | 370 ++++++++++++++++----- surface_morphometrics/measure_thickness.py | 90 ++++- surface_morphometrics/refine_mesh.py | 25 +- tests/test_thickness_worker.py | 210 +++++++++++- 4 files changed, 580 insertions(+), 115 deletions(-) diff --git a/surface_morphometrics/_thickness_worker.py b/surface_morphometrics/_thickness_worker.py index 7a80aed..992867e 100644 --- a/surface_morphometrics/_thickness_worker.py +++ b/surface_morphometrics/_thickness_worker.py @@ -12,10 +12,21 @@ R2_THRESHOLD = 0.6 MIN_THICKNESS = 2.0 # nm (minimum accepted peak-to-peak leaflet separation) MAX_THICKNESS = 7.0 # nm (maximum accepted peak-to-peak leaflet separation) -# A second peak only counts as a real leaflet if its prominence is at least this -# fraction of the strongest peak's; this rejects small flanking shoulders (e.g. -# neighboring protein density) that would otherwise be mistaken for a leaflet. -MIN_LEAFLET_PROMINENCE_RATIO = 0.3 +# A second peak only counts as a real leaflet if it rises at least this fraction +# of the strongest peak's height above the shared solvent baseline. Comparing +# height-above-base (not find_peaks prominence) keeps a merged bilayer's second +# leaflet -- which can have a near-zero saddle and so a tiny prominence -- while +# still rejecting small flanking shoulders (e.g. neighboring protein density). +MIN_LEAFLET_HEIGHT_RATIO = 0.5 + +# "Prior-recovery" tier: when the surface's global average resolves a clean bilayer, +# a triangle whose locally-averaged profile merges into a single peak can still hold a +# real (just blurred) bilayer. Seeded from the global fit, its dual fit is trusted +# only if it is non-degenerate and center-stable -- distinguishing a recovered merged +# bilayer from a genuine single / skewed peak: +MIN_LEAFLET_AMP_RATIO = 0.3 # both leaflets must have comparable amplitude +MAX_CENTER_DISAGREEMENT = 0.7 # nm; dual center must agree with a single-Gaussian fit +PINNED_THICKNESS_EPS = 0.15 # nm above MIN_THICKNESS; reject floor-pinned (fully merged) fits def _monogaussian(x, h, c, w): @@ -46,45 +57,99 @@ def _dual_gaussian_centered(x, h1, w1, h2, w2, center, half_sep, o): + _monogaussian(x, h2, center + half_sep, w2) + o) +def _dual_gaussian_shared_width(x, h1, h2, w, center, half_sep, o): + """Dual Gaussian like :func:`_dual_gaussian_centered` but with one shared width. + + A bilayer's two leaflets have essentially the same width, so tying them to a + single ``w`` is more physical -- and, critically, removes a centering bias: + with independent widths the fit absorbs *asymmetric* outer density (a heavier + flank on one side, CTF ringing) by making one leaflet wider than the other, and + because both peaks share a single ``center`` that width-tilt is converted into a + systematic shift of the fitted center toward one side. Sharing the width leaves + the center free to sit at the true bilayer midpoint. + """ + return _dual_gaussian_centered(x, h1, w, h2, w, center, half_sep, o) + + +def _symmetric_fit_window(a, b, center_seed, half_seed, buffer=1.5, min_half_width=2.5): + """Window the profile symmetrically about ``center_seed`` for the dual fit. + + The window is scaled to the seeded bilayer (``half_seed + buffer``) and kept + symmetric about the seed: if the profile extends further on one side than the + other, the window is trimmed to the shorter side so the fit sees an equal x-range + each way. An asymmetric range would otherwise give the least-squares fit more + leverage on one side and pull the center off the midpoint. Falls back to the full + region if the symmetric window would be too small to fit. + """ + a = np.asarray(a) + b = np.asarray(b) + half_width = max(half_seed + buffer, min_half_width) + half_width = min(half_width, center_seed - float(a.min()), float(a.max()) - center_seed) + mask = (a >= center_seed - half_width) & (a <= center_seed + half_width) + if int(mask.sum()) >= 5: + return a[mask], b[mask] + return a, b + + def _seed_bilayer_center(a, b, default_center=0.0): """Estimate (center, half_sep, n_resolved) of a bilayer profile region. - Finds peaks by prominence (so a peak must genuinely rise above its surroundings, - not just be elevated by a neighbor's tail), then takes the two **most prominent** - as the leaflets. A second leaflet only counts if its prominence is at least - ``MIN_LEAFLET_PROMINENCE_RATIO`` of the strongest peak's, which rejects small - flanking shoulders (e.g. neighboring protein density). ``n_resolved`` is the - number of real leaflets (2 -> fit a dual Gaussian; 1 -> a single central peak, - fall back to a single Gaussian; 0 -> nothing resolved). ``half_sep`` is clamped - to the physical thickness range. + Enumerates every local maximum (no prominence floor), anchors on the tallest + peak -- the dominant membrane density -- and pairs it with a partner leaflet a + physical bilayer thickness (MIN..MAX nm) away. The partner is validated by its + height above the shared solvent baseline (``b.min()``), not by ``find_peaks`` + prominence. This matters for two reasons: + + * A merged/thin bilayer (e.g. an OMM at this resolution) has two real leaflets + separated by a near-flat saddle, so the shorter leaflet's *prominence* is tiny + and any prominence floor -- or comparing the two leaflets' prominences -- drops + it. Height above the common base is independent of saddle depth, so both + leaflets of a merged bilayer are kept. + * Comparing height-above-base still rejects small flanking shoulders (neighbouring + protein density), which sit well below ``MIN_LEAFLET_HEIGHT_RATIO`` of the + membrane peak, and the MIN..MAX window rejects distant confounders. + + Being permissive here is safe: the caller validates every dual fit by R^2 and + fitted thickness, so a spurious partner is discarded downstream, whereas a missed + leaflet would wrongly skip the dual fit entirely. ``n_resolved`` is the number of + real leaflets (2 -> fit a dual Gaussian; 1 -> a single central peak, fall back to + a single Gaussian; 0 -> nothing resolved). ``half_sep`` is clamped to the physical + thickness range. """ from scipy.signal import find_peaks a = np.asarray(a) b = np.asarray(b) - rng = float(b.max() - b.min()) - # Require each peak to rise at least 5% of the profile range above its - # surroundings, so noise bumps are not picked up at all. - peaks, props = find_peaks(b, prominence=(0.05 * rng if rng > 0 else None)) + # All strict local maxima. No prominence floor: a merged bilayer's second leaflet + # has near-zero prominence but is still a real leaflet; it is filtered below by + # height-above-base and separation instead. + peaks, _ = find_peaks(b) if len(peaks) == 0: return default_center, MIN_THICKNESS / 2.0, 0 positions = a[peaks] - proms = props["prominences"] - # Anchor on the strongest peak (the dominant membrane density), then look for - # the partner leaflet: a peak a physical bilayer thickness (MIN..MAX) away that - # is prominent enough. This excludes distant confounders (other membranes / - # protein further than MAX_THICKNESS) and rejects weak flanking shoulders. - primary = int(np.argmax(proms)) + heights = b[peaks] + base = float(b.min()) + # Anchor on the tallest peak (dominant membrane density), then look for the + # partner leaflet: a peak a physical bilayer thickness (MIN..MAX) away whose + # height above the shared solvent base is a real fraction of the anchor's. + primary = int(np.argmax(heights)) + primary_rise = float(heights[primary] - base) partner = None + best_partner_rise = 0.0 for j in range(len(peaks)): if j == primary: continue sep = abs(positions[j] - positions[primary]) - if (MIN_THICKNESS <= sep <= MAX_THICKNESS - and proms[j] >= MIN_LEAFLET_PROMINENCE_RATIO * proms[primary]): - if partner is None or proms[j] > proms[partner]: - partner = j + if not (MIN_THICKNESS <= sep <= MAX_THICKNESS): + continue + partner_rise = float(heights[j] - base) + if primary_rise <= 0: + continue + if (partner_rise >= MIN_LEAFLET_HEIGHT_RATIO * primary_rise + and partner_rise > best_partner_rise): + partner = j + best_partner_rise = partner_rise if partner is not None: p_left, p_right = sorted([positions[primary], positions[partner]]) center = 0.5 * (p_left + p_right) @@ -95,6 +160,73 @@ def _seed_bilayer_center(a, b, default_center=0.0): return float(positions[primary]), MIN_THICKNESS / 2.0, 1 +def _leaflet_resolution(a, b): + """Continuous bilayer-resolution score in [0, 1] for a profile region. + + This is the continuous form of the resolution gate: the height (above the shared + solvent base) of the best second leaflet within a physical bilayer distance of the + dominant one, as a fraction of the dominant leaflet's height. A clearly resolved + bilayer scores near 1; a profile with only a weak shoulder scores low; a fully + merged single peak scores 0. The gate accepts a triangle as strictly resolved at + ``MIN_LEAFLET_HEIGHT_RATIO`` (0.5); below that a thickness is only obtained by + prior recovery and reads systematically thin, so the score doubles as a + per-triangle reliability flag for the reported thickness. + """ + from scipy.signal import find_peaks + + a = np.asarray(a) + b = np.asarray(b) + peaks, _ = find_peaks(b) + if len(peaks) < 2: + return 0.0 + positions = a[peaks] + heights = b[peaks] + base = float(b.min()) + primary = int(np.argmax(heights)) + primary_rise = float(heights[primary] - base) + if primary_rise <= 0: + return 0.0 + best = 0.0 + for j in range(len(peaks)): + if j == primary: + continue + if MIN_THICKNESS <= abs(positions[j] - positions[primary]) <= MAX_THICKNESS: + best = max(best, float(heights[j] - base) / primary_rise) + return float(min(best, 1.0)) + + +def _dual_recovery_ok(a, b, popt, r2, center_seed): + """Accept a global-prior-seeded dual fit on a single-peak (merged) triangle? + + Used only when the surface's global average resolves a bilayer but this triangle's + local profile shows one peak. The dual fit is trusted only if it is a real, + non-degenerate, centered bilayer rather than a forced split of a single peak: + + * a good fit with a physical thickness clear of the floor -- a floor-pinned fit + means the leaflets merged completely, so there is no real separation to report; + * both leaflets comparable in amplitude (neither Gaussian collapsed to nothing); + * the dual center agrees with an independent single-Gaussian center -- a genuine + skewed single peak splits asymmetrically and the two disagree. + + ``popt`` is the ``_dual_gaussian_shared_width`` result (h1, h2, w, center, half, o). + """ + thickness = 2.0 * popt[4] + if not (r2 > R2_THRESHOLD + and MIN_THICKNESS + PINNED_THICKNESS_EPS < thickness <= MAX_THICKNESS): + return False + h1, h2 = popt[0], popt[1] + if min(h1, h2) < MIN_LEAFLET_AMP_RATIO * max(h1, h2): + return False + try: + ps, _ = opt.curve_fit(_monogaussian_with_offset, a, b, + [0.03, center_seed, 2.5, 0], + bounds=([0.005, center_seed - 6, 1.0, -1], + [0.06, center_seed + 6, 5.0, 1])) + except Exception: + return False + return abs(popt[3] - ps[1]) < MAX_CENTER_DISAGREEMENT + + def _compute_r_squared(y_true, y_pred): """Compute R² (coefficient of determination).""" ss_res = np.sum((y_true - y_pred) ** 2) @@ -284,7 +416,6 @@ def compute_thickness_chunk(indices): dat = dat / (80/81 * dat_sum) try: - ipk = _lt_x_positions[np.argmax(dat)] mid = len(dat) // 2 left_min = np.argmin(dat[:mid]) right_min = np.argmin(dat[mid:]) + mid @@ -292,16 +423,26 @@ def compute_thickness_chunk(indices): a = _lt_x_positions[left_min+2:right_min-2] b = dat[left_min+2:right_min-2] - if len(a) >= 7: - p0 = [0.02, ipk-0.5, 1.5, 0.02, ipk+0.5, 1.5, 0] - bounds = ([0.005, ipk-6, 0.8, 0.005, ipk-1.5, 0.8, -1], - [0.04, ipk+1.5, 2.2, 0.04, ipk+6, 2.2, 1]) - popt, _ = opt.curve_fit(_dual_gaussian, a, b, p0, bounds=bounds) - thickness = np.abs(popt[4] - popt[1]) - if 2.0 <= thickness <= 7.0: - results.append(thickness) - else: - results.append(np.nan) + if len(a) < 7: + results.append(np.nan) + continue + + # Same quality gate as fit_triangle_chunk: require two resolved leaflets, + # then a centered + shared-width fit accepted only on R^2 and a physical + # thickness; otherwise NaN (no valid bilayer thickness). + center_seed, half_seed, n_resolved = _seed_bilayer_center(a, b) + if n_resolved < 2: + results.append(np.nan) + continue + af, bf = _symmetric_fit_window(a, b, center_seed, half_seed) + p0 = [0.02, 0.02, 1.5, center_seed, half_seed, 0.0] + bounds = ([0.005, 0.005, 0.8, center_seed - 3.0, MIN_THICKNESS / 2.0, -1], + [0.04, 0.04, 2.2, center_seed + 3.0, MAX_THICKNESS / 2.0, 1]) + popt, _ = opt.curve_fit(_dual_gaussian_shared_width, af, bf, p0, bounds=bounds) + r2 = _compute_r_squared(bf, _dual_gaussian_shared_width(af, *popt)) + thickness = 2.0 * popt[4] + if r2 > R2_THRESHOLD and MIN_THICKNESS <= thickness <= MAX_THICKNESS: + results.append(thickness) else: results.append(np.nan) except Exception: @@ -351,7 +492,10 @@ def fit_triangle_chunk(indices): Returns ------- list of tuples - [(thickness, offset), ...] for each triangle in chunk + [(thickness, offset, resolution), ...] for each triangle in chunk. + ``thickness`` is NaN where no valid bilayer was measured; ``resolution`` is + the per-triangle bilayer-resolution score in [0, 1] (NaN if there was no + profile to score) -- a reliability flag for the reported thickness. """ sample_spacing = _worker_x[1] - _worker_x[0] if len(_worker_x) > 1 else 1.0 max_shift_samples = max(1, int(round(2.0 / sample_spacing))) @@ -364,7 +508,7 @@ def fit_triangle_chunk(indices): neighbors = _worker_neighbor_indices[i][valid_mask] if len(neighbors) == 0: - results.append((np.nan, 0)) + results.append((np.nan, 0, np.nan)) continue weights = 1.0 / (1.0 + l) @@ -376,7 +520,7 @@ def fit_triangle_chunk(indices): dat = dat - dat.min() dat_sum = dat.sum() if dat_sum == 0: - results.append((np.nan, 0)) + results.append((np.nan, 0, np.nan)) continue dat = dat / (80 / 81 * dat_sum) else: @@ -387,7 +531,7 @@ def fit_triangle_chunk(indices): row_sums = indiv.sum(axis=1, keepdims=True) valid_rows = row_sums.flatten() > 0 if not np.any(valid_rows): - results.append((np.nan, 0)) + results.append((np.nan, 0, np.nan)) continue indiv[valid_rows] /= (80 / 81 * row_sums[valid_rows]) @@ -410,12 +554,6 @@ def fit_triangle_chunk(indices): else: dat = dat0 - # Find initial peak and set up fitting bounds - ipk = _worker_x[np.argmax(dat)] - p0 = [0.02, ipk-0.5, 1.5, 0.02, ipk+0.5, 1.5, 0] - bounds = ([0.005, ipk-6, 0.8, 0.005, ipk-1.5, 0.8, -1], - [0.04, ipk+1.5, 2.2, 0.04, ipk+6, 2.2, 1]) - # Find minima to constrain fitting region mid = len(dat) // 2 left_min = np.argmin(dat[:mid]) @@ -424,17 +562,58 @@ def fit_triangle_chunk(indices): a = _worker_x[left_min+2:right_min-2] b = dat[left_min+2:right_min-2] - if len(a) < 7: # Need enough points for 7-parameter fit - results.append((np.nan, 0)) + if len(a) < 7: # Need enough points for the fit + results.append((np.nan, 0, np.nan)) + continue + + # Per-triangle reliability flag reported alongside every thickness: how clearly + # the two leaflets are resolved (1 = cleanly separated, 0 = a single merged + # peak). Strict fits score >= MIN_LEAFLET_HEIGHT_RATIO; prior-recovered fits + # below it (and read systematically thin), so downstream can weight or filter. + resolution = _leaflet_resolution(a, b) + + # Quality gate, two tiers (mirrors the refinement path): + # * resolved (n_resolved >= 2): fit and accept on R^2 + physical thickness; + # * prior recovery (n_resolved < 2 but the global average resolved a bilayer): + # a locally-merged profile may still hold a real, blurred bilayer -- fit it + # seeded from the global prior and accept only a non-degenerate, + # center-stable result (_dual_recovery_ok). Without a prior, a single peak + # has no measurable thickness -> NaN. + center_seed, half_seed, n_resolved = _seed_bilayer_center(a, b) + have_prior = _worker_global_fit_params is not None + if have_prior: + c1_g, w1_g, c2_g, w2_g = _worker_global_fit_params + seed_c = 0.5 * (c1_g + c2_g) + seed_h = float(np.clip(0.5 * (c2_g - c1_g), + MIN_THICKNESS / 2.0, MAX_THICKNESS / 2.0)) + seed_w = w1_g + else: + seed_c, seed_h, seed_w = center_seed, half_seed, 1.5 + + recovery = n_resolved < 2 and have_prior + if n_resolved < 2 and not recovery: + results.append((np.nan, 0, resolution)) continue try: - p3, _ = opt.curve_fit(_dual_gaussian, a, b, p0, bounds=bounds) - thickness = np.abs(p3[4] - p3[1]) - offset = (p3[4] + p3[1]) / 2 - results.append((thickness, offset)) + af, bf = _symmetric_fit_window(a, b, seed_c, seed_h) + p0 = [0.02, 0.02, seed_w, seed_c, seed_h, 0.0] + bounds = ([0.005, 0.005, 0.8, seed_c - 3.0, MIN_THICKNESS / 2.0, -1], + [0.04, 0.04, 2.2, seed_c + 3.0, MAX_THICKNESS / 2.0, 1]) + p3, _ = opt.curve_fit(_dual_gaussian_shared_width, af, bf, p0, bounds=bounds) + r2 = _compute_r_squared(bf, _dual_gaussian_shared_width(af, *p3)) + thickness = 2.0 * p3[4] + offset = p3[3] + if recovery: + accept = _dual_recovery_ok(a, b, p3, r2, seed_c) + else: + accept = r2 > R2_THRESHOLD and MIN_THICKNESS <= thickness <= MAX_THICKNESS + if accept: + results.append((thickness, offset, resolution)) + else: + results.append((np.nan, 0, resolution)) except Exception: - results.append((np.nan, 0)) + results.append((np.nan, 0, resolution)) return results @@ -551,54 +730,71 @@ def fit_triangle_chunk_offsets(indices): # Step 1: Try dual Gaussian (unless monolayer mode). Parameterized by the # bilayer center + half-separation so the two leaflet peaks are forced to # opposite sides and cannot collapse into the same leaflet. - if _worker_global_fit_params is not None: - # Seed from the (more reliable) global average fit, which already - # confirmed a resolved bilayer. + # + # The global average fit (when available) gives more reliable seeds for the + # center/half/width. The dual fit runs in two tiers: + # * resolved (n_resolved >= 2): a clear two-leaflet profile -> fit and accept + # on R^2 + physical thickness (high confidence); + # * prior recovery (n_resolved < 2 but the global average resolved a bilayer): + # a locally-merged profile may still hold a real, blurred bilayer. Seed it + # from the global fit but accept only a non-degenerate, center-stable result + # (_dual_recovery_ok), so a genuine single / skewed peak is still rejected + # and falls through to the single Gaussian rather than inventing a bilayer. + tri_center, tri_half, n_resolved = _seed_bilayer_center(a, b) + have_prior = _worker_global_fit_params is not None + if have_prior: c1_g, w1_g, c2_g, w2_g = _worker_global_fit_params center_seed = 0.5 * (c1_g + c2_g) half_seed = float(np.clip(0.5 * (c2_g - c1_g), MIN_THICKNESS / 2.0, MAX_THICKNESS / 2.0)) - w1_seed, w2_seed = w1_g, w2_g - n_resolved = 2 + w1_seed = w2_seed = w1_g else: - center_seed, half_seed, n_resolved = _seed_bilayer_center(a, b) + center_seed, half_seed = tri_center, tri_half w1_seed = w2_seed = 1.5 - # Only attempt the dual Gaussian when two leaflets are actually resolved; - # poorly-resolved regions (one peak) fall through to the single Gaussian. - if not _worker_monolayer and n_resolved >= 2: + recovery = n_resolved < 2 and have_prior # tier 2 + if not _worker_monolayer and (n_resolved >= 2 or recovery): try: - # Restrict the fit to a window around the seeded bilayer so distant - # density (adjacent membranes, CTF ringing) cannot pull the fit out. - fit_window = MAX_THICKNESS / 2.0 + 2.0 - wmask = (a >= center_seed - fit_window) & (a <= center_seed + fit_window) - af, bf = (a[wmask], b[wmask]) if int(wmask.sum()) >= 5 else (a, b) - - # params: h1, w1, h2, w2, center, half_sep, offset - p0 = [0.02, w1_seed, 0.02, w2_seed, center_seed, half_seed, 0.0] - bounds = ([0.005, 0.8, 0.005, 0.8, center_seed - 3.0, MIN_THICKNESS / 2.0, -1], - [0.04, 2.2, 0.04, 2.2, center_seed + 3.0, MAX_THICKNESS / 2.0, 1]) - p3, _ = opt.curve_fit(_dual_gaussian_centered, af, bf, p0, bounds=bounds) + # Restrict the fit to a window symmetric about the seeded bilayer so + # distant density (adjacent membranes, CTF ringing) cannot pull the + # fit out and an asymmetric range cannot bias the center. + af, bf = _symmetric_fit_window(a, b, center_seed, half_seed) + + # params: h1, h2, shared_w, center, half_sep, offset. A single shared + # width keeps asymmetric flanks from tilting the fit and shifting the + # center (see _dual_gaussian_shared_width). + w_seed = 0.5 * (w1_seed + w2_seed) + p0 = [0.02, 0.02, w_seed, center_seed, half_seed, 0.0] + bounds = ([0.005, 0.005, 0.8, center_seed - 3.0, MIN_THICKNESS / 2.0, -1], + [0.04, 0.04, 2.2, center_seed + 3.0, MAX_THICKNESS / 2.0, 1]) + p3, _ = opt.curve_fit(_dual_gaussian_shared_width, af, bf, p0, bounds=bounds) + + # Validate fit (stricter, center-stable check for the recovery tier). + r2 = _compute_r_squared(bf, _dual_gaussian_shared_width(af, *p3)) + thickness = 2.0 * p3[4] + if recovery: + accept = _dual_recovery_ok(a, b, p3, r2, center_seed) + else: + accept = r2 > R2_THRESHOLD and MIN_THICKNESS <= thickness <= MAX_THICKNESS - # Validate fit - y_pred = _dual_gaussian_centered(af, *p3) - r2 = _compute_r_squared(bf, y_pred) - thickness = 2.0 * p3[5] - - if r2 > R2_THRESHOLD and MIN_THICKNESS <= thickness <= MAX_THICKNESS: - best_offset = p3[4] # bilayer center - best_sigma1 = p3[1] - best_sigma2 = p3[3] + if accept: + best_offset = p3[3] # bilayer center + best_sigma1 = p3[2] + best_sigma2 = p3[2] best_method = 1 except Exception: pass - # Step 2: Try single Gaussian if dual didn't work + # Step 2: Try single Gaussian if dual didn't work. For an unresolved triangle + # the global membrane center (when available) is a more robust seed than this + # triangle's noisy argmax, which can latch onto a spurious bump; bounds stay + # wide so a genuinely shifted membrane can still be found. if best_method == 0: try: - p0_mono = [0.03, ipk, 2.5, 0] - bounds_mono = ([0.005, ipk-6, 1.0, -1], - [0.06, ipk+6, 5.0, 1]) + mono_seed = center_seed if _worker_global_fit_params is not None else ipk + p0_mono = [0.03, mono_seed, 2.5, 0] + bounds_mono = ([0.005, mono_seed-6, 1.0, -1], + [0.06, mono_seed+6, 5.0, 1]) p_mono, _ = opt.curve_fit(_monogaussian_with_offset, a, b, p0_mono, bounds=bounds_mono) # Validate fit diff --git a/surface_morphometrics/measure_thickness.py b/surface_morphometrics/measure_thickness.py index 3b17a63..8676606 100644 --- a/surface_morphometrics/measure_thickness.py +++ b/surface_morphometrics/measure_thickness.py @@ -108,7 +108,43 @@ def find_two_peaks(x,y): return width, peak1, peak2 -from ._thickness_worker import init_worker, fit_triangle_chunk +from ._thickness_worker import (init_worker, fit_triangle_chunk, + _seed_bilayer_center, _compute_r_squared, + _dual_gaussian_shared_width, _symmetric_fit_window, + R2_THRESHOLD, MIN_THICKNESS, MAX_THICKNESS) + + +def _global_bilayer_prior(thickness_set, x): + """Fit the whole-surface average density profile to a bilayer. + + Returns ``(c1, w, c2, w)`` to seed the per-triangle recovery tier (so locally + merged triangles on a clearly-bilayer surface can still be measured), or ``None`` + if the surface average does not resolve a bilayer. + """ + avg = thickness_set.mean(axis=0).to_numpy() * -1 + avg = avg - avg.min() + total = avg.sum() + if total <= 0: + return None + avg = avg / (80 / 81 * total) + mid = len(avg) // 2 + lm = np.argmin(avg[:mid]); rm = np.argmin(avg[mid:]) + mid + a, b = x[lm + 2:rm - 2], avg[lm + 2:rm - 2] + if len(a) < 7: + return None + cs, hs, n_resolved = _seed_bilayer_center(a, b) + if n_resolved < 2: + return None + try: + af, bf = _symmetric_fit_window(a, b, cs, hs) + p, _ = opt.curve_fit(_dual_gaussian_shared_width, af, bf, + [0.02, 0.02, 1.5, cs, hs, 0], + bounds=([0.005, 0.005, 0.8, cs - 3, MIN_THICKNESS / 2.0, -1], + [0.04, 0.04, 2.2, cs + 3, MAX_THICKNESS / 2.0, 1])) + except Exception: + return None + c, half, w = p[3], p[4], p[2] + return (c - half, w, c + half, w) def process_single_surface(filename, average_radius, output_dir): @@ -192,9 +228,21 @@ def process_single_surface(filename, average_radius, output_dir): chunks = [list(range(i, min(i + chunk_size, n_triangles))) for i in range(0, n_triangles, chunk_size)] + # Whole-surface average bilayer (prior). When the surface clearly resolves a + # bilayer, this lets the per-triangle recovery tier measure locally-merged + # triangles too; each measurement carries a resolution score so the recovered + # (lower-confidence, slightly thin) values stay distinguishable. + global_fit_params = _global_bilayer_prior(thickness_set, x) + if global_fit_params is not None: + print(f" Global average resolved a bilayer; recovery tier enabled " + f"(thickness {abs(global_fit_params[2] - global_fit_params[0]):.2f} nm).") + else: + print(" Global average did not resolve a bilayer; per-triangle fits stay strict.") + # Use initializer to share data once per worker (avoids repeated pickling) with mp.Pool(n_workers, initializer=init_worker, - initargs=(thickness_arr, distances, neighbor_indices, x)) as pool: + initargs=(thickness_arr, distances, neighbor_indices, x, + False, None, False, global_fit_params)) as pool: chunk_results = list(tqdm( pool.imap(fit_triangle_chunk, chunks), total=len(chunks), @@ -205,6 +253,7 @@ def process_single_surface(filename, average_radius, output_dir): results = [r for chunk in chunk_results for r in chunk] per_surface_thickness = [r[0] for r in results] per_triangle_offset = [r[1] for r in results] + per_triangle_resolution = [r[2] for r in results] # Plot a sample of profiles for visualization for i in range(0, n_triangles, 5000): @@ -227,15 +276,34 @@ def process_single_surface(filename, average_radius, output_dir): mins = find_mins(avg) ipk = x[np.argmax(avg)] - p0 = [0.02, ipk-0.5, 1.5, 0.02, ipk+0.5, 1.5, 0] - bounds = ([0.005, ipk-6, 0.8, 0.005, ipk-2, 0.8, -1], - [0.04, ipk+2, 2.2, 0.04, ipk+6, 2.2, 1]) - a = x[mins[0]+2:mins[1]-2] b = avg[mins[0]+2:mins[1]-2] - p3, _ = opt.curve_fit(dual_gaussian, a, b, p0, bounds=bounds) - width = np.abs(p3[1]-p3[4]) + # Fit the whole-surface average with the same shared-width, symmetric-window model + # that drives the per-triangle measurements: two equal-width leaflets symmetric + # about the center, so an asymmetric baseline cannot tilt the components and bias + # the center. Repackaged into the legacy (h1, c1, w1, h2, c2, w2, o) layout so the + # plot/CSV below are unchanged -- with w1 == w2 this equals dual_gaussian exactly. + # Quality gate as for the per-triangle fit: width is NaN unless the average + # resolves two leaflets and the fit is good and physical. + center_seed, half_seed, avg_n_resolved = _seed_bilayer_center(a, b) + try: + af, bf = _symmetric_fit_window(a, b, center_seed, half_seed) + pN, _ = opt.curve_fit( + _dual_gaussian_shared_width, af, bf, + [0.02, 0.02, 1.5, center_seed, half_seed, 0.0], + bounds=([0.005, 0.005, 0.8, center_seed - 3.0, MIN_THICKNESS / 2.0, -1], + [0.04, 0.04, 2.2, center_seed + 3.0, MAX_THICKNESS / 2.0, 1])) + h1, h2, w, center, half, o = pN + p3 = np.array([h1, center - half, w, h2, center + half, w, o]) + width = 2.0 * half + r2_avg = _compute_r_squared(bf, _dual_gaussian_shared_width(af, *pN)) + if (avg_n_resolved < 2 or r2_avg <= R2_THRESHOLD + or not (MIN_THICKNESS <= width <= MAX_THICKNESS)): + width = np.nan + except Exception: + p3 = np.array([0.0, ipk, 1.0, 0.0, ipk, 1.0, 0.0]) + width = np.nan # Update graph with thickness properties average_width_prop = tg.graph.new_vertex_property("float") @@ -244,10 +312,16 @@ def process_single_surface(filename, average_radius, output_dir): thick.a = per_surface_thickness offset = tg.graph.new_vertex_property("float") offset.a = per_triangle_offset + # Per-triangle reliability flag for `thickness`: 1 = two leaflets cleanly + # resolved (high confidence); < MIN_LEAFLET_HEIGHT_RATIO (0.5) = thickness only + # obtained by prior recovery (lower confidence, reads slightly thin). + bilayer_resolution = tg.graph.new_vertex_property("float") + bilayer_resolution.a = per_triangle_resolution tg.graph.vp.average_width = average_width_prop tg.graph.vp.thickness = thick tg.graph.vp.offset = offset + tg.graph.vp.bilayer_resolution = bilayer_resolution tg.graph.save(graph_file_final) surf = tg.graph_to_triangle_poly() diff --git a/surface_morphometrics/refine_mesh.py b/surface_morphometrics/refine_mesh.py index 9767704..3efa6c1 100644 --- a/surface_morphometrics/refine_mesh.py +++ b/surface_morphometrics/refine_mesh.py @@ -46,7 +46,8 @@ from .measure_thickness import find_mins, dual_gaussian, monogaussian from ._thickness_worker import (init_worker, fit_triangle_chunk_offsets, init_local_thickness_worker, compute_thickness_chunk, - _dual_gaussian_centered, _seed_bilayer_center, + _dual_gaussian_centered, _dual_gaussian_shared_width, + _seed_bilayer_center, _symmetric_fit_window, MIN_THICKNESS, MAX_THICKNESS) from . import curvature @@ -851,19 +852,19 @@ def refine_mesh_iteration(graph_file, vtp_file, mrc_file, output_base, pixel_siz # peaks apart so the global fit cannot collapse into one leaflet. # Restrict the fit to a window around the seeded bilayer so distant # density (adjacent membranes, CTF ringing) cannot pull the fit out. - fit_window = MAX_THICKNESS / 2.0 + 2.0 - gmask = (ag >= center_seed - fit_window) & (ag <= center_seed + fit_window) - agf, bgf = (ag[gmask], bg[gmask]) if int(gmask.sum()) >= 7 else (ag, bg) - p0g = [0.02, 1.5, 0.02, 1.5, center_seed, half_seed, 0] - bounds_g = ([0.005, 0.8, 0.005, 0.8, center_seed - 3.0, MIN_THICKNESS / 2.0, -1], - [0.04, 2.2, 0.04, 2.2, center_seed + 3.0, MAX_THICKNESS / 2.0, 1]) - popt_g, _ = opt.curve_fit(_dual_gaussian_centered, agf, bgf, p0g, bounds=bounds_g) - center_g, half_g = popt_g[4], popt_g[5] + agf, bgf = _symmetric_fit_window(ag, bg, center_seed, half_seed) + # Shared leaflet width: keeps asymmetric outer density from tilting + # the fit and shifting the bilayer center off the true midpoint. + p0g = [0.02, 0.02, 1.5, center_seed, half_seed, 0] + bounds_g = ([0.005, 0.005, 0.8, center_seed - 3.0, MIN_THICKNESS / 2.0, -1], + [0.04, 0.04, 2.2, center_seed + 3.0, MAX_THICKNESS / 2.0, 1]) + popt_g, _ = opt.curve_fit(_dual_gaussian_shared_width, agf, bgf, p0g, bounds=bounds_g) + center_g, half_g = popt_g[3], popt_g[4] c1_g, c2_g = center_g - half_g, center_g + half_g global_center_offset = center_g - global_sigma1 = popt_g[1] - global_sigma2 = popt_g[3] - global_fit_params = (c1_g, popt_g[1], c2_g, popt_g[3]) + global_sigma1 = popt_g[2] + global_sigma2 = popt_g[2] + global_fit_params = (c1_g, popt_g[2], c2_g, popt_g[2]) print(f" Global avg profile: c1={c1_g:.3f} nm, c2={c2_g:.3f} nm, " f"center={global_center_offset:+.3f} nm, " f"thickness={2.0 * half_g:.3f} nm") diff --git a/tests/test_thickness_worker.py b/tests/test_thickness_worker.py index 6a4a0e5..ba2e894 100644 --- a/tests/test_thickness_worker.py +++ b/tests/test_thickness_worker.py @@ -12,14 +12,12 @@ def _fit_bilayer(dat, x): rm = np.argmin(dat[mid:]) + mid a, b = x[lm + 2:rm - 2], dat[lm + 2:rm - 2] center_seed, half_seed, _ = tw._seed_bilayer_center(a, b) - window = tw.MAX_THICKNESS / 2.0 + 2.0 - m = (a >= center_seed - window) & (a <= center_seed + window) - af, bf = (a[m], b[m]) if m.sum() >= 5 else (a, b) - p0 = [0.02, 1.5, 0.02, 1.5, center_seed, half_seed, 0.0] - bounds = ([0.005, 0.8, 0.005, 0.8, center_seed - 3.0, tw.MIN_THICKNESS / 2.0, -1], - [0.04, 2.2, 0.04, 2.2, center_seed + 3.0, tw.MAX_THICKNESS / 2.0, 1]) - p, _ = opt.curve_fit(tw._dual_gaussian_centered, af, bf, p0, bounds=bounds) - return p[4], p[5] # center, half_sep + af, bf = tw._symmetric_fit_window(a, b, center_seed, half_seed) + p0 = [0.02, 0.02, 1.5, center_seed, half_seed, 0.0] + bounds = ([0.005, 0.005, 0.8, center_seed - 3.0, tw.MIN_THICKNESS / 2.0, -1], + [0.04, 0.04, 2.2, center_seed + 3.0, tw.MAX_THICKNESS / 2.0, 1]) + p, _ = opt.curve_fit(tw._dual_gaussian_shared_width, af, bf, p0, bounds=bounds) + return p[3], p[4] # center, half_sep def test_dual_gaussian_centered_is_two_separated_peaks(): @@ -61,6 +59,44 @@ def test_seed_rejects_small_shoulders_as_single_peak(): assert abs(center) < 0.3 # centered on the membrane, not a shoulder/midpoint +def test_seed_resolves_near_symmetric_bilayer_despite_prominence_asymmetry(): + # Regression: a well-resolved bilayer whose two leaflets are near-equal height + # (a hair apart) with a shallow inter-leaflet saddle. find_peaks assigns the + # marginally taller leaflet a large prominence (down to the solvent base) and + # the other only a small one (down to the saddle), so their prominence *ratio* + # is ~0.2 even though both are real leaflets. The seeder must still resolve two + # (it compares height above the shared base, not prominence) -- otherwise + # essentially every clean bilayer wrongly falls back to a single Gaussian. + x = np.linspace(-10, 10, 161) + b = (0.011 + + tw._monogaussian(x, 0.0062, -1.75, 1.0) + + tw._monogaussian(x, 0.0063, 1.75, 1.0)) # +1.75 leaflet a hair taller + mid = len(b) // 2 + lm = np.argmin(b[:mid]); rm = np.argmin(b[mid:]) + mid + a, bb = x[lm + 2:rm - 2], b[lm + 2:rm - 2] + center, half, n_resolved = tw._seed_bilayer_center(a, bb) + assert n_resolved == 2 + assert abs(center) < 0.3 and abs(2 * half - 3.5) < 0.6 + + +def test_seed_resolves_merged_bilayer_with_near_flat_saddle(): + # Regression (the OMM case): a thin/merged bilayer whose two leaflets are only + # ~2.3 nm apart with an almost-flat saddle between them. The second leaflet's + # find_peaks prominence is near zero, so any prominence floor drops it; the + # seeder must instead keep it on height-above-base + separation and resolve two. + x = np.linspace(-10, 10, 161) + b = (0.011 + + tw._monogaussian(x, 0.011, -1.25, 0.95) + + tw._monogaussian(x, 0.0112, 1.25, 0.95)) # near-equal, barely separated + mid = len(b) // 2 + lm = np.argmin(b[:mid]); rm = np.argmin(b[mid:]) + mid + a, bb = x[lm + 2:rm - 2], b[lm + 2:rm - 2] + # the inter-leaflet saddle is only just below the peaks (a near-flat dip) + center, half, n_resolved = tw._seed_bilayer_center(a, bb) + assert n_resolved == 2 + assert abs(center) < 0.4 and 2 * half >= tw.MIN_THICKNESS + + def test_seed_keeps_asymmetric_bilayer_as_two_leaflets(): # Genuinely asymmetric bilayer (one leaflet taller) must still resolve as two. x = np.linspace(-10, 10, 161) @@ -116,6 +152,28 @@ def test_fit_recovers_center_of_asymmetric_bilayer_with_confounder(): assert half >= tw.MIN_THICKNESS / 2.0 +def test_dual_gaussian_shared_width_ties_both_widths(): + # The shared-width model must equal the independent-width model with w1 == w2. + # Tying the widths is what stops the fit from absorbing asymmetric outer density + # into unequal leaflet widths (which, with a shared center, biases the center). + x = np.linspace(-8, 8, 200) + shared = tw._dual_gaussian_shared_width(x, 1.0, 0.8, 1.3, center=0.5, half_sep=2.0, o=0.1) + expected = tw._dual_gaussian_centered(x, 1.0, 1.3, 0.8, 1.3, center=0.5, half_sep=2.0, o=0.1) + assert np.allclose(shared, expected) + + +def test_symmetric_fit_window_is_balanced_about_seed(): + # When the profile extends further on one side of the seed than the other, the + # window must be trimmed to the shorter side so the fit sees an equal x-range each + # way -- an unbalanced range gives the least-squares fit more leverage on one side + # and pulls the center off the midpoint. + a = np.linspace(-4.0, 7.0, 111) # extends much further to the right + b = np.ones_like(a) + af, bf = tw._symmetric_fit_window(a, b, center_seed=0.0, half_seed=1.75) + assert abs(af.min() + af.max()) < 1e-6 # symmetric about 0 + assert af.min() >= -4.0 and af.max() <= 7.0 + + def test_fit_keeps_well_centered_bilayer_centered(): x = np.linspace(-10, 10, 161) dat = 0.011 + tw._monogaussian(x, 0.017, -2.0, 1.1) + tw._monogaussian(x, 0.017, 2.0, 1.1) @@ -123,6 +181,142 @@ def test_fit_keeps_well_centered_bilayer_centered(): assert abs(center) < 0.2 and abs(2 * half - 4.0) < 0.5 # no spurious move +def test_global_seed_still_gates_dual_per_triangle(): + # Even when a resolved global fit is supplied (its center/half seed every + # triangle), fit_triangle_chunk_offsets must still gate the dual fit on each + # triangle's own resolution. A genuinely single-peak (poorly-resolved) triangle + # must fall back to the single Gaussian (method 2), not be forced into a dual that + # invents a ~2 nm bilayer and -- for a skewed peak -- mis-centers the vertex. + x = np.linspace(-10, 10, 81) + bilayer = (0.01 + tw._monogaussian(x, 0.02, -1.75, 1.0) + + tw._monogaussian(x, 0.02, 1.75, 1.0)) # two leaflets + single_broad = 0.01 + tw._monogaussian(x, 0.02, 0.0, 2.2) # one broad peak + skewed = (0.01 + tw._monogaussian(x, 0.02, 0.6, 1.3) + + tw._monogaussian(x, 0.008, 3.2, 1.8)) # skewed single peak + profiles = np.vstack([bilayer, single_broad, skewed]) + n = len(profiles) + # Worker multiplies by -1; give each triangle exactly one neighbour (itself). + tw.init_worker(-profiles, np.zeros((n, 1)), np.arange(n).reshape(n, 1), x, + use_xcorr=False, global_fit_params=(-1.75, 1.4, 1.75, 1.4)) + try: + res = tw.fit_triangle_chunk_offsets(list(range(n))) + finally: + tw.init_worker(None, None, None, x) # reset module globals + methods = [r[3] for r in res] + centers = [r[0] for r in res] + assert methods == [1, 2, 2] # bilayer dual; single peaks -> single + assert abs(centers[0]) < 0.3 # bilayer centered on the midpoint + assert abs(centers[1]) < 0.4 # broad single peak centered on its peak + assert abs(centers[2]) < 2.0 # skewed peak not flung out to a fake leaflet + # the dual fit uses a single shared width, so both reported sigmas match + assert res[0][1] == res[0][2] + + +def test_thickness_quality_gate_reports_nan_for_unresolved_triangles(): + # measure_thickness's per-triangle fit (fit_triangle_chunk) must only report a + # thickness when two leaflets are resolved and the fit is physical. Single-peak, + # skewed, and sub-minimum-separation profiles have no valid bilayer thickness and + # must return NaN rather than a forced dual's spurious value. + x = np.linspace(-10, 10, 81) + bilayer = (0.01 + tw._monogaussian(x, 0.02, -1.75, 1.0) + + tw._monogaussian(x, 0.02, 1.75, 1.0)) # ~3.5 nm bilayer + single_broad = 0.01 + tw._monogaussian(x, 0.02, 0.0, 2.2) # one broad peak + skewed = (0.01 + tw._monogaussian(x, 0.02, 0.6, 1.3) + + tw._monogaussian(x, 0.008, 3.2, 1.8)) # skewed single peak + too_thin = (0.01 + tw._monogaussian(x, 0.02, -0.6, 1.0) + + tw._monogaussian(x, 0.02, 0.6, 1.0)) # ~1.2 nm < MIN_THICKNESS + profiles = np.vstack([bilayer, single_broad, skewed, too_thin]) + n = len(profiles) + tw.init_worker(-profiles, np.zeros((n, 1)), np.arange(n).reshape(n, 1), x, + use_xcorr=False, raw_average=True) + try: + res = tw.fit_triangle_chunk(list(range(n))) + finally: + tw.init_worker(None, None, None, x) + thicknesses = [r[0] for r in res] + assert tw.MIN_THICKNESS <= thicknesses[0] <= tw.MAX_THICKNESS # real bilayer measured + assert abs(thicknesses[0] - 3.5) < 0.6 + assert all(np.isnan(t) for t in thicknesses[1:]) # the rest gated to NaN + + +def test_recovery_tier_needs_global_prior_to_recover_merged_bilayer(): + # A real ~2.8 nm bilayer so merged that the seeder resolves only one leaflet + # (n_resolved < 2). With a global bilayer prior the recovery tier must accept it + # as a centered dual (method 1); without a prior the strict gate routes it to the + # single Gaussian (method 2). This is the "1 in 3 -> most" middle ground: merged + # bilayers on a resolved surface still go through dual, anomalies do not. + x = np.linspace(-10, 10, 81) + merged = (0.01 + tw._monogaussian(x, 0.02, -1.4, 1.3) + + tw._monogaussian(x, 0.02, 1.4, 1.3)) + mid = len(merged) // 2 + lm = np.argmin(merged[:mid]); rm = np.argmin(merged[mid:]) + mid + a, b = x[lm + 2:rm - 2], merged[lm + 2:rm - 2] + assert tw._seed_bilayer_center(a, b)[2] < 2 # locally merged: one leaflet + + prof = -merged[None, :] + tw.init_worker(prof, np.zeros((1, 1)), np.array([[0]]), x, use_xcorr=False, + global_fit_params=(-1.4, 1.3, 1.4, 1.3)) + try: + r_prior = tw.fit_triangle_chunk_offsets([0])[0] + finally: + tw.init_worker(None, None, None, x) + tw.init_worker(prof, np.zeros((1, 1)), np.array([[0]]), x, + use_xcorr=False, global_fit_params=None) + try: + r_noprior = tw.fit_triangle_chunk_offsets([0])[0] + finally: + tw.init_worker(None, None, None, x) + + assert r_prior[3] == 1 # prior -> recovered as dual + assert abs(r_prior[0]) < 0.3 # centered on the midpoint + assert tw.MIN_THICKNESS <= 2 * r_prior[1] <= tw.MAX_THICKNESS + assert r_noprior[3] == 2 # no prior -> single Gaussian + + +def test_thickness_recovery_and_resolution_score(): + # measure_thickness path with a global bilayer prior: a clearly-resolved bilayer + # is measured at high confidence (score ~1), a merged bilayer is recovered via the + # prior but flagged low-confidence (score < 0.5), and a genuine single peak is + # still NaN. The resolution score lets downstream separate the two. + x = np.linspace(-10, 10, 81) + resolved = (0.01 + tw._monogaussian(x, 0.02, -1.75, 1.0) + + tw._monogaussian(x, 0.02, 1.75, 1.0)) + merged = (0.01 + tw._monogaussian(x, 0.02, -1.4, 1.3) + + tw._monogaussian(x, 0.02, 1.4, 1.3)) + single = 0.01 + tw._monogaussian(x, 0.02, 0.0, 2.4) + profiles = np.vstack([resolved, merged, single]) + n = len(profiles) + tw.init_worker(-profiles, np.zeros((n, 1)), np.arange(n).reshape(n, 1), x, + use_xcorr=False, global_fit_params=(-1.75, 1.4, 1.75, 1.4)) + try: + res = tw.fit_triangle_chunk(list(range(n))) + finally: + tw.init_worker(None, None, None, x) + (t_res, _, s_res), (t_mer, _, s_mer), (t_sin, _, s_sin) = res + # resolved: measured, high-confidence score + assert tw.MIN_THICKNESS <= t_res <= tw.MAX_THICKNESS and s_res >= 0.5 + # merged: recovered (measured) but flagged low-confidence + assert tw.MIN_THICKNESS <= t_mer <= tw.MAX_THICKNESS and s_mer < 0.5 + # genuine single peak: no thickness + assert np.isnan(t_sin) + + +def test_thickness_no_prior_stays_strict(): + # Without a global prior the thickness path stays strict: the merged bilayer that + # the recovery tier would rescue is NaN instead. + x = np.linspace(-10, 10, 81) + merged = (0.01 + tw._monogaussian(x, 0.02, -1.4, 1.3) + + tw._monogaussian(x, 0.02, 1.4, 1.3)) + tw.init_worker(-merged[None, :], np.zeros((1, 1)), np.array([[0]]), x, + use_xcorr=False, global_fit_params=None) + try: + thk, _, score = tw.fit_triangle_chunk([0])[0] + finally: + tw.init_worker(None, None, None, x) + assert np.isnan(thk) # no prior -> no recovery + assert score < 0.5 # but the score still reports it was unresolved + + def test_monogaussian_peak_and_symmetry(): x = np.linspace(-10, 10, 401) y = tw._monogaussian(x, h=2.0, c=1.0, w=1.5) From 092baa763861ea7bbca89902e9e34352e0e08699 Mon Sep 17 00:00:00 2001 From: Benjamin Barad Date: Mon, 22 Jun 2026 21:55:18 -0700 Subject: [PATCH 07/11] accept_refinement: fall back to last available iteration on early convergence Refinement stops early once a surface converges, so different surfaces can have different numbers of _refined_iter* files. When the requested STEP is not available for a surface, use the largest available iteration <= STEP and print a warning rather than skipping the surface. Also fix the stale "python run_pycurv.py" hint to the current "morphometrics pycurv" command. Co-Authored-By: Claude Opus 4.8 --- surface_morphometrics/accept_refinement.py | 45 +++++++++++++++++----- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/surface_morphometrics/accept_refinement.py b/surface_morphometrics/accept_refinement.py index f9a66b3..9e7aaf8 100644 --- a/surface_morphometrics/accept_refinement.py +++ b/surface_morphometrics/accept_refinement.py @@ -25,6 +25,7 @@ __license__ = "GPLv3" import os +import re from glob import glob from pathlib import Path @@ -32,6 +33,21 @@ import yaml +def _iterations_by_surface(work_dir): + """Map each surface basename to its sorted available refinement iterations. + + Refinement stops early once a surface converges, so different surfaces may + have different numbers of `_refined_iter*` files. + """ + pattern = re.compile(r"^(.*)_refined_iter(\d+)\.surface\.vtp$") + out = {} + for path in glob(f"{work_dir}*_refined_iter*.surface.vtp"): + match = pattern.match(os.path.basename(path)) + if match: + out.setdefault(match.group(1), []).append(int(match.group(2))) + return {base: sorted(iters) for base, iters in out.items()} + + # The canonical surface files the downstream pipeline consumes. Only these are # backed up (from the original) and promoted (from the accepted iteration). # {rh} is filled in with the configured radius_hit. @@ -200,11 +216,10 @@ def accept_refinement_cli(configfile, step, component_name, tomogram, dry_run): print("DRY RUN - no files will be changed") print("-" * 60) - # Discover surfaces that have the requested iteration by finding their - # accepted-step surface files, then strip the tag to recover the basename. - iter_tag = f"_refined_iter{step}.surface.vtp" - candidates = sorted(glob(f"{work_dir}*{iter_tag}")) - basenames = [os.path.basename(p)[:-len(iter_tag)] for p in candidates] + # Discover every refined surface and the iterations it actually has (a surface + # that converged early stops before `step`). + iters_by_surface = _iterations_by_surface(work_dir) + basenames = sorted(iters_by_surface) # Apply optional component / tomogram filters on the basename. if tomogram: @@ -213,14 +228,23 @@ def accept_refinement_cli(configfile, step, component_name, tomogram, dry_run): basenames = [b for b in basenames if b.endswith(f"_{component_name}")] if not basenames: - print(f"No surfaces with a refined iteration {step} found in {work_dir}" - + (f" matching the given filters." if (component_name or tomogram) else ".")) + print(f"No refined surfaces found in {work_dir}" + + (" matching the given filters." if (component_name or tomogram) else ".")) return accepted = 0 needs_pycurv = [] for basename in basenames: - result = accept_one(work_dir, basename, step, radius_hit, dry_run) + available = iters_by_surface[basename] + # Use the requested iteration, or the last available one if refinement + # converged before it (largest available iteration <= step). + usable = [i for i in available if i <= step] or available + effective_step = usable[-1] + if effective_step != step: + print(f" WARNING: {basename}: iteration {step} not available - refinement " + f"converged early at iteration {effective_step}; using iteration " + f"{effective_step}.") + result = accept_one(work_dir, basename, effective_step, radius_hit, dry_run) if result is not None: accepted += 1 if result is False: @@ -229,7 +253,8 @@ def accept_refinement_cli(configfile, step, component_name, tomogram, dry_run): print("-" * 60) verb = "Would accept" if dry_run else "Accepted" - print(f"{verb} iteration {step} for {accepted} surface(s).") + print(f"{verb} refinement for {accepted} surface(s) (requested iteration {step}; " + "early-converged surfaces used their last iteration).") if accepted: print("Originals backed up with a .orig.bak suffix; only canonical surfaces " "and refinement summaries were kept.") @@ -239,7 +264,7 @@ def accept_refinement_cli(configfile, step, component_name, tomogram, dry_run): print("iteration and have no curvature graph. Re-run pycurv before any downstream") print("step (distances, density sampling, thickness):") for basename in needs_pycurv: - print(f" python run_pycurv.py {configfile} {basename}.surface.vtp") + print(f" morphometrics pycurv {configfile} {basename}.surface.vtp") if __name__ == "__main__": From 73ae93320b9009d0f82affa7ffe763f42d1ab816 Mon Sep 17 00:00:00 2001 From: Benjamin Barad Date: Fri, 26 Jun 2026 10:28:27 -0700 Subject: [PATCH 08/11] Centralize config loading: normalize dir paths and validate required keys Config handling was scattered and inconsistent across commands: directory paths had to end in "/" or string-concatenated paths broke, and missing settings either raised a raw KeyError traceback (older commands using config["seg_dir"]) or were silently defaulted, with no uniform behavior. Add surface_morphometrics/config_utils.py with load_config(configfile, require=()): * normalize_dirs appends a trailing "/" to seg_dir / tomo_dir / work_dir when set but missing one, so a config written without trailing slashes (e.g. work_dir: ./morphometrics) behaves identically to one with them. * require_keys raises a single clean click.ClickException listing every missing or empty required setting (with its purpose), instead of a KeyError partway through a run. Route all 13 commands through load_config and declare per-command required keys: seg_dir+work_dir for the mesh/curvature/distance/stats steps, work_dir+tomo_dir for the tomogram steps (sample_density, refine_mesh) so a missing tomo_dir fails cleanly there, and work_dir only for the analysis/export steps. Remove the old per-command dir-validation blocks (the KeyError-prone bracket checks, redundant slash handling, and silent work_dir=seg_dir fallbacks); segmentation_to_meshes validates after its data_dir->seg_dir migration warning so migrating users still see the hint. Co-Authored-By: Claude Opus 4.8 --- surface_morphometrics/accept_refinement.py | 5 +- surface_morphometrics/config_utils.py | 74 +++++++++++++++++++ surface_morphometrics/export_obj.py | 5 +- surface_morphometrics/extract_patches.py | 5 +- surface_morphometrics/generate_patches.py | 5 +- .../label_connected_components.py | 5 +- .../measure_distances_orientations.py | 15 +--- surface_morphometrics/measure_thickness.py | 5 +- surface_morphometrics/morphometrics_stats.py | 43 +++++------ surface_morphometrics/patch_statistics.py | 5 +- surface_morphometrics/refine_mesh.py | 19 ++--- surface_morphometrics/run_pycurv.py | 12 +-- surface_morphometrics/sample_density.py | 23 ++---- .../segmentation_to_meshes.py | 16 ++-- 14 files changed, 136 insertions(+), 101 deletions(-) create mode 100644 surface_morphometrics/config_utils.py diff --git a/surface_morphometrics/accept_refinement.py b/surface_morphometrics/accept_refinement.py index 9e7aaf8..3cc6d04 100644 --- a/surface_morphometrics/accept_refinement.py +++ b/surface_morphometrics/accept_refinement.py @@ -32,6 +32,8 @@ import click import yaml +from .config_utils import load_config + def _iterations_by_surface(work_dir): """Map each surface basename to its sorted available refinement iterations. @@ -201,8 +203,7 @@ def accept_refinement_cli(configfile, step, component_name, tomogram, dry_run): graph, the command warns that pycurv must be re-run on the promoted surface before any downstream analysis. """ - with open(configfile) as f: - config = yaml.safe_load(f) + config = load_config(configfile, require=("work_dir",)) work_dir = config.get("work_dir", config.get("seg_dir", "./")) if not work_dir.endswith("/"): diff --git a/surface_morphometrics/config_utils.py b/surface_morphometrics/config_utils.py new file mode 100644 index 0000000..bfdc44e --- /dev/null +++ b/surface_morphometrics/config_utils.py @@ -0,0 +1,74 @@ +#! /usr/bin/env python +"""Shared helpers for loading the pipeline config.yml.""" + +__author__ = "Benjamin Barad" +__email__ = "benjamin.barad@gmail.com" +__license__ = "GPLv3" + +import click +import yaml + +# Directory settings that the pipeline joins with filenames by plain string +# concatenation (e.g. work_dir + "*.gt"), so they must end in a path separator. +DIR_KEYS = ("seg_dir", "tomo_dir", "work_dir") + +# Human-readable purpose of each setting, used in the missing-required-key error. +_KEY_HELP = { + "seg_dir": "directory of segmentation MRC files", + "work_dir": "directory for pipeline output files", + "tomo_dir": "directory of tomogram MRC files (needed for density sampling, " + "thickness, and mesh refinement)", +} + + +def normalize_dirs(config): + """Append a trailing "/" to any set directory path that lacks one. + + Mutates and returns ``config``. Only ``seg_dir``/``tomo_dir``/``work_dir`` that + are non-empty strings are touched; empty or unset values are left alone so each + command can apply its own defaulting. + """ + if isinstance(config, dict): + for key in DIR_KEYS: + value = config.get(key) + if isinstance(value, str) and value and not value.endswith("/"): + config[key] = value + "/" + return config + + +def require_keys(config, required, configfile=""): + """Raise a clean CLI error if any ``required`` top-level key is unset. + + A key counts as unset if it is missing or maps to an empty/None value. All + missing keys are reported together so the user fixes the config in one pass. + ``required`` is per-command: each command demands only the settings it uses + (e.g. tomogram steps require ``tomo_dir``; analysis/export steps need only + ``work_dir``). + """ + missing = [key for key in required if not (isinstance(config, dict) and config.get(key))] + if missing: + target = configfile or "config.yml" + details = "\n".join( + f" - {key}" + (f" ({_KEY_HELP[key]})" if key in _KEY_HELP else "") + for key in missing + ) + raise click.ClickException( + f"{target} is missing required setting(s):\n{details}\n" + "Add them to your config.yml (start one with `morphometrics new_config`)." + ) + + +def load_config(configfile, require=()): + """Load a pipeline config YAML, normalizing directory paths and validating. + + A config written without trailing slashes (e.g. ``work_dir: ./morphometrics``) + behaves identically to one with them, since the pipeline builds paths by string + concatenation. ``require`` is a tuple of top-level keys this command needs; if any + is missing the user gets one clean error (see :func:`require_keys`) instead of a + ``KeyError`` traceback partway through the run. + """ + with open(configfile) as handle: + config = yaml.safe_load(handle) or {} + normalize_dirs(config) + require_keys(config, require, configfile) + return config diff --git a/surface_morphometrics/export_obj.py b/surface_morphometrics/export_obj.py index 2ee2148..b502268 100644 --- a/surface_morphometrics/export_obj.py +++ b/surface_morphometrics/export_obj.py @@ -30,6 +30,8 @@ import numpy as np import yaml +from .config_utils import load_config + # --------------------------------------------------------------------------- # VTP reading (vtk) @@ -241,8 +243,7 @@ def export_obj_cli(configfile, vtp, feature, cmap, vmin, vmax, nan_color, patter CONFIGFILE: path to config.yml. VTP: a surface .vtp to export; if omitted, all matching surfaces in work_dir. """ - with open(configfile) as f: - config = yaml.safe_load(f) + config = load_config(configfile, require=("work_dir",)) work_dir = config.get("work_dir", config.get("seg_dir", "./")) if not work_dir.endswith("/"): work_dir += "/" diff --git a/surface_morphometrics/extract_patches.py b/surface_morphometrics/extract_patches.py index 5a33827..566d8a1 100644 --- a/surface_morphometrics/extract_patches.py +++ b/surface_morphometrics/extract_patches.py @@ -29,6 +29,8 @@ import numpy as np import yaml +from .config_utils import load_config + def mask_from_property(values, vmin=None, vmax=None): """Boolean mask of triangles whose property is within [vmin, vmax]. @@ -144,8 +146,7 @@ def extract_patches_cli(configfile, graph_file, label_col, prop, vmin, vmax, if (label_col is None) == (prop is None): raise click.UsageError("Specify exactly one of --by