From 54e5b203d0477570393a9f858927eaf34544253c Mon Sep 17 00:00:00 2001 From: kalidke Date: Wed, 27 May 2026 15:46:57 -0600 Subject: [PATCH] Add track_length range filter to frame connection Add optional (min, max) localizations-per-track filter on FrameConnectConfig.track_length (Union{Tuple{Float64,Float64},Nothing}, nothing = disabled). Tracks outside the inclusive range are dropped during combinelocalizations: (2.0, Inf) removes single-frame blinks, while a finite upper bound like (2.0, 50.0) also removes over-long tracks (fiducials, sticky docking strands in dense DNA-PAINT). Filtering is offset-safe (ncumulative spans all localizations, a keep-mask selects emitted clusters) and only affects the combined output; info.connected retains all tracks as full ground-truth. FrameConnectInfo gains n_filtered, a cause-attributed count of tracks dropped by the range filter. Docs (docstrings, README, docs/src/index.md, api_overview.md, CLAUDE.md) and tests updated; 184 tests pass. Co-Authored-By: Claude Opus 4.7 --- CLAUDE.md | 2 +- Project.toml | 2 +- README.md | 3 +- api_overview.md | 8 +++-- docs/src/index.md | 4 ++- src/combinelocalizations.jl | 52 ++++++++++++++++++--------- src/connectinfo.jl | 9 +++-- src/frameconnect.jl | 19 +++++++++- src/structdefinitions.jl | 9 +++++ test/test_combinelocalizations.jl | 58 +++++++++++++++++++++++++++++++ test/test_types.jl | 3 +- 11 files changed, 142 insertions(+), 27 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 42d5c65..1285ae8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -39,7 +39,7 @@ Test files must `include("test/test_helpers.jl")` first since `runtests.jl` load 4. **Calibrate** (optional, `calibration.jl`): When `config.calibration` is set, analyzes frame-to-frame jitter within connected tracks to estimate motion variance (`σ_motion²`) and CRLB scale factor (`k²`). Applies corrected uncertainties `Σ_corrected = σ_motion² I + k² Σ_CRLB` before combination. Falls back gracefully (identity correction) if insufficient data or poor fit. -5. **Combine** (`combinelocalizations.jl`): MLE weighted mean using full 2x2 covariance (precision-weighted). Produces higher-precision output localizations. +5. **Combine** (`combinelocalizations.jl`): MLE weighted mean using full 2x2 covariance (precision-weighted). Produces higher-precision output localizations. The `track_length` config (inclusive `(min, max)` range, `nothing`=disabled) drops tracks whose localization count falls outside the range here (offset-safe: `ncumulative` spans all localizations, a `keep` mask selects which clusters to emit) — `info.connected` retains all tracks, so filtering only affects the combined output. `info.n_filtered` reports the count dropped. ### Data Flow diff --git a/Project.toml b/Project.toml index bb182f9..6cdd26b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "SMLMFrameConnection" uuid = "1517b1a9-81e8-461f-b994-92eb29599690" -version = "0.3.0" +version = "0.3.1" authors = ["klidke@unm.edu"] [deps] diff --git a/README.md b/README.md index 91933da..9d652db 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ For complete SMLM workflows (detection + fitting + frame-connection + rendering) | `max_sigma_dist` | 5.0 | Sigma multiplier for preclustering distance threshold | | `max_frame_gap` | 5 | Maximum frame gap for temporal adjacency | | `max_neighbors` | 2 | Maximum nearest-neighbors for precluster membership | +| `track_length` | `nothing` | Inclusive `(min, max)` range on localizations per track; tracks outside it are dropped (`nothing` disables). `info.n_filtered` reports the count dropped | ```julia # Keyword form (most common) @@ -48,7 +49,7 @@ config = FrameConnectConfig(max_frame_gap=10, max_sigma_dist=3.0) (combined, info) = frameconnect(smld, config) ``` -**Parameter guidance:** Default values work well for standard dSTORM/PALM data. For dense samples, reduce `max_sigma_dist` to 3.0. For long dark states (dSTORM), increase `max_frame_gap` to 10-20. +**Parameter guidance:** Default values work well for standard dSTORM/PALM data. For dense samples, reduce `max_sigma_dist` to 3.0. For long dark states (dSTORM), increase `max_frame_gap` to 10-20. To discard single-frame blinks set `track_length=(2.0, Inf)`; a finite upper bound like `(2.0, 50.0)` also removes over-long tracks such as fiducials or sticky docking strands in dense DNA-PAINT. ## Output Format diff --git a/api_overview.md b/api_overview.md index 48ceb65..8ac769d 100644 --- a/api_overview.md +++ b/api_overview.md @@ -39,6 +39,8 @@ For simulated data where `track_id` indicates ground-truth emitter ID. Useful fo max_sigma_dist::Float64 = 5.0 # Sigma multiplier for preclustering distance threshold max_frame_gap::Int = 5 # Maximum frame gap for temporal adjacency max_neighbors::Int = 2 # Maximum nearest-neighbors for precluster membership + track_length::Union{Tuple{Float64,Float64}, Nothing} = nothing # Inclusive (min,max) locs-per-track filter; nothing disables + calibration::Union{CalibrationConfig, Nothing} = nothing # Optional uncertainty calibration end ``` Configuration parameters for frame connection. Use with `frameconnect(smld, config)` or pass fields as kwargs to `frameconnect(smld; kwargs...)`. @@ -58,8 +60,9 @@ config = FrameConnectConfig(max_frame_gap=10, max_sigma_dist=3.0) struct FrameConnectInfo{T} <: AbstractSMLMInfo connected::BasicSMLD{T} # Input with track_id assigned (uncombined) n_input::Int # Number of input localizations - n_tracks::Int # Number of tracks formed - n_combined::Int # Number of output localizations + n_tracks::Int # Number of tracks formed (pre length-filter) + n_combined::Int # Number of output localizations (post length-filter) + n_filtered::Int # Tracks dropped outside track_length range (0 if disabled) k_on::Float64 # Rate: dark → visible (1/frame) k_off::Float64 # Rate: visible → dark (1/frame) k_bleach::Float64 # Rate: photobleaching (1/frame) @@ -68,6 +71,7 @@ struct FrameConnectInfo{T} <: AbstractSMLMInfo elapsed_s::Float64 # Wall time in seconds algorithm::Symbol # Algorithm used (:lap) n_preclusters::Int # Number of preclusters formed + calibration::Union{CalibrationResult, Nothing} # Calibration diagnostics (nothing if disabled) end ``` diff --git a/docs/src/index.md b/docs/src/index.md index 368acf5..9e96682 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -61,12 +61,14 @@ frameconnect(smld; n_density_neighbors = 2, # Clusters for density estimation max_sigma_dist = 5.0, # Distance threshold multiplier max_frame_gap = 5, # Max frame gap for connections - max_neighbors = 2 # Nearest neighbors for preclustering + max_neighbors = 2, # Nearest neighbors for preclustering + track_length = nothing # Inclusive (min,max) locs-per-track filter; nothing disables ) ``` - `max_sigma_dist`: Higher values allow connections over larger distances - `max_frame_gap`: Increase for dyes with long dark states (dSTORM: 10-20) +- `track_length`: `(2.0, Inf)` drops single-frame blinks; `(2.0, 50.0)` also drops over-long tracks (fiducials, sticky strands). Count dropped is reported in `info.n_filtered` ## API Reference diff --git a/src/combinelocalizations.jl b/src/combinelocalizations.jl index 4da2016..5e87635 100644 --- a/src/combinelocalizations.jl +++ b/src/combinelocalizations.jl @@ -2,7 +2,8 @@ using SMLMData using StatsBase """ - smld_combined = combinelocalizations(smld::BasicSMLD{T,E}) where {T, E<:SMLMData.AbstractEmitter} + smld_combined = combinelocalizations(smld::BasicSMLD{T,E}; + track_length::Union{Tuple{Float64,Float64},Nothing}=nothing) where {T, E<:SMLMData.AbstractEmitter} Combine clustered localizations in `smld` into higher precision localizations. @@ -10,8 +11,16 @@ Combine clustered localizations in `smld` into higher precision localizations. This function combines localizations in `smld` that share the same value of track_id. Localizations are combined assuming they arose from independent measurements of the same position with Gaussian errors. + +`track_length` is an inclusive `(min, max)` range on the number of localizations +per track: a track is emitted only if `min <= nperID <= max`. `nothing` (the +default) disables filtering. This counts localizations sharing a `track_id`, not +temporal frame-span. Use `(2.0, Inf)` to drop single-frame blinks, or a finite +upper bound like `(2.0, 50.0)` to also drop over-long tracks (e.g. fiducials or +sticky docking strands in dense DNA-PAINT data). """ -function combinelocalizations(smld::BasicSMLD{T,E}) where {T, E<:SMLMData.AbstractEmitter} +function combinelocalizations(smld::BasicSMLD{T,E}; + track_length::Union{Tuple{Float64,Float64},Nothing}=nothing) where {T, E<:SMLMData.AbstractEmitter} # Extract arrays from emitters emitters = smld.emitters connectID = [e.track_id for e in emitters] @@ -33,8 +42,17 @@ function combinelocalizations(smld::BasicSMLD{T,E}) where {T, E<:SMLMData.Abstra # Loop over clusters and combine their localization data as appropriate. nperID = counts(connectID) nperID = nperID[nperID .!= 0] + # ncumulative is built over the FULL set of present clusters so its offsets + # span every localization in the sorted arrays. The length filter only + # selects which clusters to emit (`keep`); it must not alter these offsets. ncumulative = [0; cumsum(nperID)] - nclusters = length(nperID) + keep = if track_length === nothing + collect(1:length(nperID)) + else + lo, hi = track_length + findall(n -> lo <= n <= hi, nperID) + end + nclusters = length(keep) # Pre-allocate arrays for combined values # Use emitter's native precision, not SMLD's type parameter @@ -53,7 +71,7 @@ function combinelocalizations(smld::BasicSMLD{T,E}) where {T, E<:SMLMData.Abstra datasetnum_combined = Vector{Int}(undef, nclusters) id_combined = Vector{Int}(undef, nclusters) - for nn in 1:nclusters + for (out_idx, nn) in enumerate(keep) indices = (1:nperID[nn]) .+ ncumulative[nn] # Combine positions using precision-weighted mean with full 2x2 covariance. @@ -88,22 +106,22 @@ function combinelocalizations(smld::BasicSMLD{T,E}) where {T, E<:SMLMData.Abstra Σ_comb_12 = -P_sum[1,2] / det_P # σ_xy # Combined position (cast back to emitter precision) - x_combined[nn] = ET(Σ_comb_11 * μ_weighted[1] + Σ_comb_12 * μ_weighted[2]) - y_combined[nn] = ET(Σ_comb_12 * μ_weighted[1] + Σ_comb_22 * μ_weighted[2]) - σ_x_combined[nn] = ET(sqrt(Σ_comb_11)) - σ_y_combined[nn] = ET(sqrt(Σ_comb_22)) - σ_xy_combined[nn] = ET(Σ_comb_12) + x_combined[out_idx] = ET(Σ_comb_11 * μ_weighted[1] + Σ_comb_12 * μ_weighted[2]) + y_combined[out_idx] = ET(Σ_comb_12 * μ_weighted[1] + Σ_comb_22 * μ_weighted[2]) + σ_x_combined[out_idx] = ET(sqrt(Σ_comb_11)) + σ_y_combined[out_idx] = ET(sqrt(Σ_comb_22)) + σ_xy_combined[out_idx] = ET(Σ_comb_12) # Photons and background: sum (independent measurements) - photons_combined[nn] = sum(photons[indices]) - σ_photons_combined[nn] = sqrt(sum(σ_photons[indices] .^ 2)) - bg_combined[nn] = sum(bg[indices]) - σ_bg_combined[nn] = sqrt(sum(σ_bg[indices] .^ 2)) + photons_combined[out_idx] = sum(photons[indices]) + σ_photons_combined[out_idx] = sqrt(sum(σ_photons[indices] .^ 2)) + bg_combined[out_idx] = sum(bg[indices]) + σ_bg_combined[out_idx] = sqrt(sum(σ_bg[indices] .^ 2)) - connectID_combined[nn] = connectID[indices[1]] - framenum_combined[nn] = framenum[indices[1]] - datasetnum_combined[nn] = datasetnum[indices[1]] - id_combined[nn] = id[indices[1]] + connectID_combined[out_idx] = connectID[indices[1]] + framenum_combined[out_idx] = framenum[indices[1]] + datasetnum_combined[out_idx] = datasetnum[indices[1]] + id_combined[out_idx] = id[indices[1]] end # Create new emitters for combined results diff --git a/src/connectinfo.jl b/src/connectinfo.jl index 7101127..268273c 100644 --- a/src/connectinfo.jl +++ b/src/connectinfo.jl @@ -8,8 +8,12 @@ Secondary output from `frameconnect()` containing track assignments and algorith # Fields - `connected::BasicSMLD{T}`: Input SMLD with track_id assigned (localizations uncombined) - `n_input::Int`: Number of input localizations -- `n_tracks::Int`: Number of tracks formed -- `n_combined::Int`: Number of output localizations +- `n_tracks::Int`: Number of tracks formed (connection-stage count, pre length-filter) +- `n_combined::Int`: Number of output localizations (post length-filter) +- `n_filtered::Int`: Number of tracks dropped for falling outside the `track_length` + range. When `track_length === nothing` (default) this is `0`. Note + `n_tracks - n_combined == n_filtered` while the length filter + is the only cause of dropped tracks. - `k_on::Float64`: Estimated on rate (1/frame) - `k_off::Float64`: Estimated off rate (1/frame) - `k_bleach::Float64`: Estimated bleach rate (1/frame) @@ -47,6 +51,7 @@ struct FrameConnectInfo{T} <: AbstractSMLMInfo n_input::Int n_tracks::Int n_combined::Int + n_filtered::Int k_on::Float64 k_off::Float64 k_bleach::Float64 diff --git a/src/frameconnect.jl b/src/frameconnect.jl index a5ad7b8..dee516c 100644 --- a/src/frameconnect.jl +++ b/src/frameconnect.jl @@ -28,6 +28,11 @@ using their MLE position estimate assuming Gaussian noise. in a precluster (see `precluster`) - `max_neighbors::Int=2`: Maximum number of nearest-neighbors inspected for precluster membership (see `precluster`) +- `track_length::Union{Tuple{Float64,Float64},Nothing}=nothing`: Inclusive `(min, max)` + range on localizations per track; tracks outside it are dropped. `nothing` + is a no-op. `(2.0, Inf)` removes single-frame blinks; `(2.0, 50.0)` also + drops over-long tracks (fiducials, sticky strands). The number dropped is + reported as `info.n_filtered` (see [`combinelocalizations`](@ref)). # Returns A tuple `(combined, info)`: @@ -115,17 +120,29 @@ function frameconnect(smld::BasicSMLD{T,E}, config::FrameConnectConfig) where {T end # Combine the connected localizations into higher precision localizations. - smld_combined = combinelocalizations(smld_to_combine) + smld_combined = combinelocalizations(smld_to_combine; + track_length=config.track_length) elapsed_s = time() - t_start # Build FrameConnectInfo n_tracks = length(unique(connectID_final)) + + # Tracks dropped specifically by the length filter (cause-attributed count, + # not n_tracks - n_combined, so it stays correct if combine ever drops + # clusters for other reasons). connectID_final is compressed to 1:n_tracks. + n_filtered = if config.track_length === nothing + 0 + else + lo, hi = config.track_length + count(n -> !(lo <= n <= hi), counts(connectID_final)) + end info = FrameConnectInfo{T}( smld_connected, length(smld.emitters), n_tracks, length(smld_combined.emitters), + n_filtered, params.k_on, params.k_off, params.k_bleach, diff --git a/src/structdefinitions.jl b/src/structdefinitions.jl index 33f366e..780c613 100644 --- a/src/structdefinitions.jl +++ b/src/structdefinitions.jl @@ -80,6 +80,14 @@ Configuration parameters for frame connection algorithm. in a precluster (see `precluster`) - `max_neighbors::Int=2`: Maximum number of nearest-neighbors inspected for precluster membership (see `precluster`) +- `track_length::Union{Tuple{Float64,Float64}, Nothing}=nothing`: Inclusive `(min, max)` + range on the number of localizations a track must contain to be emitted + as a combined localization (`lo <= nperID <= hi`). Counts localizations + sharing a `track_id`, not temporal frame-span. `nothing` (default) disables + filtering. Use `(2.0, Inf)` to drop single-frame blinks, or `(2.0, 50.0)` + to also drop over-long tracks (e.g. fiducials, sticky docking strands in + dense DNA-PAINT). Mirrors the `(min, max)` filter idiom used elsewhere in + the ecosystem. See `combinelocalizations`. - `calibration::Union{CalibrationConfig, Nothing}=nothing`: Optional uncertainty calibration # Example @@ -103,6 +111,7 @@ Base.@kwdef struct FrameConnectConfig <: AbstractSMLMConfig max_sigma_dist::Float64 = 5.0 max_frame_gap::Int = 5 max_neighbors::Int = 2 + track_length::Union{Tuple{Float64,Float64}, Nothing} = nothing calibration::Union{CalibrationConfig, Nothing} = nothing end diff --git a/test/test_combinelocalizations.jl b/test/test_combinelocalizations.jl index 922e50b..93b9be0 100644 --- a/test/test_combinelocalizations.jl +++ b/test/test_combinelocalizations.jl @@ -170,4 +170,62 @@ @test result.n_frames == smld.n_frames @test result.n_datasets == smld.n_datasets end + + @testset "track_length range filtering" begin + # make_prelabeled_smld: track 1 (2 locs), track 2 (3 locs), track 3 (1 loc) + smld = make_prelabeled_smld() + + # Default (nothing) is a no-op + @test length(combinelocalizations(smld).emitters) == 3 + @test length(combinelocalizations(smld; track_length=nothing).emitters) == 3 + + # (2, Inf) drops the single-localization track (track 3) + r2 = combinelocalizations(smld; track_length=(2.0, Inf)) + @test length(r2.emitters) == 2 + @test Set(e.track_id for e in r2.emitters) == Set([1, 2]) + + # (3, Inf) keeps only the 3-localization track (track 2) + r3 = combinelocalizations(smld; track_length=(3.0, Inf)) + @test length(r3.emitters) == 1 + @test r3.emitters[1].track_id == 2 + + # (4, Inf) drops everything + @test isempty(combinelocalizations(smld; track_length=(4.0, Inf)).emitters) + + # Upper bound: (1, 2) keeps tracks 1 (2 locs) and 3 (1 loc), drops track 2 (3 locs) + rhi = combinelocalizations(smld; track_length=(1.0, 2.0)) + @test Set(e.track_id for e in rhi.emitters) == Set([1, 3]) + + # Two-sided window: (2, 2) keeps only the 2-localization track (track 1) + rwin = combinelocalizations(smld; track_length=(2.0, 2.0)) + @test length(rwin.emitters) == 1 + @test rwin.emitters[1].track_id == 1 + end + + @testset "offset-safety dropping a middle cluster" begin + # Drop a cluster that is neither first nor last: the surviving clusters' + # values must stay correct, which exercises that ncumulative offsets + # still span every localization in the sorted arrays. + σ = 0.02 + emitters = [ + make_emitter(5.0, 5.0, 1; σ_pos=σ, track_id=1, photons=1000.0), + make_emitter(5.0, 5.0, 2; σ_pos=σ, track_id=1, photons=1000.0), + make_emitter(10.0, 10.0, 1; σ_pos=σ, track_id=2, photons=777.0), # dropped + make_emitter(15.0, 15.0, 3; σ_pos=σ, track_id=3, photons=1000.0), + make_emitter(15.0, 15.0, 4; σ_pos=σ, track_id=3, photons=1000.0), + ] + smld = make_test_smld(emitters; n_frames=4) + result = combinelocalizations(smld; track_length=(2.0, Inf)) + + @test length(result.emitters) == 2 + sorted = sort(result.emitters, by=e->e.track_id) + # Track 1 survives at x=5 with summed photons; track 2 (x=10) is gone + @test sorted[1].track_id == 1 + @test sorted[1].x ≈ 5.0 atol=1e-10 + @test sorted[1].photons ≈ 2000.0 atol=1e-10 + # Track 3 survives at x=15 — would be corrupted if offsets shifted + @test sorted[2].track_id == 3 + @test sorted[2].x ≈ 15.0 atol=1e-10 + @test sorted[2].photons ≈ 2000.0 atol=1e-10 + end end diff --git a/test/test_types.jl b/test/test_types.jl index 15eb84d..3569021 100644 --- a/test/test_types.jl +++ b/test/test_types.jl @@ -8,7 +8,7 @@ smld = BasicSMLD(emitters, camera, 1, 1, Dict{String,Any}()) info = FrameConnectInfo{Float64}( - smld, 10, 5, 5, 0.1, 0.5, 0.01, 0.05, [1.0, 2.0], 1.5, :lap, 3, nothing + smld, 10, 5, 5, 0, 0.1, 0.5, 0.01, 0.05, [1.0, 2.0], 1.5, :lap, 3, nothing ) @test info isa FrameConnectInfo{Float64} @@ -16,6 +16,7 @@ @test info.n_input == 10 @test info.n_tracks == 5 @test info.n_combined == 5 + @test info.n_filtered == 0 @test info.k_on == 0.1 @test info.k_off == 0.5 @test info.k_bleach == 0.01