Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
274 commits
Select commit Hold shift + click to select a range
c54610b
fix: harden model constructor validation
ngamarra Mar 31, 2026
065c900
test: cover contrast mode validation
ngamarra Mar 31, 2026
bf49988
feat: add region contrast evidence builders
ngamarra Mar 31, 2026
f18af40
feat: add effect-size region contrast workflow
ngamarra Apr 1, 2026
0e63b25
fix: harden effect-size region contrasts
ngamarra Apr 1, 2026
59db7ea
fix: align region contrast effect-size validation
ngamarra Apr 1, 2026
e52a3cb
feat: add beta-binomial region contrast scoring
ngamarra Apr 1, 2026
6541a30
fix: correct beta-binomial region scoring
ngamarra Apr 1, 2026
a5d8f3a
test: strengthen beta-binomial contrast coverage
ngamarra Apr 1, 2026
4c99d13
docs: add region contrast workflow guide
ngamarra Apr 1, 2026
c5718c1
feat: add global analysis result model
ngamarra Apr 1, 2026
7569231
fix: harden global analysis result validation
ngamarra Apr 1, 2026
ca25364
test: strengthen global analysis result coverage
ngamarra Apr 1, 2026
4267206
feat: add global pileup summary workflow
ngamarra Apr 1, 2026
deaa549
fix: preserve missing global fractions
ngamarra Apr 1, 2026
20a551b
feat: add global window summaries
ngamarra Apr 1, 2026
cf11160
fix: zero window fractions at no coverage
ngamarra Apr 1, 2026
62867a5
feat: add global analysis orchestration
ngamarra Apr 1, 2026
a8fb1cc
fix: support multi-motif global analysis orchestration
ngamarra Apr 1, 2026
bd81267
feat: add region discovery result model
ngamarra Apr 1, 2026
06b1e4f
feat: add deterministic region discovery scan
ngamarra Apr 1, 2026
b0b1788
fix: tighten region discovery scan behavior
ngamarra Apr 1, 2026
8289de8
fix: reject unsupported region discovery contrast modes
ngamarra Apr 1, 2026
764cfc6
feat: add region discovery merge and bed export helpers
ngamarra Apr 1, 2026
7cfcbc2
fix: preserve region discovery merge ranks
ngamarra Apr 1, 2026
a57865d
docs: add region discovery workflow guide
ngamarra Apr 1, 2026
b94e779
fix: require explicit contrast for beta-binomial discovery
ngamarra Apr 1, 2026
994bca5
test: cover discovery to contrasts handoff
ngamarra Apr 1, 2026
f087f5f
docs: add paired region discovery design spec
ngamarra Apr 1, 2026
add5acb
feat: add paired region discovery pair resolution
ngamarra Apr 1, 2026
953ce43
fix: aggregate paired region discovery windows
ngamarra Apr 1, 2026
b0650f9
test: cover paired region discovery regrouping
ngamarra Apr 1, 2026
3c32cbd
test: verify paired helper aggregation
ngamarra Apr 1, 2026
cbe1942
feat: add paired region discovery pair resolution
ngamarra Apr 1, 2026
de86ef9
feat: add matched pairwise region discovery scoring
ngamarra Apr 1, 2026
f41a44a
fix: harden matched pairwise discovery guards
ngamarra Apr 1, 2026
af1b18d
fix: rerank matched pairwise hits after coverage filtering
ngamarra Apr 1, 2026
9403db6
fix: keep matched pairwise ranks consistent after filtering
ngamarra Apr 1, 2026
19c0f74
feat: add matched pairwise region discovery scoring
ngamarra Apr 1, 2026
752879e
fix: harden matched pairwise discovery guards
ngamarra Apr 1, 2026
9d1a79a
fix: rerank matched pairwise hits after coverage filtering
ngamarra Apr 1, 2026
0dadec8
feat: add paired time-course region discovery
ngamarra Apr 1, 2026
ace1eb5
fix: tighten paired time-course trajectories
ngamarra Apr 1, 2026
ed20093
feat: add paired time-course region discovery
ngamarra Apr 1, 2026
35a0954
docs: add paired region discovery guide
ngamarra Apr 1, 2026
4039553
test: cover paired time-course metadata
ngamarra Apr 1, 2026
48afa9f
fix: tighten paired discovery policy validation
ngamarra Apr 1, 2026
b6039ca
feat: add region discovery cluster result
ngamarra Apr 1, 2026
8abbb13
fix: tighten discovery cluster result model
ngamarra Apr 1, 2026
b060a61
fix: restore discovery cluster result export
ngamarra Apr 1, 2026
6cc6e5e
feat: add discovery cluster workflow
ngamarra Apr 1, 2026
4cfdbc8
fix: serialize discovery cluster matched regions
ngamarra Apr 1, 2026
b5ad109
fix: materialize workflow samples iterable
ngamarra Apr 1, 2026
e517ef7
docs: clarify discovery to cluster workflow
ngamarra Apr 1, 2026
b1847a8
Fix workflow matched region review regressions
ngamarra Apr 1, 2026
360315b
fix: fail fast discovery cluster validation
ngamarra Apr 1, 2026
bfd32b8
feat: add discovery cluster contrast result
ngamarra Apr 1, 2026
a5bb450
fix: validate selected discovery regions
ngamarra Apr 1, 2026
d103873
Add chrom alias coverage for selected regions
ngamarra Apr 1, 2026
3fb5acd
feat: add discovery cluster contrast workflow
ngamarra Apr 1, 2026
339aee9
docs: document discovery cluster contrast workflow
ngamarra Apr 1, 2026
59a630e
Fix discovery cluster contrast region handoff
ngamarra Apr 1, 2026
ccaec9e
Fix contrast region summary normalization
ngamarra Apr 1, 2026
5510eef
Fix region id normalization for contrast joins
ngamarra Apr 1, 2026
b23f41b
feat: add cluster occupancy evidence builder
ngamarra Apr 2, 2026
cc6f038
feat: add cluster occupancy region scoring
ngamarra Apr 2, 2026
f99b548
Fix cluster occupancy contrast mode validation
ngamarra Apr 2, 2026
2f0f3cd
Fix cluster occupancy validation and dominant cluster weighting
ngamarra Apr 2, 2026
4e42a24
Fix cluster occupancy region contrast pooling
ngamarra Apr 2, 2026
cadee6c
fix: tighten cluster occupancy contrast semantics
ngamarra Apr 2, 2026
cd8db22
docs: add plotting axis architecture spec
ngamarra Apr 2, 2026
769d899
feat: add plotting axis spec validation
ngamarra Apr 2, 2026
38aed59
test: add plotting helper regressions
ngamarra Apr 2, 2026
26232e8
feat: add fixed-window plotting prep
ngamarra Apr 2, 2026
d37592c
fix: harden fixed-window plotting prep
ngamarra Apr 2, 2026
2b36df0
feat: add aggregate segment-map plotting prep
ngamarra Apr 2, 2026
841bece
fix: validate segment map plot inputs
ngamarra Apr 2, 2026
aaf192d
fix: harden segment map plotting prep
ngamarra Apr 2, 2026
e75897f
Harden segment map plotting validation
ngamarra Apr 2, 2026
11ca0ed
Harden segment map validation
ngamarra Apr 2, 2026
db7aabc
refactor: route aggregate plotter through plotting core
ngamarra Apr 2, 2026
03fe0a8
refactor: route aggregate plotter through plotting core
ngamarra Apr 2, 2026
7e796c7
refactor: route aggregate plotter through plotting core
ngamarra Apr 2, 2026
79eefe2
Tighten plotting regression coverage
ngamarra Apr 2, 2026
5ef9f95
refactor: route single-read plotter through plotting core
ngamarra Apr 2, 2026
4b783fd
docs: finish plotting axis compatibility pass
ngamarra Apr 2, 2026
d177c1c
docs: add region contrast plotting spec
ngamarra Apr 2, 2026
8045ee2
test: add region contrast plotting coverage
ngamarra Apr 2, 2026
ee388bc
test: strengthen region contrast plotting coverage
ngamarra Apr 2, 2026
e3f2e51
test: dedupe region contrast plotting setup
ngamarra Apr 2, 2026
17e160d
feat: add region contrast plotting join helpers
ngamarra Apr 2, 2026
efdc978
fix: tighten region contrast plotting join guards
ngamarra Apr 2, 2026
1a0d532
feat: add region contrast profile plotting prep
ngamarra Apr 2, 2026
cc0318a
fix: preserve region contrast profile tracks
ngamarra Apr 2, 2026
7ba30ee
fix: collapse region contrast side labels
ngamarra Apr 2, 2026
c4f1d67
feat: add region contrast heatmap plotting prep
ngamarra Apr 2, 2026
5c000f2
fix: validate region contrast heatmap ranks
ngamarra Apr 2, 2026
d3a2c46
fix: scope region contrast heatmap rank validation
ngamarra Apr 2, 2026
d117998
docs: add region contrast plotting guide
ngamarra Apr 2, 2026
eded24e
docs: clarify region contrast plotting inputs
ngamarra Apr 2, 2026
e617082
docs: add superpowers cleanup spec
ngamarra Apr 2, 2026
17915c0
docs: add superpowers docs index
ngamarra Apr 2, 2026
03477ae
docs: make superpowers index links portable
ngamarra Apr 2, 2026
6b8a3e5
docs: track superpowers implementation plans
ngamarra Apr 2, 2026
98e761f
docs: verify superpowers docs status index
ngamarra Apr 2, 2026
f349516
docs: add docs integration cleanup spec
ngamarra Apr 2, 2026
84e1d01
docs: normalize superpowers doc paths
ngamarra Apr 2, 2026
e276937
docs: fix cleanup plan normalization examples
ngamarra Apr 2, 2026
736fdd5
docs: refresh superpowers index links
ngamarra Apr 2, 2026
b0c9219
docs: tighten analysis discoverability
ngamarra Apr 2, 2026
6a5b577
docs: remove remaining local path examples
ngamarra Apr 2, 2026
0dda826
docs: generalize cleanup path verification
ngamarra Apr 2, 2026
3fffead
docs: add region discovery plotting spec
ngamarra Apr 2, 2026
4d36a50
docs: add region discovery plotting plan
ngamarra Apr 2, 2026
f18576a
test: add region discovery scan plotting coverage
ngamarra Apr 2, 2026
f7d3cf0
test: tighten region discovery scan filter coverage
ngamarra Apr 2, 2026
2d6bd86
feat: add region discovery scan plotting prep
ngamarra Apr 2, 2026
0ab687e
fix: harden discovery scan plotting prep
ngamarra Apr 2, 2026
8248ff1
test: add region discovery hit context coverage
ngamarra Apr 2, 2026
5aece12
test: tighten discovery hit context coverage
ngamarra Apr 2, 2026
b5334c1
feat: add region discovery hit context plotting prep
ngamarra Apr 2, 2026
3adb01d
fix: harden discovery hit context prep
ngamarra Apr 2, 2026
7e6a671
docs: add region discovery plotting guide
ngamarra Apr 2, 2026
dba6636
docs: add global analysis plotting spec
ngamarra Apr 2, 2026
3bdaa40
docs: add global analysis plotting plan
ngamarra Apr 2, 2026
94fd332
test: add global analysis summary plotting coverage
ngamarra Apr 2, 2026
964deaa
test: tighten global analysis motif filtering coverage
ngamarra Apr 2, 2026
51b54dc
feat: add global analysis summary plotting prep
ngamarra Apr 2, 2026
9e74de8
fix: harden global analysis summary plotting prep
ngamarra Apr 2, 2026
51bb02e
test: add global analysis window plotting coverage
ngamarra Apr 2, 2026
5c8999c
test: tighten global analysis window coverage
ngamarra Apr 2, 2026
861fa83
feat: add global analysis window plotting prep
ngamarra Apr 2, 2026
da95cbb
fix: harden global analysis window plotting prep
ngamarra Apr 2, 2026
3d6e934
fix: align global window aggregation semantics
ngamarra Apr 2, 2026
2b00bd6
docs: add global analysis plotting guide
ngamarra Apr 2, 2026
d20c03e
docs: add docs coherence cleanup spec
ngamarra Apr 3, 2026
0131e7f
docs: add docs coherence cleanup plan
ngamarra Apr 3, 2026
4ac6e2f
docs: refresh superpowers status map
ngamarra Apr 3, 2026
48b2166
docs: align analysis guide naming
ngamarra Apr 3, 2026
6385ef7
docs: add shared clustering plotting spec
ngamarra Apr 3, 2026
927e859
docs: add shared clustering plotting plan
ngamarra Apr 3, 2026
1ccc7eb
test: add shared clustering distribution plotting coverage
ngamarra Apr 3, 2026
36cd168
test: tighten shared clustering distribution fixture
ngamarra Apr 3, 2026
d81a51b
feat: add shared clustering distribution plotting prep
ngamarra Apr 3, 2026
eb7ccd2
feat: tighten shared clustering result validation
ngamarra Apr 3, 2026
f8357df
test: add shared clustering profile plotting coverage
ngamarra Apr 3, 2026
14ad4f8
test: tighten shared clustering profile fixture
ngamarra Apr 3, 2026
b9a60be
feat: add shared clustering profile plotting prep
ngamarra Apr 3, 2026
17e1e21
feat: tighten shared clustering profile feature validation
ngamarra Apr 3, 2026
51bc304
test: add shared clustering region plotting coverage
ngamarra Apr 3, 2026
22105a4
test: tighten shared clustering region assertions
ngamarra Apr 3, 2026
e27d197
feat: add shared clustering region plotting prep
ngamarra Apr 3, 2026
18257f7
docs: add shared clustering plotting guide
ngamarra Apr 3, 2026
5775148
docs: add single-read region contrast design
ngamarra Apr 3, 2026
7cff34b
docs: add single-read contrast plan
ngamarra Apr 3, 2026
32efc9e
test: add single-read contrast validation coverage
ngamarra Apr 4, 2026
d02d132
feat: add single-read contrast request validation
ngamarra Apr 4, 2026
09c9762
feat: clean up single-read validation error text
ngamarra Apr 4, 2026
4e34001
test: add single-read mod-fraction evidence coverage
ngamarra Apr 4, 2026
d0bdaf9
feat: add single-read mod-fraction contrasts
ngamarra Apr 4, 2026
74d66c0
feat: tighten single-read mod-fraction validation
ngamarra Apr 4, 2026
080ec4f
feat: reject fractional single-read counts
ngamarra Apr 4, 2026
c82b0c2
test: add single-read feature contrast coverage
ngamarra Apr 4, 2026
d52bb11
feat: add single-read feature contrasts
ngamarra Apr 4, 2026
7f4d3fa
feat: add single-read feature contrasts
ngamarra Apr 4, 2026
ecd209c
test: add single-read matched pairwise coverage
ngamarra Apr 9, 2026
ca0e789
feat: add matched single-read contrasts
ngamarra Apr 9, 2026
7e21412
test: align single-read validation expectation
ngamarra Apr 9, 2026
33a0008
fix: harden matched single-read pairing
ngamarra Apr 9, 2026
3bd965f
fix: enforce matched single-read pair integrity
ngamarra Apr 9, 2026
8171b44
docs: add single-read contrast guide
ngamarra Apr 9, 2026
a9c7b50
fix: tighten single-read contrast support
ngamarra Apr 9, 2026
ff6b59b
docs: add matplotlib renderer design spec
ngamarra Apr 9, 2026
94f6bce
docs: add matplotlib renderer plan
ngamarra Apr 9, 2026
e34cc70
test: add matplotlib renderer export coverage
ngamarra Apr 9, 2026
6977c3c
feat: add matplotlib renderer module
ngamarra Apr 9, 2026
71a0b58
test: add region contrast matplotlib coverage
ngamarra Apr 9, 2026
fa0b32b
feat: add region contrast matplotlib renderers
ngamarra Apr 9, 2026
9af4ff6
fix: honor region contrast renderer contracts
ngamarra Apr 9, 2026
5fcee12
test: add region discovery and global analysis matplotlib coverage
ngamarra Apr 9, 2026
b57d483
feat: add matplotlib discovery and global renderers
ngamarra Apr 9, 2026
a86d30c
docs: add matplotlib renderer guide examples
ngamarra Apr 9, 2026
3e140dc
docs: add shared clustering matplotlib renderer spec
ngamarra Apr 10, 2026
2d28a1b
docs: add shared clustering renderer plan
ngamarra Apr 10, 2026
672d07e
test: add shared clustering distribution renderer coverage
ngamarra Apr 10, 2026
ecb02f7
feat: add shared clustering distribution renderers
ngamarra Apr 10, 2026
67cfa62
fix: tighten shared clustering renderer contracts
ngamarra Apr 10, 2026
20018b6
fix: preserve shared cluster payload order
ngamarra Apr 10, 2026
2b8866b
fix: honor prepared shared cluster display order
ngamarra Apr 10, 2026
0cbb021
Fix shared cluster prepared payload ordering
ngamarra Apr 10, 2026
9a4230d
test: add shared clustering profile renderer coverage
ngamarra Apr 10, 2026
e349954
Fix sparse shared cluster change heatmap
ngamarra Apr 10, 2026
a44a535
fix: preserve sparse shared cluster change axes
ngamarra Apr 11, 2026
7f7d170
feat: add shared cluster profile renderers
ngamarra Apr 11, 2026
0c03c9f
test: add shared clustering region renderer coverage
ngamarra Apr 11, 2026
693ddfe
feat: add shared clustering region renderer
ngamarra Apr 11, 2026
c0862a5
docs: add shared clustering renderer examples
ngamarra Apr 11, 2026
6125295
fix: tighten shared clustering renderer support
ngamarra Apr 11, 2026
5c9ca8b
docs: align shared clustering plotting docs
ngamarra Apr 11, 2026
11b715d
docs: add shared cluster tests design spec
ngamarra Apr 12, 2026
e663a44
docs: add shared cluster tests implementation plan
ngamarra Apr 12, 2026
a31ce63
test: add shared cluster tests skeleton coverage
ngamarra Apr 12, 2026
4529ad4
feat: add shared cluster tests module skeleton
ngamarra Apr 12, 2026
df0c5f4
feat: propagate shared cluster sample metadata
ngamarra Apr 12, 2026
9738e4d
feat: complete shared cluster inference v1 scope
ngamarra Apr 13, 2026
8baefd2
docs: add sprint 1 parser cleanup kickoff
ngamarra Apr 13, 2026
3497a10
refactor: type progress tracking in run_modkit
ngamarra Apr 13, 2026
94999b0
docs: clarify parse_bam region selector parameters
ngamarra Apr 13, 2026
c8d23e8
docs: document parse_bam hdf5 dataset semantics
ngamarra Apr 13, 2026
c84f4b6
fix: cap check_bam_format validation at configured read limit
ngamarra Apr 13, 2026
2133657
fix: keep check_bam_format validating malformed tags across read cap
ngamarra Apr 13, 2026
6c35d6e
fix: accept tuple region collections in regions_dict_from_input
ngamarra Apr 13, 2026
076a070
fix: align extract vectors to read span metadata
ngamarra Apr 13, 2026
5039374
fix: enforce binary-on-write threshold semantics
ngamarra Apr 13, 2026
34daa9d
fix: stabilize extract read coordinate reconstruction
ngamarra Apr 13, 2026
fc37939
refactor: tighten parser path coercion and remove stale TODOs
ngamarra Apr 13, 2026
745cec9
refactor: unify parser thread allocation logic
ngamarra Apr 13, 2026
a7056b6
fix: handle empty-region readwise extraction gracefully
ngamarra Apr 13, 2026
befed1a
fix: keep load_processed subset operations safe on empty selections
ngamarra Apr 13, 2026
57b1b39
fix: disambiguate read ids across overlapping regions
ngamarra Apr 13, 2026
a625de1
fix: validate load_processed subset parameter contract
ngamarra Apr 13, 2026
f2d8ed3
refactor: share extract vector compression helper
ngamarra Apr 13, 2026
f5793b5
fix: reject invalid subset array override in loaders
ngamarra Apr 13, 2026
526d01b
feat: wire loader progress bars and readwise pass-through options
ngamarra Apr 14, 2026
c0d0d70
refactor: centralize loader subset index handling
ngamarra Apr 14, 2026
35524e5
refactor: separate region bed normalization from command assembly
ngamarra Apr 14, 2026
7600c1a
feat: add optional parallel readwise extraction across reads
ngamarra Apr 14, 2026
6495c15
feat: parallelize loader region index selection via cores
ngamarra Apr 14, 2026
9e21539
refactor: consolidate parser hdf5 dataset and chunk writes
ngamarra Apr 14, 2026
2f305fe
feat: add pileup loader aliases for clearer public api
ngamarra Apr 14, 2026
7a79aa5
refactor: make prep_output_directory default dir explicit
ngamarra Apr 14, 2026
5509edf
refactor: refresh loader parallelization docs and fake-read naming
ngamarra Apr 14, 2026
dbda12b
refactor: clean stale parse_bam todo debt and path cleanup
ngamarra Apr 14, 2026
7b8b284
refactor: use csv parsing and clarify chunk flow in read_by_base
ngamarra Apr 14, 2026
7ec18f9
fix: validate extract coordinate conversion bounds
ngamarra Apr 14, 2026
5223158
refactor: clarify loader subset and h5 tuple assembly
ngamarra Apr 14, 2026
bc874ae
refactor: clean plotting todo debt and typing reassignments
ngamarra Apr 14, 2026
5d99f2b
test: harden parser and loader contract regressions
ngamarra Apr 14, 2026
622355d
docs: add legacy cleanup integration split plan
ngamarra Apr 14, 2026
46ccf4d
feat: complete shared cluster inference v1 scope
ngamarra Apr 13, 2026
d74f844
merge: align codex/legacy-cleanup ancestry with Clustering_test
ngamarra Apr 14, 2026
a7c6773
Merge pull request #1 from ngamarra/codex/legacy-cleanup
ngamarra Apr 14, 2026
7ecdacb
feat: advance clustering, parser, loader, and workflow reliability
ngamarra Apr 14, 2026
fea1de5
docs: unify superpowers plan status and remaining backlog
ngamarra Apr 14, 2026
8718616
fix: restore parse/load compatibility after rebase
ngamarra Apr 14, 2026
f470a59
feat: integrate DMR workflows and refresh clustering/tutorial tooling
ngamarra Apr 17, 2026
1fda0c7
Update modkit defaults and clean ruff findings
ngamarra Apr 28, 2026
6c95d9b
Refactor multisite raster selection
ngamarra Apr 29, 2026
48d55c3
Refine clustering features and association heatmap annotations
ngamarra Apr 30, 2026
2b9f366
Align association legends and title spacing
ngamarra Apr 30, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/formatting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ jobs:
- uses: actions/checkout@v4
- uses: chartboost/ruff-action@v1
with:
version: 0.6.8
version: 0.15.12
# TODO: Is it really necessary for these to be separate jobs? This seems redundant.
ruff-format-check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: chartboost/ruff-action@v1
with:
version: 0.6.8
version: 0.15.12
args: 'format --check'
15 changes: 15 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,19 @@ dimelo/test/output

# Checkpoint files
.ipynb_checkpoints/
*.executed.ipynb

# Local worktrees
.worktrees/

# Local analysis artifacts and caches
artifacts/
cache/
docs/benchmarks/tmp_bench/

# Local benchmark/output files
modkit_test_results.json
pileup_0.2.4.bed.gz
pileup_0.2.4_test.bed
pileup_0.2.4_test.bed.gz
pileup_0.6.1.bed.gz
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.8
rev: v0.15.12
hooks:
# Run the linter.
- id: ruff
Expand Down
7 changes: 7 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"python.testing.pytestArgs": [
"."
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
292 changes: 278 additions & 14 deletions README.md

Large diffs are not rendered by default.

44 changes: 44 additions & 0 deletions dimelo/__init__.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,67 @@
from . import (
chip_atlas,
cluster,
distribution,
dmr,
export,
global_analysis,
load_processed,
models,
parse_bam,
plot_depth_histogram,
plot_depth_profile,
plot_enrichment,
plot_enrichment_profile,
plot_read_browser,
plot_reads,
plotting,
plotting_matplotlib,
region_analysis,
region_contrasts,
region_discovery,
regulatory_enrichment,
shared_cluster_tests,
workflows,
)
from .models import (
ChipAtlasEnrichmentResult,
ModkitDMRMultiResult,
ModkitDMRPairResult,
RegionDiscoveryClusterContrastResult,
RegionDiscoveryClusterResult,
SharedClusterContrastResult,
UniBindJobResult,
)

# Public names of the package: the submodules imported above plus the
# result-model classes re-exported from `.models`.
__all__ = [
"chip_atlas",
"cluster",
"dmr",
"distribution",
"export",
"global_analysis",
"load_processed",
"models",
"parse_bam",
"plotting",
"plotting_matplotlib",
"plot_depth_histogram",
"plot_depth_profile",
"plot_enrichment",
"plot_enrichment_profile",
"plot_read_browser",
"plot_reads",
"regulatory_enrichment",
"region_discovery",
"region_analysis",
"region_contrasts",
"shared_cluster_tests",
"ChipAtlasEnrichmentResult",
"ModkitDMRMultiResult",
"ModkitDMRPairResult",
"RegionDiscoveryClusterContrastResult",
"RegionDiscoveryClusterResult",
"SharedClusterContrastResult",
"UniBindJobResult",
"workflows",
]
168 changes: 168 additions & 0 deletions dimelo/artifacts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
from __future__ import annotations

import hashlib
import json
from collections.abc import Iterable

from dimelo.models import DatasetArtifact

# Provenance keys that take part in artifact compatibility checks; every other
# provenance entry is dropped by `_compatibility_provenance` before comparing.
COMPATIBILITY_PROVENANCE_KEYS = frozenset(
    (
        "cluster_basis",
        "feature_scaling",
        "motifs",
        "normalization_mode",
        "pipeline",
        "region_digest",
        "region_source",
        "window_size",
    )
)


def _params_hash(params: dict[str, object]) -> str:
payload = json.dumps(params, sort_keys=True, separators=(",", ":")).encode()
return hashlib.sha256(payload).hexdigest()


def _normalize_sequence(values: object) -> tuple[object, ...]:
if values is None:
return None
if isinstance(values, str):
return (values,)
return tuple(values)


def _normalize_source_files(values: object) -> tuple[str, ...] | None:
    """Return source-file entries as a sorted tuple of strings.

    *values* is first passed through ``_normalize_sequence``; a ``None``
    result is returned as ``None``. Otherwise each entry is converted with
    ``str`` and the results are sorted, so input order does not matter.
    """
    entries = _normalize_sequence(values)
    return None if entries is None else tuple(sorted(map(str, entries)))


def _normalize_source_fingerprints(
values: object,
) -> tuple[dict[str, object], ...] | None:
if values is None:
return None
normalized = [dict(value) for value in values]
normalized.sort(
key=lambda value: json.dumps(value, sort_keys=True, separators=(",", ":"))
)
return tuple(normalized)


def _mapping_subset_matches(
requested: dict[str, object],
candidate: dict[str, object],
) -> bool:
return all(item in candidate.items() for item in requested.items())


def _requested_params_hash_matches(
    requested: dict[str, object],
    candidate: dict[str, object],
) -> bool:
    """Check that *candidate* agrees with *requested* on every requested param.

    The requested pairs must first be an equality subset of *candidate*. The
    candidate is then restricted to the requested keys and both sides are
    hashed with ``_params_hash``; the hashes must be equal as well. The hash
    comparison works on the JSON text, so values that compare equal but
    serialize differently (for example ``1`` and ``1.0``) do not match.
    """
    if _mapping_subset_matches(requested, candidate):
        overlap = {name: candidate[name] for name in requested}
        return _params_hash(requested) == _params_hash(overlap)
    return False


def _compatibility_provenance(requested: dict[str, object]) -> dict[str, object]:
    """Return the entries of *requested* whose key is compatibility-relevant.

    Only keys listed in ``COMPATIBILITY_PROVENANCE_KEYS`` are kept; the
    original insertion order of the surviving entries is preserved.
    """
    relevant: dict[str, object] = {}
    for key, value in requested.items():
        if key in COMPATIBILITY_PROVENANCE_KEYS:
            relevant[key] = value
    return relevant


def artifact_fingerprint(artifact: DatasetArtifact) -> dict[str, object]:
    """Build the comparison fingerprint for *artifact*.

    The fingerprint holds ``schema_version`` and ``package_version`` read from
    the artifact metadata, normalized ``source_files``,
    ``source_fingerprints`` and ``upstream_lineage`` (each taken from the
    provenance, falling back to the metadata entry of the same name), and a
    ``params_hash`` computed from ``artifact.params``. Missing values show up
    as ``None``.
    """

    def _provenance_or_metadata(key: str) -> object:
        # Provenance wins; metadata only supplies the fallback value.
        return artifact.provenance.get(key, artifact.metadata.get(key))

    fingerprint: dict[str, object] = {
        key: artifact.metadata.get(key)
        for key in ("schema_version", "package_version")
    }
    fingerprint["source_files"] = _normalize_source_files(
        _provenance_or_metadata("source_files")
    )
    fingerprint["source_fingerprints"] = _normalize_source_fingerprints(
        _provenance_or_metadata("source_fingerprints")
    )
    fingerprint["upstream_lineage"] = _normalize_sequence(
        _provenance_or_metadata("upstream_lineage")
    )
    fingerprint["params_hash"] = _params_hash(artifact.params)
    return fingerprint


def _has_required_fingerprint_fields(fingerprint: dict[str, object]) -> bool:
required_fields = (
"schema_version",
"package_version",
"source_files",
"source_fingerprints",
"upstream_lineage",
)
if any(fingerprint[field] is None for field in required_fields):
return False
return bool(fingerprint["source_files"]) and bool(
fingerprint["source_fingerprints"]
)


def artifact_is_compatible(
    requested: DatasetArtifact,
    candidate: DatasetArtifact,
) -> bool:
    """Decide whether *candidate* can stand in for *requested*.

    All of the following must hold, checked in this order:

    1. both fingerprints pass ``_has_required_fingerprint_fields``;
    2. ``sample_id`` and ``artifact_type`` are equal;
    3. schema/package version, source files, source fingerprints and
       upstream lineage are equal in both fingerprints;
    4. the candidate params agree with every requested param
       (``_requested_params_hash_matches``);
    5. the compatibility-relevant provenance entries of *requested* are an
       equality subset of the candidate provenance.
    """
    wanted = artifact_fingerprint(requested)
    offered = artifact_fingerprint(candidate)

    # An incomplete fingerprint on either side is never treated as a match.
    if not (
        _has_required_fingerprint_fields(wanted)
        and _has_required_fingerprint_fields(offered)
    ):
        return False

    if (
        requested.sample_id != candidate.sample_id
        or requested.artifact_type != candidate.artifact_type
    ):
        return False

    for field in (
        "schema_version",
        "package_version",
        "source_files",
        "source_fingerprints",
        "upstream_lineage",
    ):
        if wanted[field] != offered[field]:
            return False

    if not _requested_params_hash_matches(requested.params, candidate.params):
        return False

    relevant_provenance = _compatibility_provenance(requested.provenance)
    return _mapping_subset_matches(relevant_provenance, candidate.provenance)


def resolve_artifact(
    requested: DatasetArtifact,
    candidates: Iterable[DatasetArtifact],
    artifact_policy: str = "prefer_cached",
) -> DatasetArtifact | None:
    """Pick the first cached artifact compatible with *requested*.

    Args:
        requested: Description of the artifact the caller needs.
        candidates: Cached artifacts to search, in priority order.
        artifact_policy: ``"rebuild"`` skips the search and returns ``None``;
            ``"prefer_cached"`` returns a match or ``None``;
            ``"require_cached"`` returns a match or raises.

    Returns:
        The first candidate accepted by ``artifact_is_compatible``, or
        ``None`` when nothing is reused.

    Raises:
        ValueError: If *artifact_policy* is not one of the three policies.
        FileNotFoundError: If the policy is ``"require_cached"`` and no
            candidate is compatible.
    """
    # "rebuild" short-circuits before validation and never inspects candidates.
    if artifact_policy == "rebuild":
        return None

    if artifact_policy not in {"prefer_cached", "require_cached"}:
        raise ValueError(f"Unknown artifact_policy: {artifact_policy}")

    for cached in candidates:
        if artifact_is_compatible(requested, cached):
            return cached

    # Only the two cached policies can reach this point.
    if artifact_policy == "require_cached":
        raise FileNotFoundError(
            "No compatible cached artifact found for require_cached"
        )
    return None
Loading