From e2a3072c31284567cfd7e98a640bef47d1a71875 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 17 Mar 2026 23:53:36 +0000 Subject: [PATCH 1/3] Initial plan From 0c5b7fcb9a02cb1c1bdf92935caeb1467ac2a439 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 18 Mar 2026 00:04:43 +0000 Subject: [PATCH 2/3] feat: add cell annotation anndata serializer Co-authored-by: yulewu <38241047+yulewu@users.noreply.github.com> --- pyproject.toml | 5 +- .../cell_annotation_roundtrip_fixture.h5ad | Bin 0 -> 79816 bytes tests/test_cell_annotation_serialize.py | 168 +++++ .../viewer/plugin/cell_annotation/__init__.py | 4 + .../plugin/cell_annotation/serialize.py | 649 ++++++++++++++++++ 5 files changed, 825 insertions(+), 1 deletion(-) create mode 100644 tests/data/cell_annotation_roundtrip_fixture.h5ad create mode 100644 tests/test_cell_annotation_serialize.py create mode 100644 ueler/viewer/plugin/cell_annotation/serialize.py diff --git a/pyproject.toml b/pyproject.toml index 36b28a8..85e88c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,10 @@ classifiers = [ "License :: Other/Proprietary License", "Operating System :: OS Independent", ] -dependencies = [] +dependencies = [ + "anndata>=0.11,<0.12", + "numpy>=1.23", +] [project.optional-dependencies] dev = [ diff --git a/tests/data/cell_annotation_roundtrip_fixture.h5ad b/tests/data/cell_annotation_roundtrip_fixture.h5ad new file mode 100644 index 0000000000000000000000000000000000000000..aea8ee394f4d2e90355d1b51a5c9d5af5a5eb23d GIT binary patch literal 79816 zcmeG_YjB&#aUUgHVf>_R#irk|EnD=1GC%?ZU?&x1z34=?XT@6E=M@e|qdhK2 z(vm;P50qs`t@D zjMLrdjV*!Et((TMg@?JBZUUbBSPyi1R?Sd>DwosYSqdJbTNkjts(O8nHR_`DSm#-5 ztzOG1q^8rk!mh!}-l79Jm-h6yC2=&-*yv?^H?rP@5rN$Nb1Z6657gK|Z?{t_=cWr( zsyW>=J#=vh_CO_W{X9T>tO1u)&ng1&FtvC%!U&`w>D4fD5%Yq;)i9DV1guMXtxKME zpDpW5roUx49P 
zA!L9h7#_Hj9$1XlD5taUAh^`FpZO3x5SJT{Fr1TCktZ&}JHY89w-Nqp{nv!PqJH(j z$UvEh)=2*b-2qb{|1uv)e~bK&vz$eK6S+d#nUyhU@u-#D<;TL$v74xDl}Xg~`dk>l z{Z?u^KQmProGzuE5~5rX*vLAUu)=3s;a6DidSISdD4fHZhw{MrK-$r4UU8ex%I%Jo zg@}7#+c)2{!{vYlDBQ5PExS-i+ZDSuF9^-xhR}u}(2?OIaJy8p_X3>FHOvnbf*Xch zd`U|;NxLNdX2wtDXUY|)G?C=A@T2D2oG;_~98Q-jC7g|g9ZSkd{lX79J~)q2kOF6O zTHt~&^xwkyg_(TbPUf9KrNVQ4!`ru&~a*+={>_XNuT-v8J$CCD{^0=7G30&lNi2@h-$+#AJI{97dD^K#fjNv-@ zUG6JK^1DKT3!hgiaN+YRhKnAjbEyh~uJ+**Ujc@z_}6IguhrmRr@>#R!M|REe}e*- z_O4gp(q8fk>f=`0d!rAZ+RM{lZQKCV9p5)IzUV3Gk!9WDqeJ~N$nXTm(QtA&n6!su zBdMWSCg_AR(M&8FbB1F>(NHEFie~I+CKyVGlS83!Bst<_;*sERDCrD^gE2c2-N1Cz z`fp{p=&7_D=P(Qj_t&z5@ewDMi6=)Q!SHa<9v&VFIwR>wFdl-6!cICI4u(^iWGWmF z4yEk$NN6}3iN~B|9QbtNL!l7UQR^RKxLQArzIy$kR4kdcBO|eBG!nAovB*$7HWH5| zM^eB{EDdZ7C4??$HUHW5+oeW z#6$6L3Yu%DGoe%}=med3JQ@kd!^vPQ9*kv19O#LWXeJp-+M%IX%#KErsc0+~ zN;`Hc8i}TZk@QG9ni6@$Xz({P zTu(FL(Or zQT=acxGEQRw`=o`_#rzOU+iJo$vdfvlb%Rr!G&@1WzN5gW0jA)8LsNbJq%a*81vzi z9PVX!-5%2jzt0D!k$S%mP9tR}!*%xf0nS(XeURZ|AH|OnKj|S&zW7<=P3nDEQ|~(& zuD1JKJ~*}e-99+A`w@og+Wj8RSKB?oaJ8Q%efZQ*A`kH^uM35ShK7eNk%OuqDIXou z(=@|tbi(0CWMss07_QREXz1)>xXORlhfn1p33*Ek1oj(*E|?>^Xxv#_RL#U?g@tL$~~zm_Y}ibIlb41PjZs|8eKhe zzH-FhGYr?2dsb8KIfkqJJ@3OO{$6NO&x^it#NYdxl$+O-dx_yHfA9C<6MrAj;D1nq z{~?C!GH1+KF;S*mUWw@-Tc)kzh3W0a6-x)|b`TTmEQ$NOdsy#FEg+M(%!bA#rCGrvU}6_LfR`Ww7A|hT4*mmT0(CcEv7e(merl*nIBt8rZ5x1t9pMOp-?CQJ|7WUcC61E9uLW^i@jwIwgIr zlDNJ*ncd-HEq(zwL(=A(wY`)wi+9K=0tm%{`01_p^m z`pxpO*MuOdsJ91EMZG;d0qJ!f{0uzf#46=cCB11>N$ge0N0nUbNtfW+TR$3eZ+SGr z*bXU=#tvgVLg?xd$)VGJ)$`hZ7Qj5uFCvfKc7Dcz#r~9CcDF2gI3pnl%Z%x~A8$cW2%vWd zLFkThI;-$OSlwTdfd3u|^`M+zi+d@t+#AuISJ&^2P*>FNjr6l>1f*&=uAi^3?7j^y zP(%q(7vvm&ZonmPjZ*}_f!fnz52RPCYPU>|GaX$S(Z2y)Ep$Uo0w(T};Nq~yTH`9# z01tGzj-=Pu0T;g?6c~&2L5ES%UTfQ?9rteq*v|VNz{t(0>#ut6DaQIMBf%{J3*=EQ zx-$L6<&fwvE`3bM?QEbc_ubTau~q)P8wNNQP4IuNhkqHya__2D{!yny{TxS0tA_NDAoP#6bY#QRv$O~H13~zg4`qGX|iqi;|}946@;q$ zi{nM+zt_(FIFyNQeRIm)Rjc#gY`pc;v1WI?aesGOq>eXs0<0|0!y*sU>l+Jqiy_E( z9^mw-!Uth>e?S%R+ 
zbA;*CIc|IsKcg4V-@2hD0TX^A`qItTH+4NFR#wA%A2=tpzP@>s^;c>j`mO4(cYSj) zkTfB;OMtGdZ`66QRsOx}8x)WU{x9|Le*{kzAslCau2ufM>l-{UHNig~%zm`ZHHu<!R z<}m@Z7|ZxOikGq=oJ_b8j2E&*D}8_5JN^Pd(!}_?2I%U>--#{Mjw(fb{@)#cERJrZ zNI*CK=2?#f-PHISy@P7Va((R_f7b$O6XWkXpsO2ylkCV=saf6e$8zXK3rj#Z{(4!D zq?D=gca%=ft&`iSrgo0MbwJX@_`4qH>c-#xyQv*jikjYkJGFxl#qZ!^-Kaw(ppHLT zP0zohQ2hjxxXd2LE;64oH|=yVb;V?)@D zAB%th=_!mM?Z%KUa@vja_Fnj4on_4wsGQibNklRbX#e?4E6B71yv~z))peiP)3JM~ zfZ)w?`oO&e7pt|O&)Eb|J$IAwc#!vZ1W(=Xi7|f%*$=MaDSk1dz3Nf3`%R}=A$nhR z&bk%879=3^k<1G+-bUF@36Ju*%5o~}OxY7zyPS2iAenlsJTpbJLfyZW@gwEcn8x=S zG2PZtc2H&f^zNh*1I))96F6`!VF+H8(s;K zlIZGwrM^U3he7aB&K^iDj>kf?PERh5C-cPUdltw0S>>A0*CA+(Is5Ww#`P6nH8CL< zyy9(6F5fUN7kuT&gj`}qDh^L)*gzCNV+t=XTtp$yaI>s#4iY^bD%nMHe#pisk$F5_CV@cZY=xl>UyWu^_zFU zeIqbsLjMv#SNK==0ouv`CPVz+0d#fe1p~Z4Pz4+e&HR4L-Ho*@oU6K_^CY19{j)5B zu{nA$G5#I~(mMS;U8H)e6gAb~@o7R7f0T=LqYjaP=&#t{es1@ne4_xe)1}3QZ~+M9Tq32P#e z$C1Yy>7C%TRUvqc%lE%mL&y-gxgXfB_Ycp!l$s!tr&O!<{yZeZowgYs#VSCs1hc8Ua4yW7VN53oHEIVAVelgzrKlCi?LxxHBF_Z8N#eUO$AgRPEh z?|CG)UF^fL-9*1`AA&2Ah-xop75T4-Xa^QDG2h+^auj=^`dh8)mDgUp9qKW`KVEUw z*^BX~NJ**`HMJK5};;RT^FVua3cG?TPvTB0=F`z5wfc@<7sozgs&0?*`EA`iI;PKb!H?=CK zbC5qaQaj<%gq$7#x_&>lA8WSs-q~MQTr#qNe_DcAgNs{x1ux z!y?uHO>q0ilzZD#PQ{*(f4&Km)pV&mQ7k!Sm=xU-3+3DSJm@I}vLc*-NT`tG=HQRr*}K8@<6Lpz7}cx3`byHEF)6uFU{R%QZ&P(?+jNO8aBl-&zG4%HQMowDEN?>W=uAy&t5GtNWKr z)uhG_7rhdFIs8&%zM7JGZ<6a*^E)+d!tcT>%Hm(Q|2>Y+8`oC{_%|V!4A2#STAk0@ z=})7pZi4@;+plN1zKV<|8P~@@4w(?D?r-z+$KH<-;sE1!qt23m>W{>@`3Kn#5NU}2 zVKYF|;*Urg=Nt@T4+Cz>|6|UM{<-ma39k^zcpLk9YFm5k=xW(Avra4WS0yOp{t(x% z=1W?}b@dkn^2Ux$%(st$$jsT%ZyMKEyqau6E_kKNoLs(ZTrNdJa=|Ns;$Nuis&@Jp z_(c#C{fgfR5dPKo;@inT?!=hjAFovD_G|VZflLTh_qVx!H1NxW*vI(YsIw%X`bRZ; z`pHH)zPZFxK zpqlNEkN!F#ihs++x>1KnK%H*`-0q+<4mWQZc5^mw33D8CV;R4c;=)Uabox8~87f?5 z8qM_g;BOM5%qv{18+C{TRQ;8C?9{I{3Si2?ugqFjxk5KAL~j#~XiHAfK74)uV4lZ| z&^^rQW6X!>Xa6CpLhurtp8Rcs3;&b9*N8XI=|jK2WENi+h)eamE(mPoy_$p-KHCbv z!WXgh|Cf5rWh4OcOeodrdg?qR!zA~@qxfAXKSlJ%DT3dndY-ic?t^;1v;Q|5^VO8- 
zRpQfXPG_f0_+5DAbgBC<%h%WQjO#02Wj7(0CxNc)U#Rn5tK-dkUka}ho8TW`9o6|G z2R{p$5UTEP^ZkpHze9*JF1c7Y>JSO&{E<1f?;?hF`y=M;=*7n8rKdqCVn>6&OA>0g z9X+ndU;05}8L#tPzrZ^^HZkAMfl$ra(W{K>>odT=3AsEAbj6OU>w9K67?-gK>SuE6pb4G7ogcj;i}+?X;tK_0|Oc@AvSp zo{P1U{|^}AAFmpVUe@2Iq)ju~_goD(Yi--KLxAmza8o3-*%578O4OC=kR$E-Ku4+JSM{ILqx>PzQRcsb#oJHUoy{uzbrtG41Hp7K4X@~Grr$cf%m+_^YQm*$e39?wEio5zp$|^?DK28T@T45xHBv6F3n-}LHqP*RJ;c{-443()Ff-+pa;d>`%Fg4zoV;CA?yU@0=dHK-@TomN zr{EW+3l4%dGF+V(5)4=Q-NbOU-aCBwRPSbn>)N-)SB~VeRa4J4A3oKyy-7WzzH(I0 zolVNUT~qEIP0H=il)I}*xw|#x?qRqp_c0$n$>ZK8_1xzxNAkG8Nx7Yxat|~q_n@ZS zLruz!Ysx*$a8(}f^x=~{-ol<+aJ4wuf z)ZX(s>2LH6{O6d|1M&_F zo6cM?Z-y7{MLX|QDvq_UoSH5<6GgjZPvP1%lgn3}QrW`q66bS;$Lw7W7W1Yjl6E;K z>s)##e*%05f@Jd3d&<*Omb%^+c?Vo-{vVyc8#_U`RW8!Z-<|shLc}?w8#+${s=rIK z0Ng+N`$k!?-saM*$mv|UVi!`R?0_oz8f+}4P5I}aM*l&!2PBsHX8HFGK4E0O@%j%J zK`UjxIr@*(P z9U=j-qldY@lN|T69+}YJVW{KPv!h=zZbz?x_J|$r`z{G^Wp;E;?fc^UzsDV+iNxM! zxjqe=mEepM-`8OfJjU5aPA-lo?dtv4#qnf5Ir5#w@!FkhLSL^0CdH1{@4r(Iw13~| zdq(W4*YCR;+G9dafhEWZ_gpd5>wr%aa=ISqHq$3rKS;dt zb;SD1QQnpnaDU~eopKrgq9){YBhYOor@9_d%T_L@?JOsA`h<7yOvq`2E2mzNmHU9d z@Z2~*wNFlajLT^V_!oOR`5m`8me?Itk11FGTPfMOf_oF1aA5sk%-ee>oLR^LkiFZ^ zyZXt#9_92&KEKLx*TyatjD`DJ_=xcV zr(jQVdhRTO6uJ21Wt0}Yc}^d?f*=9LOZ+E22ws-c^FL;UpHXCD+{d9UI=glFr_|6Y zx3}4ORNrYr?Aj?7ScgSow`5%Qar+M`{{FawK44zP%k7kadn~S6bC0 z?>gtLP>%`z-v)Gb_Tt1VB;6`SP0w8t|3`>5J>_!Us52y>vlqwzr?LI=e=%g%HE%Cg zx*vO#?Soi8YDcT%+Iuf zJ*f7r?+nq1#Y%PaR0)V3QupiHX@|B#r6%mqHlQo#s4^kQ3R$#aHE0?><9GYW#v~wx z@9omL;;L1&L0SJE*0)*z8!b|+^6>5lY=`g;mye3D2Na4ikLEe8C3CyN*?k5iRa$L$gV4dssIVOlpjN3n+Up79T?}WCQ7|-~P z7qWAB`-8&dc`~yxK#645I<54Rf(El9g|TxNr>B;Cy{6l*SKl|TSNJU-6MD4+=(b-T zKQbw_mS%@U;jCB5_K!YA3ww!~21x3BB40 zblWeFHyf7+e#6g%JRUS6j|+^;;~_)x7zeuI2aWg9xS!?uq*eX)`ausH;{TnV{?xtC zBJUkYyz+bXmvTP`pw@b5RZiabG#&x6CiEV^5vkiJ>R<3)bC333^X%26CyldVT^`-1 zH?{;+e^LC4qkR8TTAwSxc?u-Nn;4ah6~^KpO>+4l&sRcUoxj8nE%JG*;Hl@YV@z-0 zO@gs7gQD;If3$G^>jm2Up503CO~iSA6pbK>v^qa}?@desNjXQV2B^Q#hEb3nm-NzN z^;{`?)#7@vG^(A4Wx{URAV%>|)O+5o>Xp|&NkTm)_)mHGKl}sIiDP{J*-rlP0NMoq 
z_%4~wZx|h+aZ;sbHQi^*-az=1jNgqqO9J9I9Oh|L)V)^hS@qRZXkivkQ8y=ieg@sGx@UmS0Y>Gg*f$CLf2;Ejvp^)bEP zfyMC>j5m7=;R*IZPFFV&WRmfM5y4=95}D9f922r%qg&4kR}!y$J^Qc`{q?SA@!R?) zbk7df^6yO=p6a`F1Hcz|v~E_e`cPA+?m%cW>YE{_|P%S*=Pf)@%+=nJjS7d}TE zAEp_m3Uy;>c8+*DNcb`zQ>pH6r$|8jJMmA%exG2wDsm{;Q|>1!cEd+o@v^Phv4kM3 zFs56$?mb+m0N-|<#>oUi+9{_>xnk`iZ>j|UpbtKWVWaXxfP22_nUwo4Ox$!XJ%KEi zi#B~*vs`hC6F^+}9Cfi){$tJ`{iN}EXg4%l{85YjJn8RteP{k8uLpDzmiZ+yBt4?T z>@2uByZ>9p^$IW4o3L;DfUfKxseXPt_g|hc#Q&2(*Id7T(Rjb&|57s1uTLA5%U>9m z%QJ@L@+{C5eXm|i^W%wi6t`2~pEJb&^B(?Z@$wvm1N{B?cJluM&^4i#F9O~F2Q-<8 A;{X5v literal 0 HcmV?d00001 diff --git a/tests/test_cell_annotation_serialize.py b/tests/test_cell_annotation_serialize.py new file mode 100644 index 0000000..74a2923 --- /dev/null +++ b/tests/test_cell_annotation_serialize.py @@ -0,0 +1,168 @@ +"""Tests for Cell Annotation AnnData serialization helpers.""" + +from __future__ import annotations + +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from ueler.viewer.plugin.cell_annotation.serialize import ( + SCHEMA_HASH, + serialize_heatmap_state, + validate_artifact, + write_h5ad_atomic, +) + +FIXTURE_PATH = Path(__file__).resolve().parent / "data" / "cell_annotation_roundtrip_fixture.h5ad" + + +def _example_payloads() -> tuple[dict, dict, dict]: + display = { + "obs_names": ["cluster_a", "cluster_b"], + "var_names": ["CD3", "CD4", "CD8"], + "X": [[-1.25, 0.5, 1.0], [0.25, -0.5, 0.75]], + "median_matrix": [[10.0, 11.0, 12.0], [13.0, 14.0, 15.0]], + "obsm": {"X_umap": [[0.0, 0.1], [1.0, 1.1]]}, + "ui": { + "orientation": {"horizontal": False}, + "row_sort": {"by": "dendrogram"}, + "col_sort": {"by": "selected_channels"}, + "selected_channels_ordered": ["CD8", "CD4", "CD3"], + "row_order": ["cluster_b", "cluster_a"], + "col_order": ["CD8", "CD4", "CD3"], + }, + "palette": { + "meta_cluster_colors_present": {"cluster_a": "#112233", "cluster_b": "#445566"}, + "meta_cluster_colors_all": {"cluster_a": "#112233", "cluster_b": 
"#445566", "cluster_c": "#778899"}, + }, + "zscore_params": { + "method": "per-marker-zscore", + "per_marker": { + "CD3": {"mean": 11.5, "std": 1.5}, + "CD4": {"mean": 12.5, "std": 1.5}, + "CD8": {"mean": 13.5, "std": 1.5}, + }, + "clipped": True, + }, + "filters": { + "expr": "cluster in ['T cell']", + "structured": {"subset_on": "meta_cluster", "values": ["T cell"]}, + "source": "heatmap", + }, + "row_linkage": [ + [0.0, 1.0, 0.42, 2.0], + ], + "row_linkage_basis": {"marker_ids": ["CD3", "CD8"], "distance": "euclidean"}, + "marker_sets": { + "training": ["CD3", "CD8"], + "display_extra": ["CD4"], + "available": ["CD3", "CD4", "CD8"], + "linkage": ["CD3", "CD8"], + "expanded_training": ["CD3", "CD8", "CD4"], + "panel": ["CD3", "CD4", "CD8"], + }, + } + flowsom = { + "training_markers": ["CD3", "CD8"], + "imputation": {"enabled": False}, + "projection": {"method": "none"}, + "availability": {"flowsom_plugin": True}, + "seed": 7, + "grid": {"xdim": 2, "ydim": 2, "rlen": 10}, + "params": {"seed": 7, "xdim": 2, "ydim": 2, "rlen": 10}, + "deps": ["numpy", "anndata"], + "hashes": {"input": "abc123"}, + } + meta = { + "artifact_version": "1.0.0", + "checkpoint": { + "id": "018f05c9-1d4e-7f0a-b341-c85e6020d0b6", + "parents": ["root"], + "op": "save", + "step_id": "heatmap.export", + "description": "Example checkpoint", + "created_at": "2026-03-17T23:54:11Z", + "producer": {"name": "UELer", "version": "test"}, + "id_namespace": "ueler.test", + }, + } + return display, flowsom, meta + + +class TestCellAnnotationSerialize(unittest.TestCase): + def test_round_trip_preserves_axes_orders_and_required_schema(self): + display, flowsom, meta = _example_payloads() + adata = serialize_heatmap_state(display, flowsom=flowsom, meta=meta) + + self.assertEqual(adata.uns["artifact"]["schema_hash"], SCHEMA_HASH) + self.assertEqual(adata.uns["flowsom"]["training_markers"], adata.uns["marker_sets"]["training"]) + self.assertEqual(adata.uns["row_linkage_basis"]["marker_ids"], 
adata.uns["marker_sets"]["linkage"]) + + with tempfile.TemporaryDirectory() as root: + path = Path(root) / "checkpoint.h5ad" + write_h5ad_atomic(adata, path) + + restored = validate_artifact(path) + + self.assertEqual(restored.obs_names.tolist(), ["cluster_a", "cluster_b"]) + self.assertEqual(restored.var_names.tolist(), ["CD3", "CD4", "CD8"]) + self.assertEqual(restored.uns["ui"]["row_order"], ["cluster_b", "cluster_a"]) + self.assertEqual(restored.uns["ui"]["col_order"], ["CD8", "CD4", "CD3"]) + self.assertIn("h5ad_sha256", restored.uns["artifact"]["checksums"]) + + def test_validator_rejects_bad_hex_colors(self): + display, flowsom, meta = _example_payloads() + display["palette"]["meta_cluster_colors_present"]["cluster_a"] = "blue" + + with self.assertRaisesRegex(ValueError, "invalid hex color"): + serialize_heatmap_state(display, flowsom=flowsom, meta=meta) + + def test_validator_rejects_non_permutation_orders(self): + display, flowsom, meta = _example_payloads() + adata = serialize_heatmap_state(display, flowsom=flowsom, meta=meta) + adata.uns["ui"]["row_order"] = ["cluster_a", "cluster_a"] + + with self.assertRaisesRegex(ValueError, "row_order must be a permutation"): + validate_artifact(adata) + + def test_validator_requires_zscore_params(self): + display, flowsom, meta = _example_payloads() + adata = serialize_heatmap_state(display, flowsom=flowsom, meta=meta) + del adata.uns["zscore_params"] + + with self.assertRaisesRegex(ValueError, "missing required block 'zscore_params'"): + validate_artifact(adata) + + def test_validator_detects_checksum_mismatch(self): + display, flowsom, meta = _example_payloads() + adata = serialize_heatmap_state(display, flowsom=flowsom, meta=meta) + adata.uns["artifact"]["checksums"]["x_sha256"] = "0" * 64 + + with self.assertRaisesRegex(ValueError, "checksum mismatch for X"): + validate_artifact(adata) + + def test_atomic_writer_cleans_temp_file_on_failure(self): + display, flowsom, meta = _example_payloads() + adata = 
serialize_heatmap_state(display, flowsom=flowsom, meta=meta) + + with tempfile.TemporaryDirectory() as root: + path = Path(root) / "checkpoint.h5ad" + with patch("ueler.viewer.plugin.cell_annotation.serialize.atomic_replace", side_effect=RuntimeError("boom")): + with self.assertRaisesRegex(RuntimeError, "boom"): + write_h5ad_atomic(adata, path) + + self.assertFalse(path.exists()) + self.assertEqual(list(Path(root).glob(".*.tmp*.h5ad")), []) + + def test_checked_in_fixture_validates(self): + fixture = validate_artifact(FIXTURE_PATH) + + self.assertEqual(fixture.obs_names.tolist(), ["cluster_a", "cluster_b"]) + self.assertEqual(fixture.var_names.tolist(), ["CD3", "CD4", "CD8"]) + self.assertEqual(fixture.uns["ui"]["row_order"], ["cluster_b", "cluster_a"]) + self.assertEqual(fixture.uns["ui"]["col_order"], ["CD8", "CD4", "CD3"]) + + +if __name__ == "__main__": # pragma: no cover + unittest.main() diff --git a/ueler/viewer/plugin/cell_annotation/__init__.py b/ueler/viewer/plugin/cell_annotation/__init__.py index 8009597..212d39f 100644 --- a/ueler/viewer/plugin/cell_annotation/__init__.py +++ b/ueler/viewer/plugin/cell_annotation/__init__.py @@ -4,6 +4,7 @@ from .manifest import Manifest from .plugin import CellAnnotationPlugin +from .serialize import serialize_heatmap_state, validate_artifact, write_h5ad_atomic from .selection_spec import MaterializedSelectionSpec from .store import DatasetStore, atomic_replace, atomic_write_json @@ -12,6 +13,9 @@ "DatasetStore", "Manifest", "MaterializedSelectionSpec", + "serialize_heatmap_state", + "validate_artifact", + "write_h5ad_atomic", "atomic_replace", "atomic_write_json", ] diff --git a/ueler/viewer/plugin/cell_annotation/serialize.py b/ueler/viewer/plugin/cell_annotation/serialize.py new file mode 100644 index 0000000..39390d4 --- /dev/null +++ b/ueler/viewer/plugin/cell_annotation/serialize.py @@ -0,0 +1,649 @@ +"""AnnData serialization helpers for Cell Annotation checkpoints.""" + +from __future__ import annotations + 
+import copy +import hashlib +import importlib.machinery +import json +import os +import re +import sys +import tempfile +import time +import types +import uuid +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Mapping + +_dask_stub = sys.modules.get("dask") +if _dask_stub is not None and getattr(_dask_stub, "__spec__", None) is None: # pragma: no cover - test bootstrap quirk + _dask_stub.__spec__ = importlib.machinery.ModuleSpec("dask", loader=None) +if _dask_stub is not None and not hasattr(_dask_stub, "__path__"): # pragma: no cover - test bootstrap quirk + _dask_stub.__path__ = [] +if _dask_stub is not None and "dask.array" not in sys.modules: # pragma: no cover - test bootstrap quirk + dask_array_stub = types.ModuleType("dask.array") + dask_array_stub.Array = type("Array", (), {}) + dask_array_stub.__spec__ = importlib.machinery.ModuleSpec("dask.array", loader=None) + sys.modules["dask.array"] = dask_array_stub + _dask_stub.array = dask_array_stub + +import anndata as ad +import numpy as np + +if hasattr(ad, "settings"): # pragma: no branch - depends on anndata version + try: + ad.settings.allow_write_nullable_strings = True + except Exception: # pragma: no cover - defensive for older/newer anndata variants + pass + +from .store import atomic_replace + +SCHEMA_VERSION = "1.0.0" +ZERO_SHA256 = "0" * 64 +_MISSING = object() +_HEX_COLOR_RE = re.compile(r"^#[0-9a-fA-F]{6}(?:[0-9a-fA-F]{2})?$") +_SCHEMA_SPEC = { + "version": SCHEMA_VERSION, + "required_uns": [ + "artifact", + "ui", + "palette", + "zscore_params", + "filters", + "row_linkage", + "row_linkage_basis", + "marker_sets", + "flowsom", + "checkpoint", + ], + "required_ui": [ + "orientation", + "row_sort", + "col_sort", + "selected_channels_ordered", + "row_order", + "col_order", + ], + "required_marker_sets": [ + "training", + "display_extra", + "available", + "linkage", + "expanded_training", + "panel", + ], +} +SCHEMA_HASH = hashlib.sha256( + 
json.dumps(_SCHEMA_SPEC, sort_keys=True, separators=(",", ":")).encode("utf-8") +).hexdigest() + + +def serialize_heatmap_state(display: dict, *, flowsom: dict, meta: dict) -> ad.AnnData: + """Serialize a Cell Annotation heatmap checkpoint into an :class:`AnnData` artifact.""" + + obs_names = _normalize_name_list( + _coalesce( + (display, meta), + ("obs_names", "row_names", "cluster_ids", "rows"), + ), + "obs_names", + ) + var_names = _normalize_name_list( + _coalesce( + (display, meta), + ("var_names", "marker_ids", "markers", "cols", "columns"), + ), + "var_names", + ) + + X = _to_matrix( + _coalesce( + (display, meta), + ("X", "x", "zscore_matrix", "z_scored_medians", "zscored_medians"), + ), + shape=(len(obs_names), len(var_names)), + label="X", + ) + + adata = ad.AnnData(X=X) + adata.obs_names = obs_names + adata.var_names = var_names + + median = _coalesce( + (display, meta), + ("layers.median", "median_matrix", "median", "raw_medians"), + default=None, + ) + if median is not None: + adata.layers["median"] = _to_matrix( + median, + shape=(len(obs_names), len(var_names)), + label="layers['median']", + ) + + obsm = _coalesce((display, meta), ("obsm", "embeddings"), default={}) + if obsm is None: + obsm = {} + if not isinstance(obsm, Mapping): + raise ValueError("obsm must be a mapping of embedding names to 2D arrays") + for name, value in obsm.items(): + adata.obsm[str(name)] = _to_embedding(value, n_obs=len(obs_names), label=f"obsm[{name!r}]") + + marker_sets = _normalize_marker_sets(display, flowsom, meta, var_names) + row_linkage_basis = _normalize_row_linkage_basis(display, meta, marker_sets) + row_linkage = _normalize_row_linkage(_coalesce((display, meta), ("row_linkage",), default=[])) + + adata.uns = { + "artifact": { + "version": str(_coalesce((meta,), ("artifact.version", "artifact_version"), default=SCHEMA_VERSION)), + "schema_hash": SCHEMA_HASH, + "checksums": {}, + }, + "ui": _normalize_ui(display, meta, obs_names, var_names), + "palette": 
_normalize_palette(display, meta), + "zscore_params": _normalize_zscore_params(display, meta, var_names), + "filters": _normalize_filters(display, meta), + "row_linkage": row_linkage, + "row_linkage_basis": row_linkage_basis, + "marker_sets": marker_sets, + "flowsom": _normalize_flowsom(flowsom, marker_sets), + "checkpoint": _normalize_checkpoint(meta), + } + + _refresh_checksums(adata) + return validate_artifact(adata) + + +def validate_artifact(path_or_adata: str | Path | ad.AnnData) -> ad.AnnData: + """Validate a serialized Cell Annotation artifact and return the loaded object.""" + + source_path: Path | None = None + if isinstance(path_or_adata, (str, Path)): + source_path = Path(path_or_adata) + adata = ad.read_h5ad(source_path) + elif isinstance(path_or_adata, ad.AnnData): + adata = path_or_adata + filename = getattr(adata, "filename", None) + if filename: + source_path = Path(filename) + else: + raise TypeError("validate_artifact expects a filesystem path or an AnnData object") + + if adata.X is None: + raise ValueError("artifact is missing X") + X = np.asarray(adata.X) + if X.ndim != 2: + raise ValueError("artifact X must be a 2D matrix") + if tuple(X.shape) != (adata.n_obs, adata.n_vars): + raise ValueError("artifact X shape does not match obs/var dimensions") + if X.dtype != np.float32: + raise ValueError("artifact X must use float32") + + if len(set(map(str, adata.obs_names))) != adata.n_obs: + raise ValueError("obs_names must be unique") + if len(set(map(str, adata.var_names))) != adata.n_vars: + raise ValueError("var_names must be unique") + + uns = adata.uns + for key in _SCHEMA_SPEC["required_uns"]: + if key not in uns: + raise ValueError(f"artifact.uns is missing required block {key!r}") + + artifact = _ensure_mapping(uns["artifact"], "artifact") + if artifact.get("schema_hash") != SCHEMA_HASH: + raise ValueError("artifact schema_hash does not match the expected Cell Annotation schema") + + checksums = _ensure_mapping(artifact.get("checksums"), 
"artifact.checksums") + expected_x_sha = checksums.get("x_sha256") + if expected_x_sha != _hash_array(X): + raise ValueError("artifact checksum mismatch for X") + + if "median_sha256" in checksums: + if "median" not in adata.layers: + raise ValueError("artifact checksum advertises median layer, but layers['median'] is missing") + median = np.asarray(adata.layers["median"]) + if median.dtype != np.float32: + raise ValueError("layers['median'] must use float32") + if checksums["median_sha256"] != _hash_array(median): + raise ValueError("artifact checksum mismatch for layers['median']") + + ui = _ensure_mapping(uns["ui"], "ui") + for key in _SCHEMA_SPEC["required_ui"]: + if key not in ui: + raise ValueError(f"ui block is missing required key {key!r}") + _validate_order(ui["row_order"], list(map(str, adata.obs_names)), "row_order") + _validate_order(ui["col_order"], list(map(str, adata.var_names)), "col_order") + if checksums.get("row_order_sha256") != _hash_json(ui["row_order"]): + raise ValueError("artifact checksum mismatch for row_order") + if checksums.get("col_order_sha256") != _hash_json(ui["col_order"]): + raise ValueError("artifact checksum mismatch for col_order") + + selected_channels = _normalize_name_list(ui["selected_channels_ordered"], "selected_channels_ordered") + if len(set(selected_channels)) != len(selected_channels): + raise ValueError("selected_channels_ordered must not contain duplicates") + unknown_channels = [name for name in selected_channels if name not in set(map(str, adata.var_names))] + if unknown_channels: + raise ValueError(f"selected_channels_ordered contains unknown markers: {unknown_channels!r}") + adata.uns["ui"]["row_order"] = [str(item) for item in ui["row_order"]] + adata.uns["ui"]["col_order"] = [str(item) for item in ui["col_order"]] + adata.uns["ui"]["selected_channels_ordered"] = selected_channels + + palette = _ensure_mapping(uns["palette"], "palette") + _validate_palette_block(palette.get("meta_cluster_colors_present"), 
"palette.meta_cluster_colors_present") + _validate_palette_block(palette.get("meta_cluster_colors_all"), "palette.meta_cluster_colors_all") + + zscore_params = _ensure_mapping(uns["zscore_params"], "zscore_params") + per_marker = _ensure_mapping(zscore_params.get("per_marker"), "zscore_params.per_marker") + if list(per_marker.keys()) != list(map(str, adata.var_names)): + raise ValueError("zscore_params.per_marker must align exactly with var_names") + for marker, stats in per_marker.items(): + stats_mapping = _ensure_mapping(stats, f"zscore_params.per_marker[{marker!r}]") + if "mean" not in stats_mapping or "std" not in stats_mapping: + raise ValueError(f"zscore_params.per_marker[{marker!r}] must contain mean and std") + + marker_sets = _ensure_mapping(uns["marker_sets"], "marker_sets") + flowsom = _ensure_mapping(uns["flowsom"], "flowsom") + row_linkage_basis = _ensure_mapping(uns["row_linkage_basis"], "row_linkage_basis") + + training = _normalize_name_list(marker_sets.get("training"), "marker_sets.training") + linkage_markers = _normalize_name_list(marker_sets.get("linkage"), "marker_sets.linkage") + flowsom_training = _normalize_name_list(flowsom.get("training_markers"), "flowsom.training_markers") + basis_markers = _normalize_name_list(row_linkage_basis.get("marker_ids"), "row_linkage_basis.marker_ids") + if flowsom_training != training: + raise ValueError("flowsom.training_markers must match marker_sets.training") + if basis_markers != linkage_markers: + raise ValueError("row_linkage_basis.marker_ids must match marker_sets.linkage") + adata.uns["marker_sets"]["training"] = training + adata.uns["marker_sets"]["linkage"] = linkage_markers + adata.uns["flowsom"]["training_markers"] = flowsom_training + adata.uns["row_linkage_basis"]["marker_ids"] = basis_markers + + if source_path is not None: + expected_file_sha = checksums.get("h5ad_sha256") + if expected_file_sha and expected_file_sha != ZERO_SHA256: + actual_file_sha = _normalized_h5ad_sha256(source_path, 
str(expected_file_sha)) + if actual_file_sha != expected_file_sha: + raise ValueError("artifact checksum mismatch for on-disk h5ad bytes") + + return adata + + +def write_h5ad_atomic(adata: ad.AnnData, dst_path: str | Path) -> None: + """Write a validated artifact atomically and persist its on-disk checksum.""" + + validated = validate_artifact(adata) + target = Path(dst_path) + target.parent.mkdir(parents=True, exist_ok=True) + fd, tmp_path = tempfile.mkstemp( + dir=str(target.parent), + prefix=f".{target.name}.tmp", + suffix=".h5ad", + ) + os.close(fd) + tmp = Path(tmp_path) + working = validated.copy() + try: + checksums = _ensure_mapping(working.uns["artifact"]["checksums"], "artifact.checksums") + checksums["h5ad_sha256"] = ZERO_SHA256 + working.write_h5ad(tmp) + + file_sha = _normalized_h5ad_sha256(tmp, ZERO_SHA256) + checksums["h5ad_sha256"] = file_sha + working.write_h5ad(tmp) + + if _normalized_h5ad_sha256(tmp, file_sha) != file_sha: + raise ValueError("failed to persist a stable h5ad checksum") + + atomic_replace(tmp, target) + _ensure_mapping(adata.uns["artifact"]["checksums"], "artifact.checksums")["h5ad_sha256"] = file_sha + except Exception: + try: + tmp.unlink() + except OSError: + pass + raise + + +def _normalize_ui(display: Mapping[str, Any], meta: Mapping[str, Any], obs_names: list[str], var_names: list[str]) -> dict[str, Any]: + row_order = _normalize_order( + _coalesce((display, meta), ("ui.row_order", "row_order"), default=obs_names), + universe=obs_names, + label="row_order", + ) + col_order = _normalize_order( + _coalesce((display, meta), ("ui.col_order", "col_order"), default=var_names), + universe=var_names, + label="col_order", + ) + selected = _coalesce( + (display, meta), + ("ui.selected_channels_ordered", "selected_channels_ordered", "selected_channels"), + default=var_names, + ) + return { + "orientation": copy.deepcopy(_coalesce((display, meta), ("ui.orientation", "orientation"), default={})), + "row_sort": 
copy.deepcopy(_coalesce((display, meta), ("ui.row_sort", "row_sort"), default={})), + "col_sort": copy.deepcopy(_coalesce((display, meta), ("ui.col_sort", "col_sort"), default={})), + "selected_channels_ordered": _normalize_name_list(selected, "selected_channels_ordered"), + "row_order": row_order, + "col_order": col_order, + } + + +def _normalize_palette(display: Mapping[str, Any], meta: Mapping[str, Any]) -> dict[str, Any]: + palette = _ensure_mapping(_coalesce((display, meta), ("palette",), default={}), "palette") + normalized = { + "meta_cluster_colors_present": copy.deepcopy(palette.get("meta_cluster_colors_present", [])), + "meta_cluster_colors_all": copy.deepcopy(palette.get("meta_cluster_colors_all", [])), + } + _validate_palette_block(normalized["meta_cluster_colors_present"], "palette.meta_cluster_colors_present") + _validate_palette_block(normalized["meta_cluster_colors_all"], "palette.meta_cluster_colors_all") + return normalized + + +def _normalize_zscore_params(display: Mapping[str, Any], meta: Mapping[str, Any], var_names: list[str]) -> dict[str, Any]: + payload = _ensure_mapping(_coalesce((display, meta), ("zscore_params",), default=_MISSING), "zscore_params") + stats_source = payload.get("per_marker", payload.get("stats")) + if stats_source is _MISSING or stats_source is None: + raise ValueError("zscore_params.per_marker is required for z-scored X") + + per_marker: dict[str, dict[str, float]] = {} + if isinstance(stats_source, Mapping): + missing = [name for name in var_names if name not in stats_source] + if missing: + raise ValueError(f"zscore_params.per_marker is missing marker stats for {missing!r}") + for name in var_names: + stats = _ensure_mapping(stats_source[name], f"zscore_params.per_marker[{name!r}]") + if "mean" not in stats or "std" not in stats: + raise ValueError(f"zscore_params.per_marker[{name!r}] must contain mean and std") + per_marker[name] = { + "mean": float(stats["mean"]), + "std": float(stats["std"]), + } + else: + raise 
ValueError("zscore_params.per_marker must be a mapping keyed by marker id") + + return { + "method": str(payload.get("method", "unknown")), + "per_marker": per_marker, + "clipped": bool(payload.get("clipped", False)), + } + + +def _normalize_filters(display: Mapping[str, Any], meta: Mapping[str, Any]) -> dict[str, Any]: + filters = _ensure_mapping(_coalesce((display, meta), ("filters",), default={}), "filters") + return { + "expr": copy.deepcopy(filters.get("expr")), + "structured": copy.deepcopy(filters.get("structured")), + "source": copy.deepcopy(filters.get("source")), + } + + +def _normalize_marker_sets( + display: Mapping[str, Any], + flowsom: Mapping[str, Any], + meta: Mapping[str, Any], + var_names: list[str], +) -> dict[str, list[str]]: + raw = _ensure_mapping(_coalesce((display, meta), ("marker_sets",), default={}), "marker_sets") + training = _normalize_name_list( + raw.get("training", flowsom.get("training_markers", flowsom.get("channels", var_names))), + "marker_sets.training", + ) + display_extra = _normalize_name_list( + raw.get("display_extra", []), + "marker_sets.display_extra", + allow_empty=True, + ) + available = _normalize_name_list(raw.get("available", var_names), "marker_sets.available") + linkage = _normalize_name_list(raw.get("linkage", training), "marker_sets.linkage") + expanded_training = _normalize_name_list( + raw.get("expanded_training", _unique(training + display_extra)), + "marker_sets.expanded_training", + ) + panel = _normalize_name_list(raw.get("panel", var_names), "marker_sets.panel") + return { + "training": training, + "display_extra": display_extra, + "available": available, + "linkage": linkage, + "expanded_training": expanded_training, + "panel": panel, + } + + +def _normalize_flowsom(flowsom: Mapping[str, Any], marker_sets: Mapping[str, list[str]]) -> dict[str, Any]: + params = _ensure_mapping(flowsom.get("params", {}), "flowsom.params") + grid = _ensure_mapping(flowsom.get("grid", {}), "flowsom.grid") + return { + 
"training_markers": _normalize_name_list( + flowsom.get("training_markers", marker_sets["training"]), + "flowsom.training_markers", + ), + "imputation": copy.deepcopy(_ensure_mapping(flowsom.get("imputation", {}), "flowsom.imputation")), + "projection": copy.deepcopy(_ensure_mapping(flowsom.get("projection", {}), "flowsom.projection")), + "availability": copy.deepcopy(_ensure_mapping(flowsom.get("availability", {}), "flowsom.availability")), + "seed": int(flowsom.get("seed", params.get("seed", 0))), + "grid": { + "xdim": int(grid.get("xdim", params.get("xdim", 0))), + "ydim": int(grid.get("ydim", params.get("ydim", 0))), + "rlen": int(grid.get("rlen", params.get("rlen", 0))), + }, + "params": copy.deepcopy(dict(params)), + "deps": list(flowsom.get("deps", [])), + "hashes": copy.deepcopy(_ensure_mapping(flowsom.get("hashes", {}), "flowsom.hashes")), + } + + +def _normalize_row_linkage_basis( + display: Mapping[str, Any], + meta: Mapping[str, Any], + marker_sets: Mapping[str, list[str]], +) -> dict[str, Any]: + basis = _ensure_mapping(_coalesce((display, meta), ("row_linkage_basis",), default={}), "row_linkage_basis") + return { + "marker_ids": _normalize_name_list( + basis.get("marker_ids", marker_sets["linkage"]), + "row_linkage_basis.marker_ids", + ), + "distance": copy.deepcopy(basis.get("distance")), + } + + +def _normalize_row_linkage(value: Any) -> list[Any]: + if value is None: + return [] + if isinstance(value, np.ndarray): + if value.ndim != 2: + raise ValueError("row_linkage must be a 2D linkage matrix") + return value.tolist() + if isinstance(value, (list, tuple)): + return copy.deepcopy(list(value)) + raise ValueError("row_linkage must be a list-like value") + + +def _normalize_checkpoint(meta: Mapping[str, Any]) -> dict[str, Any]: + checkpoint = _ensure_mapping(_coalesce((meta,), ("checkpoint",), default={}), "checkpoint") + created_at = checkpoint.get("created_at") + if created_at is None: + created_at = 
def _normalize_checkpoint(meta: Mapping[str, Any]) -> dict[str, Any]:
    """Return the ``checkpoint`` block of ``meta`` with every field filled in.

    Missing fields fall back to the same-named top-level entry of ``meta``
    (for ``op``, ``description``, ``producer``, ``id_namespace`` and
    ``step_id``) and then to a built-in default. A missing ``id`` is
    generated, and a missing or ``None`` ``created_at`` is set to the
    current UTC time in ISO format with a ``Z`` suffix.

    Returns:
        A dict with the keys ``id``, ``parents``, ``op``, ``description``,
        ``created_at``, ``producer`` and ``id_namespace``, plus ``step_id``
        when either the checkpoint or ``meta`` defines it.

    Raises:
        ValueError: If the ``checkpoint`` entry is not a mapping.
    """
    checkpoint = _ensure_mapping(_coalesce((meta,), ("checkpoint",), default={}), "checkpoint")

    created_at = checkpoint.get("created_at")
    if created_at is None:
        created_at = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    # Only generate an id when none was supplied; building one eagerly
    # wastes a random UUID on every call.
    checkpoint_id = checkpoint["id"] if "id" in checkpoint else _uuid7_like()

    parents = checkpoint.get("parents")
    if parents is None:
        parents = []
    elif isinstance(parents, str):
        # A bare string is one parent id, not a sequence of characters.
        parents = [parents]

    normalized = {
        "id": str(checkpoint_id),
        "parents": [str(parent) for parent in parents],
        "op": str(checkpoint.get("op", meta.get("op", "serialize_heatmap_state"))),
        "description": str(checkpoint.get("description", meta.get("description", ""))),
        "created_at": str(created_at),
        "producer": copy.deepcopy(checkpoint.get("producer", meta.get("producer", {"name": "ueler"}))),
        "id_namespace": str(checkpoint.get("id_namespace", meta.get("id_namespace", "ueler.cell_annotation"))),
    }
    if "step_id" in checkpoint or "step_id" in meta:
        normalized["step_id"] = str(checkpoint.get("step_id", meta.get("step_id")))
    return normalized


def _refresh_checksums(adata: "ad.AnnData") -> None:
    """Recompute the content checksums stored under ``uns["artifact"]["checksums"]``.

    Updates ``x_sha256`` (from ``adata.X`` as float32), ``row_order_sha256``
    and ``col_order_sha256`` (from ``uns["ui"]``), and ``median_sha256`` when
    a ``median`` layer exists. ``h5ad_sha256`` is set to the zero placeholder
    only if it is not already present.

    Raises:
        ValueError: If the stored checksums entry is not a mapping.
    """
    artifact = adata.uns["artifact"]
    stored = artifact["checksums"]
    checksums = _ensure_mapping(stored, "artifact.checksums")

    checksums["x_sha256"] = _hash_array(np.asarray(adata.X, dtype=np.float32))
    checksums["row_order_sha256"] = _hash_json(adata.uns["ui"]["row_order"])
    checksums["col_order_sha256"] = _hash_json(adata.uns["ui"]["col_order"])
    if "median" in adata.layers:
        checksums["median_sha256"] = _hash_array(np.asarray(adata.layers["median"], dtype=np.float32))
    checksums.setdefault("h5ad_sha256", ZERO_SHA256)

    # _ensure_mapping returns a new dict when the stored value is None or a
    # non-dict mapping; without writing it back the updates would be lost.
    if checksums is not stored:
        artifact["checksums"] = checksums


def _to_matrix(value: Any, *, shape: tuple[int, int], label: str) -> np.ndarray:
    """Convert ``value`` to a C-contiguous float32 matrix of exactly ``shape``.

    Raises:
        ValueError: If the converted array is not 2D or its shape differs
            from ``shape``.
    """
    array = np.asarray(value, dtype=np.float32)
    if array.ndim != 2:
        raise ValueError(f"{label} must be a 2D matrix")
    if tuple(array.shape) != tuple(shape):
        raise ValueError(f"{label} has shape {tuple(array.shape)!r}, expected {shape!r}")
    return np.ascontiguousarray(array, dtype=np.float32)


def _to_embedding(value: Any, *, n_obs: int, label: str) -> np.ndarray:
    """Convert ``value`` to a C-contiguous float32 matrix with ``n_obs`` rows.

    The number of columns is not constrained.

    Raises:
        ValueError: If the converted array is not 2D or its row count
            differs from ``n_obs``.
    """
    array = np.asarray(value, dtype=np.float32)
    if array.ndim != 2:
        raise ValueError(f"{label} must be a 2D matrix")
    if array.shape[0] != n_obs:
        raise ValueError(f"{label} must have {n_obs} rows")
    return np.ascontiguousarray(array, dtype=np.float32)
def _normalize_name_list(value: Any, label: str, *, allow_empty: bool = False) -> list[str]:
    """Return ``value`` as a list of strings.

    A single string is treated as a one-element list. Lists, tuples and
    NumPy arrays are converted element by element with ``str``.

    Raises:
        ValueError: If ``value`` is of any other type, or the result is empty
            while ``allow_empty`` is false.
    """
    if isinstance(value, str):
        items = (value,)
    elif isinstance(value, (list, tuple, np.ndarray)):
        items = tuple(value)
    else:
        raise ValueError(f"{label} must be a sequence of strings")
    names = list(map(str, items))
    if names or allow_empty:
        return names
    raise ValueError(f"{label} must not be empty")


def _normalize_order(value: Any, *, universe: list[str], label: str) -> list[str]:
    """Resolve an ordering into names and check it is a permutation of ``universe``.

    A non-empty sequence made up entirely of integers is read as positions
    into ``universe``; anything else is read as names and converted with
    ``str``.

    Raises:
        ValueError: If ``value`` is not a list, tuple or NumPy array, a
            position is out of range, or the result is not a permutation of
            ``universe``.
    """
    if not isinstance(value, (list, tuple, np.ndarray)):
        raise ValueError(f"{label} must be a sequence")
    entries = list(value)
    positional = bool(entries) and all(isinstance(entry, (int, np.integer)) for entry in entries)
    if positional:
        try:
            resolved = [universe[int(position)] for position in entries]
        except (IndexError, ValueError) as exc:
            raise ValueError(f"{label} contains invalid positional indices") from exc
    else:
        resolved = [str(entry) for entry in entries]
    _validate_order(resolved, universe, label)
    return resolved


def _validate_order(value: Any, universe: list[str], label: str) -> None:
    """Check that ``value`` holds exactly the names in ``universe``, in any order.

    Entries are compared after conversion with ``str``; duplicates and
    missing names both fail the check.

    Raises:
        ValueError: If ``value`` is not a list, tuple or NumPy array, or is
            not a permutation of ``universe``.
    """
    if not isinstance(value, (list, tuple, np.ndarray)):
        raise ValueError(f"{label} must be a sequence")
    candidate = sorted(map(str, value))
    expected = sorted(universe)
    if candidate != expected:
        raise ValueError(f"{label} must be a permutation of the corresponding axis names")


def _validate_palette_block(value: Any, label: str) -> None:
    """Check that every colour in a palette block matches ``_HEX_COLOR_RE``.

    ``None`` is accepted both as the whole block and as an individual
    entry. For a mapping the values are checked; for a list or tuple the
    items are checked.

    Raises:
        ValueError: If ``value`` is neither a mapping, list nor tuple, or an
            entry is not a string fully matching ``_HEX_COLOR_RE``.
    """
    if value is None:
        return
    if isinstance(value, Mapping):
        candidates = value.values()
    elif isinstance(value, (list, tuple)):
        candidates = value
    else:
        raise ValueError(f"{label} must be a mapping or sequence of colors")
    for entry in candidates:
        if entry is None:
            continue
        well_formed = isinstance(entry, str) and _HEX_COLOR_RE.fullmatch(entry)
        if not well_formed:
            raise ValueError(f"{label} contains invalid hex color {entry!r}")
def _coalesce(mappings: tuple[Mapping[str, Any], ...], keys: tuple[str, ...], default: Any = _MISSING) -> Any:
    """Return the first value found for any of ``keys`` in any of ``mappings``.

    Mappings are searched in order, and within each mapping the keys are
    tried in order. Keys may be dotted paths into nested mappings.

    Returns:
        The first value found, or ``default`` when one was supplied.

    Raises:
        ValueError: If nothing is found and no ``default`` was given.
    """
    for source in mappings:
        for dotted in keys:
            hit = _lookup(source, dotted)
            if hit is _MISSING:
                continue
            return hit
    if default is _MISSING:
        raise ValueError(f"missing required value; looked for {keys!r}")
    return default


def _lookup(mapping: Mapping[str, Any], key: str) -> Any:
    """Follow a dotted ``key`` through nested mappings.

    Returns:
        The value at the end of the path, or the ``_MISSING`` sentinel when
        a path segment is absent or an intermediate value is not a mapping.
    """
    node: Any = mapping
    for segment in key.split("."):
        if isinstance(node, Mapping) and segment in node:
            node = node[segment]
        else:
            return _MISSING
    return node


def _ensure_mapping(value: Any, label: str) -> dict[str, Any]:
    """Return ``value`` as a dict.

    ``None`` becomes a new empty dict. A ``dict`` is returned as the same
    object (not copied); any other mapping is copied into a new dict.

    Raises:
        ValueError: If ``value`` is neither ``None`` nor a mapping.
    """
    if value is None:
        return {}
    if isinstance(value, dict):
        return value
    if isinstance(value, Mapping):
        return dict(value)
    raise ValueError(f"{label} must be a mapping")


def _unique(values: list[str]) -> list[str]:
    """Return ``values`` with repeats removed, keeping first-seen order."""
    # dict keys are unique and remember insertion order.
    return list(dict.fromkeys(values))


def _hash_json(value: Any) -> str:
    """Return the SHA-256 hex digest of the compact, key-sorted JSON form of ``value``."""
    encoded = json.dumps(
        _json_ready(value),
        sort_keys=True,
        separators=(",", ":"),
    ).encode("utf-8")
    return hashlib.sha256(encoded).hexdigest()


def _hash_array(array: np.ndarray) -> str:
    """Return a SHA-256 hex digest covering an array's dtype, shape and bytes.

    The array is made C-contiguous first, and the dtype name and JSON-encoded
    shape are hashed ahead of the raw bytes.
    """
    contiguous = np.ascontiguousarray(array)
    hasher = hashlib.sha256()
    chunks = (
        str(contiguous.dtype).encode("utf-8"),
        json.dumps(contiguous.shape).encode("utf-8"),
        contiguous.tobytes(order="C"),
    )
    for chunk in chunks:
        hasher.update(chunk)
    return hasher.hexdigest()


def _json_ready(value: Any) -> Any:
    """Recursively convert ``value`` into types ``json.dumps`` accepts.

    NumPy arrays, lists and tuples become lists; mappings become dicts with
    string keys; NumPy scalars become Python scalars. Anything else is
    returned unchanged.
    """
    if isinstance(value, np.ndarray):
        return list(map(_json_ready, value.tolist()))
    if isinstance(value, Mapping):
        converted = {}
        for name, item in value.items():
            converted[str(name)] = _json_ready(item)
        return converted
    if isinstance(value, (list, tuple)):
        return list(map(_json_ready, value))
    if isinstance(value, np.generic):
        return value.item()
    return value
Path(path).read_bytes() + token = embedded_value.encode("ascii") + if token not in raw: + raise ValueError("unable to locate embedded h5ad checksum marker in artifact bytes") + normalized = raw.replace(token, ZERO_SHA256.encode("ascii"), 1) + return hashlib.sha256(normalized).hexdigest() + + +def _uuid7_like() -> str: + timestamp_ms = int(time.time() * 1000) + time_high = timestamp_ms & ((1 << 48) - 1) + random_bits = uuid.uuid4().int & ((1 << 74) - 1) + value = (time_high << 80) | (0x7 << 76) | (((random_bits >> 62) & 0x0FFF) << 64) | (0x2 << 62) | (random_bits & ((1 << 62) - 1)) + return str(uuid.UUID(int=value)) From f017e435398b87dcc1ecd3fcdaf2f61569e7202f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 18 Mar 2026 00:07:46 +0000 Subject: [PATCH 3/3] fix: pin serializer dependency compatibility Co-authored-by: yulewu <38241047+yulewu@users.noreply.github.com> --- pyproject.toml | 3 ++- ueler/viewer/plugin/cell_annotation/serialize.py | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 85e88c2..598d490 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,8 @@ classifiers = [ ] dependencies = [ "anndata>=0.11,<0.12", - "numpy>=1.23", + "numpy>=1.23,<2", + "pandas>=1.5,<3", ] [project.optional-dependencies] diff --git a/ueler/viewer/plugin/cell_annotation/serialize.py b/ueler/viewer/plugin/cell_annotation/serialize.py index 39390d4..bd5ae13 100644 --- a/ueler/viewer/plugin/cell_annotation/serialize.py +++ b/ueler/viewer/plugin/cell_annotation/serialize.py @@ -22,12 +22,15 @@ _dask_stub.__spec__ = importlib.machinery.ModuleSpec("dask", loader=None) if _dask_stub is not None and not hasattr(_dask_stub, "__path__"): # pragma: no cover - test bootstrap quirk _dask_stub.__path__ = [] -if _dask_stub is not None and "dask.array" not in sys.modules: # pragma: no cover - test bootstrap quirk +_dask_array_stub = sys.modules.get("dask.array") 
+if _dask_array_stub is None and _dask_stub is not None: # pragma: no cover - test bootstrap quirk dask_array_stub = types.ModuleType("dask.array") - dask_array_stub.Array = type("Array", (), {}) dask_array_stub.__spec__ = importlib.machinery.ModuleSpec("dask.array", loader=None) sys.modules["dask.array"] = dask_array_stub _dask_stub.array = dask_array_stub + _dask_array_stub = dask_array_stub +if _dask_array_stub is not None and not hasattr(_dask_array_stub, "Array"): # pragma: no cover - test bootstrap quirk + _dask_array_stub.Array = type("Array", (), {}) import anndata as ad import numpy as np