From f450c157e23b22f32ab446296c562525fa076d12 Mon Sep 17 00:00:00 2001 From: "Toby C. Cornish" Date: Sun, 28 Jun 2026 14:08:48 -0500 Subject: [PATCH 1/2] test(qa): manual cross-tool QA toolkit; fix DICOM engine edge frames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add scripts/qa/ — a manual QA harness for exercising wsitools across formats and confirming outputs in real viewers before a release: - run-matrix.sh: generate a broad matrix of conversions/transforms/edits + a manifest (containers, codecs, --factor/--rect/--tile-size, downsample, crop ±lossless, transcode, cross-format → svs, → dicom, associated edits). - check-openslide.sh: auto-validate openslide-readable outputs (open, render deepest level — catches the edge-tile "dimensional mismatch" — level count, associated images). - check-bioformats.sh: auto-validate via Bio-Formats showinf (QuPath proxy), optional pixel-decode crop. - MANUAL-TEST-PLAN.md: rubric (the real-viewer failure modes) + per-viewer checklists (ImageScope/QuPath/Hamamatsu/OpenSeadragon/Iris validator) + known N/A gaps. The toolkit immediately found a real bug: convert --to dicom --factor (the retile-engine DICOM path) emitted partial edge frames at truncated size, but DICOM TILED_FULL requires every frame to be exactly Rows×Columns — OpenSlide's DICOM reader rejected it ("Dimensional mismatch reading JPEG, expected 256x256, got 180x129"). Same class as the v0.24.1 TIFF edge-tile fix, which only covered codecTileEncoder. dicomFrameEncoder now edge-replicates partial frames up to the full frame size too (reusing padRGBTileReplicate). Regression test: TestDICOMEdgeFramesAreFullSize. Verified clean against OpenSlide. Known interop gap surfaced (not fixed; experimental): OpenSlide's Ventana reader rejects our synthesized BIF TileJointInfo Direction="LEFT"/"UP" ("Bad direction attribute"); Bio-Formats/QuPath/opentile read our BIF fine. Documented in the manual plan. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- cmd/wsitools/dicom_engine.go | 17 +- scripts/qa/MANUAL-TEST-PLAN.md | 130 ++++++++++++++ scripts/qa/check-bioformats.sh | 71 ++++++++ scripts/qa/check-openslide.sh | 78 ++++++++ scripts/qa/run-matrix.sh | 168 ++++++++++++++++++ .../svs_aperio_conformance_test.go | 56 ++++++ 6 files changed, 518 insertions(+), 2 deletions(-) create mode 100644 scripts/qa/MANUAL-TEST-PLAN.md create mode 100755 scripts/qa/check-bioformats.sh create mode 100755 scripts/qa/check-openslide.sh create mode 100755 scripts/qa/run-matrix.sh diff --git a/cmd/wsitools/dicom_engine.go b/cmd/wsitools/dicom_engine.go index 398cb2d..412fa77 100644 --- a/cmd/wsitools/dicom_engine.go +++ b/cmd/wsitools/dicom_engine.go @@ -28,6 +28,9 @@ func runDICOMEngine(ctx context.Context, slide *opentile.Slide, srcRegion openti if err != nil { return err } + // Pad partial edge frames up to the full frame size (DICOM TILED_FULL frames + // are uniform Rows×Columns); all levels share one square tile size. + enc.tileW, enc.tileH = levels[0].TileW, levels[0].TileH defer enc.Close() spoolDir, err := os.MkdirTemp("", "wsitools-dcm-spool-*") @@ -162,8 +165,9 @@ func (l *spoolLevel) DecodedTile(x, y int) (*decoder.Image, error) { // EncodeStandalone; J2K-family codecs (jpeg2000/htj2k) already return a complete // codestream from EncodeTile. 
type dicomFrameEncoder struct { - jpeg *jpegcodec.Encoder // non-nil for jpeg - codec codec.Encoder // non-nil for j2k-family + jpeg *jpegcodec.Encoder // non-nil for jpeg + codec codec.Encoder // non-nil for j2k-family + tileW, tileH int // full frame size; partial edge tiles are padded up to it } // newDicomFrameEncoder builds the frame encoder + reports the source.Compression @@ -202,6 +206,15 @@ func newJ2KFrameEncoder(codecName string, quality int) (*dicomFrameEncoder, sour } func (e *dicomFrameEncoder) EncodeTile(rgb []byte, w, h int) ([]byte, error) { + // DICOM TILED_FULL requires every frame to be exactly Rows×Columns; the engine + // hands partial right/bottom edge frames (and whole levels smaller than one + // frame) at truncated content size. Pad up to the full frame (edge-replicated) + // so strict readers (OpenSlide's DICOM reader, pydicom consumers) don't hit a + // frame/dimension mismatch — mirrors codecTileEncoder for the TIFF family. + if e.tileW > 0 && e.tileH > 0 && (w < e.tileW || h < e.tileH) { + rgb = padRGBTileReplicate(rgb, w, h, e.tileW, e.tileH) + w, h = e.tileW, e.tileH + } if e.jpeg != nil { return e.jpeg.EncodeStandalone(rgb, w, h) } diff --git a/scripts/qa/MANUAL-TEST-PLAN.md b/scripts/qa/MANUAL-TEST-PLAN.md new file mode 100644 index 0000000..a8b1318 --- /dev/null +++ b/scripts/qa/MANUAL-TEST-PLAN.md @@ -0,0 +1,130 @@ +# wsitools manual QA plan + +A checklist for **manually** exercising wsitools across formats/transforms and +confirming the outputs in real viewers. The helper scripts here generate a broad +matrix of outputs and auto-validate the ones that can be machine-checked; the +rest you open by eye in the viewers you have. + +This is deliberately *not* a programmatic test suite (those live in `go test`). +It's the "did we actually break anything a real viewer cares about" pass to run +before a release. + +## 0. Workflow + +```sh +# 1. Generate the output matrix (builds wsitools from the repo). 
+scripts/qa/run-matrix.sh # add --big to also drive NDPI + IFE/Iris sources +# -> /tmp/wsitools-qa/cases/* + manifest.tsv (override with OUT=/path) + +# 2. Auto-validate the machine-checkable outputs. +scripts/qa/check-openslide.sh # OpenSlide = Aperio-ecosystem gold oracle +scripts/qa/check-bioformats.sh # Bio-Formats = what QuPath uses underneath +scripts/qa/check-bioformats.sh --pixels # also decode a 256x256 crop per output + +# 3. Eyeball the rendered PNGs from the OpenSlide pass. +open /tmp/wsitools-qa/openslide/*.png + +# 4. Hand-open the GUI-only artifacts (QuPath / ImageScope / Hamamatsu / browser) +# per the tables below. +``` + +`manifest.tsv` columns: `id category description source output status`. +Each row's `output` is under `OUT/cases/`, except the read-side `info`/`validate` rows (`OUT/logs/`) and the associated-image extractions (`OUT/_assets/`). + +## 1. What to look for (the rubric) + +These are the failure modes wsitools has actually hit. Check each opened slide +against them: + +| # | Symptom | What it means | +|---|---------|---------------| +| R1 | **Colours wrong** (blue/orange swapped, oversaturated) | Photometric/Subsampling tag vs JPEG framing mismatch | +| R2 | **Black blocks / garbled stripes at right or bottom edges** | Edge tiles not padded to full tile size (TIFF/DICOM) | +| R3 | **A pyramid/zoom level missing** (big jump in zoom detail) | SVS thumbnail not at IFD 1 → ImageScope ate a level | +| R4 | **Wrong physical scale** (scale bar / µm wrong) | MPP / magnification metadata dropped or mis-scaled | +| R5 | **Label / macro / overview missing or wrong** | Associated-image copy/edit defect | +| R6 | **Won't open at all** | Structural/container conformance defect | +| R7 | **Truncated tissue / wrong dimensions** | Level dims or crop rect handling defect | + +`info` + `validate` (run automatically in matrix section A) cover R4/R5/R6 at the +metadata level; the viewers confirm the pixels. + +## 2. 
Auto-validated (no GUI needed) + +| Tool | Reads | Catches | Run | +|------|-------|---------|-----| +| **OpenSlide** | svs, generic tiled tiff, cog-wsi, dicom, ndpi, bif, mrxs, scn, philips | R2 (dimensional mismatch on render), R3 (level count), R5 (associated list), R6 | `check-openslide.sh` | +| **Bio-Formats** | the above + ome-tiff (+ proxy for QuPath) | R6 (parse/IFD/OME-XML errors), R7 (series dims), pixel decode with `--pixels` | `check-bioformats.sh` | + +Both print `OK`, `N/A`, or a `*FAIL` result (`OPENFAIL`/`READFAIL` from OpenSlide, `PARSEFAIL`/`PIXFAIL` from Bio-Formats). `N/A` = that tool can't read that container/codec +(expected — see §4); only the `*FAIL` results need attention. The OpenSlide pass also writes +a deepest-level PNG per slide to `OUT/openslide/` — flip through them for R1/R2/R7. + +## 3. Manual viewers (open by hand) + +### ImageScope (Windows — strict Aperio reader; the toughest critic) +Open these from `OUT/cases/` and check R1/R2/R3/R4/R5: + +| Artifact | Why it matters | +|----------|----------------| +| `b_svs.svs` | baseline SVS round-trip | +| `e_bif2svs.svs`, `e_ome2svs.svs`, `e_cog2svs.svs`, `e_dicom2svs.svs` | cross-format → SVS (R1/R2/R3 regression set) | +| `e_ife2svs.svs` (needs `--big`) | IFE/Iris → SVS (the 4×-pyramid case) | +| `d_factor2.svs`, `d_rect.svs`, `d_tile512.svs`, `d_crop.svs`, `d_crop_lossless.svs` | transforms — check edges (R2) + scale (R4) | +| `g_label_replaced.svs`, `g_label_removed.svs`, `g_overview_removed.svs`, `g_macro_replaced.svs` | associated edits (R5); open Image → "View Label / View Thumbnail" | + +In the ImageScope **Image Information** panel verify: all pyramid levels present +with sensible ratios (R3), MPP + AppMag correct (R4), Label/Thumbnail tabs +populated (R5). + +### QuPath (cross-platform — Bio-Formats + OpenSlide) +Open the same SVS set plus `b_ome.ome.tiff`, `b_tiff.tiff`, `b_cog.tiff`, +`b_dicom/` (point at a `.dcm`). Check R1/R2/R4/R7 at multiple zooms; QuPath's +status bar shows µm/px (R4). 
OME-TIFF is QuPath's strong suit — confirm +`b_ome.ome.tiff` and `e_ome2svs.svs` look right. + +### Hamamatsu viewer (NDPI) +Hamamatsu's viewer is for native NDPI. Use it on the **source** `ndpi/*.ndpi` +fixtures to confirm the source reads (sanity), and on any NDPI you produce. (wsitools +does not write NDPI, so this is mostly source-side / read-side confirmation.) + +### Browser / OpenSeadragon (DZI, SZI) +OpenSlide/Bio-Formats can't read DZI/SZI. Validate them as tiled web pyramids: +- `b_dzi.dzi` + `b_dzi_files/` — load in any DZI viewer (OpenSeadragon demo page, + or VIPS `vipsdisp`). Check R2 (tile seams/edges), R7 (full extent), and that + deep zoom levels all load. +- `b_szi.szi` — the zipped DZI; unzip and inspect, or use an SZI-aware viewer. + +### Iris validator (IFE / `.iris`) +The official gold gate for IFE. In a venv: `pip install Iris-Codec`, then +`make ife-validate` (or the snippet in the Makefile). Validate `b_ife.iris` and, +with `--big`, the round-trip of `425248_JPEG.iris`. CI also runs this. + +## 4. Expected `N/A` / known gaps (NOT failures) + +- **Novel codecs in TIFF** (`c_avif.tiff`, `c_htj2k.tiff`, `c_jpegxl.tiff`, + `c_webp.tiff`): no standard TIFF compression tag → OpenSlide & Bio-Formats + can't read them. They are **wsitools/opentile-only**; validate with + `wsitools info <file>` / `wsitools validate <file>` / `wsitools region`. (JPEG and + JPEG-2000 in TIFF/SVS *are* standard and read everywhere.) +- **DZI / SZI / IFE**: not readable by OpenSlide or Bio-Formats (see §3 for their + validators). +- **`b_bif.bif` in OpenSlide**: OpenSlide's Ventana reader rejects our synthesized + `TileJointInfo Direction="LEFT"/"UP"` ("Bad direction attribute"). Our BIF is + read correctly by **Bio-Formats / QuPath / opentile** (and round-trips + pixel-identical); `--to bif` is experimental. Known interop gap with OpenSlide's + BIF reader specifically — not a general defect. + +## 5. 
Pixel-equivalence spot checks (optional, exact) + +For conversions that should be pixel-faithful, compare against the source with +wsitools' own pixel hash (geometry-independent within a level): + +```sh +# Lossless / tile-copy conversions should match the source pixel hash: +wsitools hash --mode pixel sample_files/svs/CMU-1.svs +wsitools hash --mode pixel /tmp/wsitools-qa/cases/d_crop_lossless.svs # same region +# Render the same region from source and output and diff visually: +wsitools region --level 0 --rect 2000,2000,1024,1024 -o /tmp/src.png sample_files/svs/CMU-1.svs +wsitools region --level 0 --rect 1000,1000,1024,1024 -o /tmp/out.png /tmp/wsitools-qa/cases/d_crop.svs +``` diff --git a/scripts/qa/check-bioformats.sh b/scripts/qa/check-bioformats.sh new file mode 100755 index 0000000..f963300 --- /dev/null +++ b/scripts/qa/check-bioformats.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# +# check-bioformats.sh — auto-validate run-matrix.sh outputs with Bio-Formats +# (showinf). Bio-Formats is what QuPath uses under the hood, so a clean parse +# here is a good predictor of QuPath behaviour. For each readable output it: +# - parses metadata only (showinf -nopix) — catches structural/IFD/OME-XML errors, +# - reports series count + dimensions, +# - optionally decodes a small crop (bfconvert) to confirm pixels read. +# Bio-Formats can't read DZI/SZI/IFE — those are reported N/A. 
+# +# Usage: scripts/qa/check-bioformats.sh [--pixels] (OUT defaults to /tmp/wsitools-qa) +# --pixels also bfconvert a 256x256 crop of series 0 (slower; confirms decode) +# +set -uo pipefail +OUT="${OUT:-/tmp/wsitools-qa}" +CASES="$OUT/cases" +DEST="$OUT/bioformats" +PIX=0; [[ "${1:-}" == "--pixels" ]] && PIX=1 +mkdir -p "$DEST" + +command -v showinf >/dev/null 2>&1 || { echo "Bio-Formats 'showinf' not found (install bftools)"; exit 1; } +[[ -d "$CASES" ]] || { echo "no cases dir at $CASES — run run-matrix.sh first"; exit 1; } + +pass=0; fail=0; na=0 +printf "%-28s %-8s %s\n" "OUTPUT" "RESULT" "DETAIL" +printf -- "---------------------------------------------------------------------------\n" + +for path in "$CASES"/*; do + name="$(basename "$path")" + case "$name" in + *.dzi|*.szi|*.iris) printf "%-28s %-8s %s\n" "$name" "N/A" "Bio-Formats can't read this container"; na=$((na+1)); continue ;; + esac + # DICOM output is a directory of .dcm — point Bio-Formats at one instance. + target="$path" + if [[ -d "$path" ]]; then + target="$(find "$path" -maxdepth 1 -name '*.dcm' | head -1)" + [[ -z "$target" ]] && { printf "%-28s %-8s %s\n" "$name" "N/A" "no .dcm in dir"; na=$((na+1)); continue; } + fi + + log="$DEST/$name.showinf.log" + showinf -nopix -no-upgrade "$target" >"$log" 2>&1 + rc=$? + # Novel codecs (AVIF/HTJ2K/JPEG-XL/WebP in TIFF) have no Bio-Formats codec — + # that's an expected reader limitation (same as OpenSlide N/A), not a defect. 
+ if grep -qiE "Unable to find TiffCompres|unsupported compression" "$log"; then + printf "%-28s %-8s %s\n" "$name" "N/A" "Bio-Formats has no codec for this tile compression" + na=$((na+1)); continue + fi + if [[ $rc -ne 0 ]] || grep -qiE "exception|cannot read|error reading|unsupported" "$log"; then + printf "%-28s %-8s %s\n" "$name" "PARSEFAIL" "$(grep -iE 'exception|cannot|error|unsupported' "$log" | head -1)" + fail=$((fail+1)); continue + fi + series="$(sed -n 's/^Series count = \([0-9]*\)/\1/p' "$log" | head -1)" + dims="$(grep -m1 'Width = ' "$log" | sed 's/.*Width = //')x$(grep -m1 'Height = ' "$log" | sed 's/.*Height = //')" + + detail="series=${series:-?} dim0=${dims}" + result="OK" + if [[ "$PIX" == 1 ]]; then + if bfconvert -overwrite -series 0 -crop 0,0,256,256 "$target" "$DEST/$name.crop.png" >"$DEST/$name.bfconvert.log" 2>&1; then + detail="$detail pixels=OK" + else + detail="$detail pixels=FAIL"; result="PIXFAIL"; fail=$((fail+1)) + fi + fi + [[ "$result" == "OK" ]] && pass=$((pass+1)) + printf "%-28s %-8s %s\n" "$name" "$result" "$detail" +done + +echo +echo "Bio-Formats: $pass OK, $fail FAIL, $na N/A. showinf logs in $DEST" +[[ "$fail" -gt 0 ]] && exit 1 || exit 0 diff --git a/scripts/qa/check-openslide.sh b/scripts/qa/check-openslide.sh new file mode 100755 index 0000000..fac8543 --- /dev/null +++ b/scripts/qa/check-openslide.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# +# check-openslide.sh — auto-validate the run-matrix.sh outputs with the OpenSlide +# CLI (the Aperio-ecosystem gold oracle). For every output OpenSlide can open it: +# - prints level count + per-level downsamples (catches dropped/duplicated levels), +# - renders the deepest pyramid level to a PNG (catches the "Dimensional mismatch +# reading JPEG" edge-tile bug and other read failures), +# - reports associated images (label/macro/thumbnail). 
+# +# OpenSlide can't read some containers/codecs (DZI/SZI/IFE/OME-TIFF, and novel +# codecs like JPEG-XL/AVIF/WebP, or JPEG2000 in a *generic* TIFF). Those are +# reported N/A — use Bio-Formats / the Iris validator / `wsitools validate` for +# them. A real FAIL is a container OpenSlide *should* read (e.g. a JPEG SVS) that +# errors on open or render. +# +# Usage: scripts/qa/check-openslide.sh (OUT defaults to /tmp/wsitools-qa) +# +set -uo pipefail +OUT="${OUT:-/tmp/wsitools-qa}" +CASES="$OUT/cases" +DEST="$OUT/openslide" +mkdir -p "$DEST" + +command -v openslide-show-properties >/dev/null 2>&1 || { echo "openslide CLI not found (brew install openslide)"; exit 1; } +[[ -d "$CASES" ]] || { echo "no cases dir at $CASES — run run-matrix.sh first"; exit 1; } + +# prop FILE KEY — extract a property value (KEY may contain [] brackets). +prop() { openslide-show-properties "$1" 2>/dev/null | grep -F "$2: '" | head -1 | sed "s/.*: '\(.*\)'\$/\1/"; } +# is the open-error an expected OpenSlide limitation (not a wsitools defect)? +expected_na() { grep -qiE "unsupported tiff compression|compression support is not configured|not a file that openslide can recognize|unsupported|cannot read" "$1"; } + +pass=0; fail=0; na=0 +printf "%-28s %-9s %s\n" "OUTPUT" "RESULT" "DETAIL" +printf -- "---------------------------------------------------------------------------\n" + +for path in "$CASES"/*; do + name="$(basename "$path")" + case "$name" in + *.dzi|*.szi|*.iris|*.ome.tiff) printf "%-28s %-9s %s\n" "$name" "N/A" "OpenSlide can't read this container"; na=$((na+1)); continue ;; + esac + + # DICOM output is a directory of .dcm — OpenSlide opens it via one instance. + target="$path" + [[ -d "$path" ]] && target="$(find "$path" -maxdepth 1 -name '*.dcm' | sort | head -1)" + [[ -z "$target" ]] && { printf "%-28s %-9s %s\n" "$name" "N/A" "no .dcm in dir"; na=$((na+1)); continue; } + + if ! 
openslide-show-properties "$target" >/dev/null 2>"$DEST/$name.openerr"; then + if expected_na "$DEST/$name.openerr"; then + printf "%-28s %-9s %s\n" "$name" "N/A" "$(head -1 "$DEST/$name.openerr" | sed 's#.*: ##')" + na=$((na+1)) + else + printf "%-28s %-9s %s\n" "$name" "OPENFAIL" "$(head -1 "$DEST/$name.openerr")" + fail=$((fail+1)) + fi + continue + fi + + lc="$(prop "$target" openslide.level-count)"; lc="${lc:-1}" + last=$((lc-1)) + lw="$(prop "$target" "openslide.level[$last].width")" + lh="$(prop "$target" "openslide.level[$last].height")" + downs="$(openslide-show-properties "$target" 2>/dev/null | sed -n "s/^openslide.level\[[0-9]*\].downsample: '\(.*\)'\$/\1/p" | awk '{printf "%.0f ",$1}')" + assoc="$(openslide-show-properties "$target" 2>/dev/null | sed -n "s/^openslide.associated.\([a-z]*\)\..*/\1/p" | sort -u | tr '\n' ',' | sed 's/,$//')" + + rerr="$DEST/$name.readerr" + if [[ -n "$lw" && -n "$lh" ]] && openslide-write-png "$target" 0 0 "$last" "$lw" "$lh" "$DEST/$name.png" 2>"$rerr"; then + printf "%-28s %-9s L=%s ds=[%s] assoc=[%s]\n" "$name" "OK" "$lc" "${downs% }" "${assoc:-none}" + pass=$((pass+1)) + else + printf "%-28s %-9s %s\n" "$name" "READFAIL" "$(head -1 "$rerr" 2>/dev/null) (level $last ${lw}x${lh})" + fail=$((fail+1)) + fi +done + +echo +echo "OpenSlide: $pass OK, $fail FAIL, $na N/A. Rendered PNGs + error logs in $DEST" +echo "Eyeball $DEST/*.png: colours correct, no black/garbled edges, full tissue extent." +[[ "$fail" -gt 0 ]] && exit 1 || exit 0 diff --git a/scripts/qa/run-matrix.sh b/scripts/qa/run-matrix.sh new file mode 100755 index 0000000..bbe4dd2 --- /dev/null +++ b/scripts/qa/run-matrix.sh @@ -0,0 +1,168 @@ +#!/usr/bin/env bash +# +# run-matrix.sh — exercise a broad matrix of wsitools functions and write the +# resulting artifacts (plus a manifest) into an output directory for manual +# inspection in viewers (OpenSlide, QuPath, ImageScope, Hamamatsu, Bio-Formats). +# +# This is NOT a programmatic test. 
It just *generates* outputs and a manifest; +# you then eyeball them (see MANUAL-TEST-PLAN.md) and/or run the auto-validators +# (check-openslide.sh, check-bioformats.sh). +# +# Usage: +# scripts/qa/run-matrix.sh [--big] [--clean] +# +# Env overrides: +# WSITOOLS=/path/to/wsitools use a prebuilt binary (else built from this repo) +# SRC=/path/to/sample_files source fixtures (default: ./sample_files) +# OUT=/path/to/outdir output dir (default: /tmp/wsitools-qa) +# +# Flags: +# --big also run cases driven by large sources (NDPI, IFE/Iris) — slow, GBs +# --clean remove OUT before running +# +set -uo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +SRC="${SRC:-$ROOT/sample_files}" +OUT="${OUT:-/tmp/wsitools-qa}" +BIG=0 +CLEAN=0 +for a in "$@"; do + case "$a" in + --big) BIG=1 ;; + --clean) CLEAN=1 ;; + *) echo "unknown flag: $a" >&2; exit 2 ;; + esac +done + +# ---- resolve the wsitools binary ------------------------------------------- +if [[ -n "${WSITOOLS:-}" ]]; then + BIN="$WSITOOLS" +else + BIN="$OUT/_bin/wsitools" + mkdir -p "$OUT/_bin" + echo ">> building wsitools from $ROOT ..." + ( cd "$ROOT" && go build -o "$BIN" ./cmd/wsitools ) || { echo "build failed"; exit 1; } +fi +echo ">> wsitools: $BIN ($($BIN version 2>/dev/null | head -1))" +echo ">> sources: $SRC" +echo ">> output: $OUT" + +[[ "$CLEAN" == 1 ]] && rm -rf "$OUT"/cases "$OUT"/manifest.tsv "$OUT"/logs +mkdir -p "$OUT/cases" "$OUT/logs" "$OUT/_assets" +MANIFEST="$OUT/manifest.tsv" +printf "id\tcategory\tdescription\tsource\toutput\tstatus\n" > "$MANIFEST" + +# ---- helpers ---------------------------------------------------------------- +# run ID CATEGORY "DESC" SRCFILE OUTPATH -- cmd... +run() { + local id="$1" cat="$2" desc="$3" srcf="$4" outp="$5"; shift 5 + [[ "$1" == "--" ]] && shift + if [[ -n "$srcf" && ! 
-e "$srcf" ]]; then + printf "%s\t%s\t%s\t%s\t%s\t%s\n" "$id" "$cat" "$desc" "${srcf#$SRC/}" "${outp#$OUT/}" "SKIP(no-src)" >> "$MANIFEST" + printf " [%s] SKIP (missing %s)\n" "$id" "${srcf#$SRC/}" + return + fi + local log="$OUT/logs/$id.log" + if "$@" >"$log" 2>&1; then + printf "%s\t%s\t%s\t%s\t%s\t%s\n" "$id" "$cat" "$desc" "${srcf#$SRC/}" "${outp#$OUT/}" "OK" >> "$MANIFEST" + printf " [%s] OK %s\n" "$id" "$desc" + else + printf "%s\t%s\t%s\t%s\t%s\t%s\n" "$id" "$cat" "$desc" "${srcf#$SRC/}" "${outp#$OUT/}" "FAIL" >> "$MANIFEST" + printf " [%s] FAIL %s (see logs/%s.log)\n" "$id" "$desc" "$id" + fi +} + +C="$OUT/cases" + +# ---- pick representative sources (smallest per format) ---------------------- +SVS_SMALL="$SRC/svs/CMU-1-Small-Region.svs" # 1-level jpeg + thumbnail/label/overview +SVS_MULTI="$SRC/svs/CMU-1.svs" # 3-level jpeg, full associated +SVS_JP2K="$SRC/svs/JP2K-33003-1.svs" # 3-level jpeg2000 +BIF="$SRC/bif/S12-18199-1A.bif" # Ventana DP200 (stitched) +OME="$SRC/ome-tiff/CMU-1-Small-Region.ome.tiff" +COG="$SRC/cog-wsi/CMU-1-Small-Region_cog-wsi.tiff" +DICOM="$SRC/dicom/scan_621_grundium_dicom" +NDPI="$SRC/ndpi/CMU-1.ndpi" # big +IFE="$SRC/ife/425248_JPEG.iris" # big + +echo; echo "== A. read-side inspection (sanity) ==" +for f in "$SVS_SMALL" "$BIF" "$OME" "$COG" "$DICOM" "$SVS_JP2K"; do + [[ -e "$f" ]] || continue + b="$(basename "$f")" + run "info-$b" read "info $b" "$f" "$OUT/logs/info-$b.log" -- "$BIN" info "$f" + run "validate-$b" read "validate $b" "$f" "$OUT/logs/validate-$b.log" -- "$BIN" validate "$f" +done +[[ "$BIG" == 1 ]] && for f in "$NDPI" "$IFE"; do [[ -e "$f" ]] && run "info-$(basename "$f")" read "info $(basename "$f")" "$f" "$OUT/logs/info-$(basename "$f").log" -- "$BIN" info "$f"; done + +echo; echo "== B. 
container conversions (from a small SVS) ==" +run b-svs container "SVS -> svs" "$SVS_SMALL" "$C/b_svs.svs" -- "$BIN" convert --to svs -f -o "$C/b_svs.svs" "$SVS_SMALL" +run b-tiff container "SVS -> tiff" "$SVS_SMALL" "$C/b_tiff.tiff" -- "$BIN" convert --to tiff -f -o "$C/b_tiff.tiff" "$SVS_SMALL" +run b-ome container "SVS -> ome-tiff" "$SVS_SMALL" "$C/b_ome.ome.tiff" -- "$BIN" convert --to ome-tiff -f -o "$C/b_ome.ome.tiff" "$SVS_SMALL" +run b-cog container "SVS -> cog-wsi" "$SVS_SMALL" "$C/b_cog.tiff" -- "$BIN" convert --to cog-wsi -f -o "$C/b_cog.tiff" "$SVS_SMALL" +run b-dzi container "SVS -> dzi" "$SVS_SMALL" "$C/b_dzi.dzi" -- "$BIN" convert --to dzi -f -o "$C/b_dzi.dzi" "$SVS_SMALL" +run b-szi container "SVS -> szi" "$SVS_SMALL" "$C/b_szi.szi" -- "$BIN" convert --to szi -f -o "$C/b_szi.szi" "$SVS_SMALL" +run b-dicom container "SVS -> dicom" "$SVS_SMALL" "$C/b_dicom" -- "$BIN" convert --to dicom -f -o "$C/b_dicom" "$SVS_SMALL" +run b-bif container "SVS -> bif" "$SVS_SMALL" "$C/b_bif.bif" -- "$BIN" convert --to bif -f -o "$C/b_bif.bif" "$SVS_SMALL" +run b-ife container "SVS -> ife(256)" "$SVS_SMALL" "$C/b_ife.iris" -- "$BIN" convert --to ife --tile-size 256 -f -o "$C/b_ife.iris" "$SVS_SMALL" + +echo; echo "== C. codec coverage (-> tiff) ==" +# jpeg (default) + jpeg2000 are standard-TIFF-conformant (ImageScope/Bio-Formats +# read them). jpegxl/avif/webp/htj2k have no standard TIFF compression tag, so +# they need --allow-nonconformant and are readable ONLY by wsitools/opentile — +# validate those with `wsitools info/validate`, not external viewers. 
+run c-jpeg2000 codec "SVS -> tiff --codec jpeg2000" "$SVS_SMALL" "$C/c_jpeg2000.tiff" -- "$BIN" convert --to tiff --codec jpeg2000 -f -o "$C/c_jpeg2000.tiff" "$SVS_SMALL" +for codec in jpegxl avif webp htj2k; do + run "c-$codec" codec-novel "SVS -> tiff --codec $codec (wsitools-only)" "$SVS_SMALL" "$C/c_$codec.tiff" -- "$BIN" convert --to tiff --codec "$codec" --allow-nonconformant -f -o "$C/c_$codec.tiff" "$SVS_SMALL" +done + +echo; echo "== D. transforms (factor / crop / tile-size / downsample / crop / transcode) ==" +run d-factor2 transform "SVS --factor 2 -> svs" "$SVS_MULTI" "$C/d_factor2.svs" -- "$BIN" convert --to svs --factor 2 -f -o "$C/d_factor2.svs" "$SVS_MULTI" +run d-factor4 transform "SVS --factor 4 -> tiff" "$SVS_MULTI" "$C/d_factor4.tiff" -- "$BIN" convert --to tiff --factor 4 -f -o "$C/d_factor4.tiff" "$SVS_MULTI" +run d-rect transform "SVS --rect crop -> svs" "$SVS_MULTI" "$C/d_rect.svs" -- "$BIN" convert --to svs --rect 0,0,4096,4096 -f -o "$C/d_rect.svs" "$SVS_MULTI" +run d-tilesize transform "SVS --tile-size 512 -> svs" "$SVS_MULTI" "$C/d_tile512.svs" -- "$BIN" convert --to svs --codec jpeg --tile-size 512 -f -o "$C/d_tile512.svs" "$SVS_MULTI" +run d-downs transform "downsample --factor 2 (svs)" "$SVS_MULTI" "$C/d_downsample.svs" -- "$BIN" downsample --factor 2 -f -o "$C/d_downsample.svs" "$SVS_MULTI" +run d-crop transform "crop (lossy) svs" "$SVS_MULTI" "$C/d_crop.svs" -- "$BIN" crop --rect 1000,1000,6000,6000 -f -o "$C/d_crop.svs" "$SVS_MULTI" +run d-cropll transform "crop --lossless svs" "$SVS_MULTI" "$C/d_crop_lossless.svs" -- "$BIN" crop --rect 1000,1000,6000,6000 --lossless -f -o "$C/d_crop_lossless.svs" "$SVS_MULTI" +run d-trans transform "transcode jpeg->jpeg2000 (svs)" "$SVS_MULTI" "$C/d_transcode.svs" -- "$BIN" transcode --codec jpeg2000 -f -o "$C/d_transcode.svs" "$SVS_MULTI" + +echo; echo "== E. 
cross-format -> SVS (the ImageScope-critical set) ==" +run e-bif crossfmt "BIF -> svs" "$BIF" "$C/e_bif2svs.svs" -- "$BIN" convert --to svs -f -o "$C/e_bif2svs.svs" "$BIF" +run e-ome crossfmt "OME-TIFF -> svs" "$OME" "$C/e_ome2svs.svs" -- "$BIN" convert --to svs -f -o "$C/e_ome2svs.svs" "$OME" +run e-cog crossfmt "COG-WSI -> svs" "$COG" "$C/e_cog2svs.svs" -- "$BIN" convert --to svs -f -o "$C/e_cog2svs.svs" "$COG" +run e-dicom crossfmt "DICOM -> svs" "$DICOM" "$C/e_dicom2svs.svs" -- "$BIN" convert --to svs -f -o "$C/e_dicom2svs.svs" "$DICOM" +if [[ "$BIG" == 1 ]]; then + run e-ndpi crossfmt "NDPI -> svs" "$NDPI" "$C/e_ndpi2svs.svs" -- "$BIN" convert --to svs -f -o "$C/e_ndpi2svs.svs" "$NDPI" + run e-ife crossfmt "IFE/Iris -> svs" "$IFE" "$C/e_ife2svs.svs" -- "$BIN" convert --to svs -f -o "$C/e_ife2svs.svs" "$IFE" +fi + +echo; echo "== F. -> DICOM transforms ==" +run f-dcm-factor crossfmt "SVS --factor 2 -> dicom" "$SVS_MULTI" "$C/f_svs2dicom_f2" -- "$BIN" convert --to dicom --factor 2 -f -o "$C/f_svs2dicom_f2" "$SVS_MULTI" +run f-dcm-dcm crossfmt "DICOM --factor 2 -> dicom" "$DICOM" "$C/f_dicom2dicom_f2" -- "$BIN" convert --to dicom --factor 2 -f -o "$C/f_dicom2dicom_f2" "$DICOM" + +echo; echo "== G. associated-image editing + extraction ==" +# Produce a replacement image from a source that has a label. 
+if [[ -e "$SVS_MULTI" ]]; then + "$BIN" extract --type label --format png -o "$OUT/_assets/label.png" "$SVS_MULTI" >/dev/null 2>&1 || true +fi +REPL="$OUT/_assets/label.png" +run g-lbl-rm associated "label remove (svs)" "$SVS_MULTI" "$C/g_label_removed.svs" -- "$BIN" label remove -o "$C/g_label_removed.svs" --overwrite "$SVS_MULTI" +if [[ -e "$REPL" ]]; then + run g-lbl-rep associated "label replace (svs)" "$SVS_MULTI" "$C/g_label_replaced.svs" -- "$BIN" label replace --image "$REPL" -o "$C/g_label_replaced.svs" --overwrite "$SVS_MULTI" + run g-mac-rep associated "macro replace (svs)" "$SVS_MULTI" "$C/g_macro_replaced.svs" -- "$BIN" macro replace --image "$REPL" -o "$C/g_macro_replaced.svs" --overwrite "$SVS_MULTI" +fi +run g-ovr-rm associated "overview remove (svs)" "$SVS_MULTI" "$C/g_overview_removed.svs" -- "$BIN" overview remove -o "$C/g_overview_removed.svs" --overwrite "$SVS_MULTI" +# Extraction of each associated type the source actually carries (CMU-1 has +# thumbnail/label/overview, no macro). 
+for t in label overview thumbnail; do + run "g-ext-$t" associated "extract $t -> png" "$SVS_MULTI" "$OUT/_assets/extracted_$t.png" -- "$BIN" extract --type "$t" --format png -o "$OUT/_assets/extracted_$t.png" "$SVS_MULTI" +done + +echo +echo "===========================================================================" +awk -F'\t' 'NR>1{c[$6]++} END{for(k in c) printf " %-12s %d\n", k, c[k]}' "$MANIFEST" +echo " manifest: $MANIFEST" +echo " next: scripts/qa/check-openslide.sh (auto-validate openslide-readable outputs)" +echo " scripts/qa/check-bioformats.sh (auto-validate via Bio-Formats showinf)" +echo " scripts/qa/MANUAL-TEST-PLAN.md (what to open where + what to look for)" +echo "===========================================================================" diff --git a/tests/integration/svs_aperio_conformance_test.go b/tests/integration/svs_aperio_conformance_test.go index 99b31da..43a4456 100644 --- a/tests/integration/svs_aperio_conformance_test.go +++ b/tests/integration/svs_aperio_conformance_test.go @@ -79,6 +79,62 @@ func TestSVSEdgeTilesAreFullSize(t *testing.T) { } } +// TestDICOMEdgeFramesAreFullSize guards the same padding fix in the DICOM engine +// path: DICOM TILED_FULL requires every frame to be exactly Rows×Columns, but the +// retile engine hands partial edge frames (and sub-frame levels) at content size. +// `--to dicom --factor` routes through the engine; converting a 2220×2967 source +// yields L0 edge frames AND a deepest level smaller than one frame, so the edge +// frame and the deepest-level frame JPEGs must both decode to the full frame size. 
+func TestDICOMEdgeFramesAreFullSize(t *testing.T) { + src := cmuFixture(t) + bin := buildOnce(t) + out := filepath.Join(t.TempDir(), "dcmout") + + if o, err := runCLI(bin, "convert", "--to", "dicom", "--factor", "2", "-f", "-o", out, src); err != nil { + t.Fatalf("convert --to dicom --factor 2: %v\n%s", err, o) + } + sl, err := opentile.OpenFile(out) + if err != nil { + t.Fatalf("open dicom output: %v", err) + } + defer sl.Close() + + levels := sl.Levels() + l0 := levels[0] + fw, fh := l0.TileSize.W, l0.TileSize.H // DICOM Columns×Rows + cols := (l0.Size.W + fw - 1) / fw + rows := (l0.Size.H + fh - 1) / fh + type tc struct { + lvl, col, row int + label string + } + cases := []tc{{0, 0, 0, "L0 interior"}} + if cols > 1 { + cases = append(cases, tc{0, cols - 1, 0, "L0 right edge"}) + } + if rows > 1 { + cases = append(cases, tc{0, 0, rows - 1, "L0 bottom edge"}) + } + // Deepest level (smaller than one frame). + cases = append(cases, tc{len(levels) - 1, 0, 0, "deepest level"}) + + for _, c := range cases { + lv := levels[c.lvl] + b, err := lv.Tile(c.col, c.row) + if err != nil { + t.Fatalf("%s: read frame L%d(%d,%d): %v", c.label, c.lvl, c.col, c.row, err) + } + w, h, ok := jpegSOFDims(b) + if !ok { + t.Fatalf("%s: frame has no JPEG SOF", c.label) + } + if w != lv.TileSize.W || h != lv.TileSize.H { + t.Errorf("%s: frame JPEG = %dx%d, want full frame %dx%d (DICOM frames must be uniform Rows×Columns)", + c.label, w, h, lv.TileSize.W, lv.TileSize.H) + } + } +} + // TestSVSSynthesizesThumbnailAtIFD1 guards the Aperio-layout fix: a source // without a thumbnail, converted to SVS, must get a synthesized thumbnail at // IFD 1. Genuine Aperio SVS always carries the thumbnail as the second IFD; From 83c1b29a9c78f5cf01ddd5ce58b00d8308c3e39b Mon Sep 17 00:00:00 2001 From: "Toby C. 
Cornish" Date: Sun, 28 Jun 2026 14:35:00 -0500 Subject: [PATCH 2/2] docs(changelog): DICOM edge-frame padding fix --- CHANGELOG.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 55f7e47..1e561d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,21 @@ All notable changes to wsi-tools will be documented here. The format is loosely ## [Unreleased] +### Fixed + +- **Corrupt edge frames in `convert --to dicom --factor` / `downsample` / `crop` + to DICOM (the retile-engine DICOM path).** Partial right/bottom edge frames — + and whole levels smaller than one frame — were encoded at their truncated + content size, but DICOM TILED_FULL requires every frame to be exactly + `Rows`×`Columns`; OpenSlide's DICOM reader (and other strict consumers) rejected + them (`Dimensional mismatch reading JPEG, expected 256x256, got …`). This is the + same class as the v0.24.1 TIFF edge-tile fix, which only covered the TIFF/IFE + encoder; `dicomFrameEncoder` now edge-replicates partial frames up to the full + frame size as well. The verbatim DICOM-source frame-copy path was never + affected. Added a cross-tool manual QA harness under `scripts/qa/` (matrix + generator + OpenSlide/Bio-Formats auto-validators + viewer checklist) that + surfaced this. + ## [0.24.1] - 2026-06-28 ### Fixed