Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,4 @@ outputs/
archive/
docs/20*-*.md
data/
.tasks/
64 changes: 32 additions & 32 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
ARG UBUNTU_VERSION=22.04
ARG CUDA_MAJOR_VERSION=12.8.1
ARG PYTHON_VERSION=3.11

########################
# Stage 1: build stage #
Expand All @@ -8,6 +9,7 @@ FROM nvidia/cuda:${CUDA_MAJOR_VERSION}-cudnn-devel-ubuntu${UBUNTU_VERSION} AS bu

ARG USER_UID=1001
ARG USER_GID=1001
ARG PYTHON_VERSION

# ensures that Python output to stdout/stderr is not buffered: prevents missing information when terminating
ENV PYTHONUNBUFFERED=1
Expand All @@ -26,6 +28,7 @@ WORKDIR /home/user
ENV PATH="/home/user/.local/bin:${PATH}"

RUN apt-get update && apt-get install -y --no-install-recommends \
software-properties-common \
libtiff-dev \
cmake \
zlib1g-dev \
Expand All @@ -36,24 +39,22 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
zip unzip \
git \
openssh-server \
software-properties-common \
gnupg2 \
gpg-agent \
&& add-apt-repository -y ppa:deadsnakes/ppa \
&& apt-get update && apt-get install -y --no-install-recommends \
python${PYTHON_VERSION} \
python${PYTHON_VERSION}-dev \
python${PYTHON_VERSION}-venv \
python${PYTHON_VERSION}-distutils \
&& mkdir /var/run/sshd \
&& curl -fsSL https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/local/bin/python3 \
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/local/bin/python \
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

RUN add-apt-repository -y ppa:deadsnakes/ppa \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
python3.11 \
python3.11-dev \
python3.11-venv \
python3.11-distutils \
&& ln -sf /usr/bin/python3.11 /usr/local/bin/python3 \
&& ln -sf /usr/bin/python3.11 /usr/bin/python3 \
&& ln -sf /usr/bin/python3.11 /usr/local/bin/python \
&& ln -sf /usr/bin/python3.11 /usr/bin/python \
&& rm -rf /var/lib/apt/lists/*

# libjpeg-turbo 3.x (required by PyTurboJPEG>=2)
ARG LIBJPEG_TURBO_VERSION=3.1.0
RUN curl -fsSL https://github.com/libjpeg-turbo/libjpeg-turbo/releases/download/${LIBJPEG_TURBO_VERSION}/libjpeg-turbo-${LIBJPEG_TURBO_VERSION}.tar.gz \
Expand All @@ -69,8 +70,7 @@ WORKDIR /opt/app/
ARG PYTORCH_CUDA_INDEX_URL=https://download.pytorch.org/whl/cu128
ARG GIT_MODEL_DEPENDENCIES="git+https://github.com/lilab-stanford/MUSK.git git+https://github.com/Mahmoodlab/CONCH.git git+https://github.com/prov-gigapath/prov-gigapath.git git+https://github.com/facebookresearch/sam2.git"

RUN python -m ensurepip --upgrade \
&& python -m pip install --upgrade pip setuptools pip-tools \
RUN python -m pip install --upgrade pip setuptools pip-tools \
&& python -m pip install hatchling psutil \
&& rm -rf /home/user/.cache/pip

Expand Down Expand Up @@ -108,6 +108,7 @@ FROM nvidia/cuda:${CUDA_MAJOR_VERSION}-cudnn-runtime-ubuntu${UBUNTU_VERSION}

ARG USER_UID=1001
ARG USER_GID=1001
ARG PYTHON_VERSION

ENV PYTHONUNBUFFERED=1
ENV DEBIAN_FRONTEND=noninteractive TZ=Europe/Amsterdam
Expand All @@ -126,32 +127,31 @@ WORKDIR /home/user
ENV PATH="/home/user/.local/bin:${PATH}"

RUN apt-get update && apt-get install -y --no-install-recommends \
software-properties-common \
libtiff-dev \
zlib1g-dev \
libnuma1 \
libspatialindex-dev \
curl \
vim screen \
zip unzip \
git \
openssh-server \
software-properties-common \
gnupg2 \
gpg-agent \
&& add-apt-repository -y ppa:deadsnakes/ppa \
&& apt-get update && apt-get install -y --no-install-recommends \
python${PYTHON_VERSION} \
python${PYTHON_VERSION}-venv \
python${PYTHON_VERSION}-distutils \
&& mkdir /var/run/sshd \
&& curl -fsSL https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/local/bin/python3 \
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/local/bin/python \
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

RUN add-apt-repository -y ppa:deadsnakes/ppa \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
python3.11 \
python3.11-dev \
python3.11-venv \
python3.11-distutils \
&& ln -sf /usr/bin/python3.11 /usr/local/bin/python3 \
&& ln -sf /usr/bin/python3.11 /usr/bin/python3 \
&& ln -sf /usr/bin/python3.11 /usr/local/bin/python \
&& ln -sf /usr/bin/python3.11 /usr/bin/python \
&& rm -rf /var/lib/apt/lists/*

# libjpeg-turbo 3.x (copied from build stage)
COPY --from=build /usr/local/lib/libjpeg* /usr/local/lib/
COPY --from=build /usr/local/lib/libturbojpeg* /usr/local/lib/
Expand All @@ -166,11 +166,11 @@ RUN apt-get update && curl -L ${ASAP_URL} -o /tmp/ASAP.deb && apt-get install --
rm -rf /var/lib/apt/lists/*

# copy Python libs & entrypoints from build stage (includes flash-attn, your deps, ASAP .pth)
COPY --from=build /usr/local/lib/python3.11/dist-packages /usr/local/lib/python3.11/dist-packages
COPY --from=build /usr/local/lib/python${PYTHON_VERSION}/dist-packages /usr/local/lib/python${PYTHON_VERSION}/dist-packages
COPY --from=build /usr/local/bin /usr/local/bin

# register libnvimgcodec so cucim can use GPU-accelerated JPEG decoding
RUN echo "/usr/local/lib/python3.11/dist-packages/nvidia/nvimgcodec" > /etc/ld.so.conf.d/nvimgcodec.conf && \
RUN echo "/usr/local/lib/python${PYTHON_VERSION}/dist-packages/nvidia/nvimgcodec" > /etc/ld.so.conf.d/nvimgcodec.conf && \
ldconfig

# copy app code
Expand Down
36 changes: 17 additions & 19 deletions Dockerfile.ci
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ FROM ubuntu:22.04

ARG USER_UID=1001
ARG USER_GID=1001
ARG PYTHON_VERSION=3.11

ENV PYTHONUNBUFFERED=1
ENV DEBIAN_FRONTEND=noninteractive
Expand All @@ -18,37 +19,35 @@ RUN groupadd --gid ${USER_GID} user \
WORKDIR /opt/app

RUN apt-get update && apt-get install -y --no-install-recommends \
software-properties-common \
libtiff-dev \
cmake \
zlib1g-dev \
libnuma1 \
libspatialindex-dev \
curl \
cmake \
gnupg2 \
gpg-agent \
vim screen \
zip unzip \
git \
openssh-server \
build-essential \
ninja-build \
software-properties-common \
gnupg2 \
gpg-agent \
&& add-apt-repository -y ppa:deadsnakes/ppa \
&& apt-get update && apt-get install -y --no-install-recommends \
python${PYTHON_VERSION} \
python${PYTHON_VERSION}-dev \
python${PYTHON_VERSION}-venv \
python${PYTHON_VERSION}-distutils \
&& mkdir /var/run/sshd \
&& curl -fsSL https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION} \
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/local/bin/python3 \
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/local/bin/python \
&& ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

RUN add-apt-repository -y ppa:deadsnakes/ppa \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
python3.11 \
python3.11-dev \
python3.11-venv \
python3.11-distutils \
&& ln -sf /usr/bin/python3.11 /usr/local/bin/python3 \
&& ln -sf /usr/bin/python3.11 /usr/bin/python3 \
&& ln -sf /usr/bin/python3.11 /usr/local/bin/python \
&& ln -sf /usr/bin/python3.11 /usr/bin/python \
&& rm -rf /var/lib/apt/lists/*

# libjpeg-turbo 3.x (required by PyTurboJPEG>=2)
ARG LIBJPEG_TURBO_VERSION=3.1.0
RUN curl -fsSL https://github.com/libjpeg-turbo/libjpeg-turbo/releases/download/${LIBJPEG_TURBO_VERSION}/libjpeg-turbo-${LIBJPEG_TURBO_VERSION}.tar.gz \
Expand All @@ -71,8 +70,7 @@ RUN set -eux; \
apt-get clean; \
rm -rf /var/lib/apt/lists/*

RUN python -m ensurepip --upgrade \
&& python -m pip install --upgrade pip setuptools pip-tools \
RUN python -m pip install --upgrade pip setuptools pip-tools \
&& python -m pip install hatchling psutil \
&& rm -rf /home/user/.cache/pip

Expand Down
8 changes: 6 additions & 2 deletions docs/preprocessing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,12 @@ Both are disabled by default. Enable them via the ``preview`` dict:
}
)

Preview images are written to ``<output_dir>/preview/mask/<sample_id>.png``
and ``<output_dir>/preview/tiling/<sample_id>.png``. Their paths are also
Preview images are written to ``<output_dir>/preview/mask/<sample_id>.jpg``
and ``<output_dir>/preview/tiling/<sample_id>.jpg``. Their paths are also
recorded in ``process_list.csv`` and on the returned
:class:`~slide2vec.EmbeddedSlide` (``mask_preview_path``,
``tiling_preview_path``).

When resuming a run, preview paths already recorded in
``process_list.csv`` are preserved for tiling artifacts that succeeded and are
unchanged, provided the preview files still exist on disk.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ classifiers = [
"Programming Language :: Python :: 3.13",
]
dependencies = [
"hs2p[asap,cucim,openslide,sam2,vips]>=4.0.1",
"hs2p[asap,cucim,openslide,sam2,vips]>=4.0.5",
"omegaconf",
"matplotlib",
"numpy<2",
Expand Down Expand Up @@ -88,7 +88,7 @@ fm = [
"pandas",
"pillow",
"rich",
"hs2p[asap,cucim,openslide,sam2,vips]>=4.0.1",
"hs2p[asap,cucim,openslide,sam2,vips]>=4.0.5",
"wandb",
"torch>=2.3,<2.8",
"torchvision>=0.18.0",
Expand Down
10 changes: 10 additions & 0 deletions slide2vec/distributed/pipeline_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,16 @@ def main(argv=None) -> int:
if not callable(load_successful_tiled_slides_fn):
from slide2vec.runtime.manifest import load_successful_tiled_slides as load_successful_tiled_slides_fn
slide_records, tiling_results = load_successful_tiled_slides_fn(tiling_input_dir)
requested_sample_ids = request.get("sample_ids")
if requested_sample_ids is not None:
requested_sample_id_set = {str(sample_id) for sample_id in requested_sample_ids}
paired = [
(slide, tiling_result)
for slide, tiling_result in zip(slide_records, tiling_results)
if slide.sample_id in requested_sample_id_set
]
slide_records = [slide for slide, _ in paired]
tiling_results = [tiling_result for _, tiling_result in paired]
assignments = assign_slides_to_ranks(slide_records, tiling_results, num_gpus=world_size)
assigned_ids = assignments.get(global_rank, [])
if not assigned_ids:
Expand Down
9 changes: 9 additions & 0 deletions slide2vec/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,14 @@ def run_pipeline(
save_latents=execution.save_latents,
resume=resolved_preprocessing.resume,
)
skipped_slide_count = len(embeddable_slides) - len(pending_slides)
if resolved_preprocessing.resume and skipped_slide_count > 0:
emit_progress(
"embedding.resume",
total_slide_count=len(embeddable_slides),
pending_slide_count=len(pending_slides),
skipped_slide_count=skipped_slide_count,
)
local_persist_callback, _, _ = persist_callbacks.build_incremental_persist_callback(
model=model,
preprocessing=resolved_preprocessing,
Expand Down Expand Up @@ -785,6 +793,7 @@ def run_pipeline(
"embedding.finished",
slide_count=len(embeddable_slides),
slides_completed=len(embeddable_slides),
slides_skipped=skipped_slide_count,
tile_artifacts=len(tile_artifacts) + len(hierarchical_artifacts),
slide_artifacts=len(slide_artifacts),
)
Expand Down
33 changes: 32 additions & 1 deletion slide2vec/progress.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@ def _format_line(self, kind: str, payload: dict[str, Any]) -> str | None:
return f"Model {payload['model_name']} ready on {payload['device']}"
if kind == "embedding.started":
return f"Embedding slides ({payload['slide_count']} total)..."
if kind == "embedding.resume":
return (
f"Resume: skipped {payload['skipped_slide_count']} already processed slide(s); "
f"{payload['pending_slide_count']} pending"
)
if kind == "embedding.assignment.started":
return f"Assigning slides across {payload['num_gpus']} GPU(s)..."
if kind == "embedding.assignment.finished":
Expand All @@ -172,9 +177,15 @@ def _format_line(self, kind: str, payload: dict[str, Any]) -> str | None:
if kind == "embedding.slide.finished":
return f"Completed {_progress_subject(payload)} ({payload['num_tiles']} tiles)"
if kind == "embedding.finished":
skipped_text = (
f", {payload['slides_skipped']} skipped"
if "slides_skipped" in payload
else ""
)
return (
f"Embedding finished: {payload['slides_completed']}/{payload['slide_count']} slides, "
f"{payload['tile_artifacts']} tile artifacts, {payload['slide_artifacts']} slide artifacts"
f"{skipped_text}"
)
if kind == "backend.selected":
return _format_backend_selected_message(payload)
Expand Down Expand Up @@ -367,6 +378,23 @@ def emit(self, event: ProgressEvent) -> None:
self._ensure_progress_started()
self._task_ids["embedding"] = self.progress.add_task("Embedding slides", total=payload["slide_count"])
return
if kind == "embedding.resume":
skipped = int(payload["skipped_slide_count"])
pending = int(payload["pending_slide_count"])
total = int(payload["total_slide_count"])
if skipped > 0:
self.console.print(
f"Resume: skipped {skipped} already processed slide(s); {pending}/{total} pending"
)
task_id = self._task_ids.get("embedding")
if task_id is not None:
self.progress.update(
task_id,
total=pending,
completed=0,
description=f"Embedding slides ({pending} pending, {skipped} skipped)",
)
return
if kind == "embedding.assignment.started":
self._ensure_progress_started()
self._task_ids["embedding_assignment"] = self.progress.add_task(
Expand Down Expand Up @@ -616,11 +644,14 @@ def _embedding_summary_rows(payload: dict[str, Any]) -> list[tuple[str, str]]:
slide_count = int(payload["slide_count"])
completed = int(payload["slides_completed"])
failed = max(0, slide_count - completed)
return [
rows = [
("Slides w/ tiles", str(slide_count)),
("Completed", str(completed)),
("Failed", str(failed)),
]
if "slides_skipped" in payload:
rows.insert(2, ("Skipped", str(payload["slides_skipped"])))
return rows


def read_progress_events(
Expand Down
Loading
Loading