Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
264 changes: 158 additions & 106 deletions system/scene/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# syntax=docker/dockerfile:1.7
# SPDX-License-Identifier: MulanPSL-2.0
# system/scene container — ROS Humble base + scene's Python deps.
#
Expand All @@ -17,17 +18,45 @@
#
# The image is built on demand by `bash system/scene/scripts/build.sh`
# the first time, then reused. Rebuild only when this Dockerfile or
# the files under requirements/ change.

# `ros:humble-ros-base` is the slim official image (no GUI / desktop
# tooling). osrf/ros:* would also work but the user's registry mirror
# can't always reach the osrf namespace.
FROM ros:humble-ros-base

# ── Build-time proxy helpers ──────────────────────────────────────
# Usage:
# - apt / pip: use `without-proxy ...`
# - git / HuggingFace / OpenAI CDN: use `with-build-proxy ...`
#
# Note:
# HTTP_PROXY_HOST / HTTPS_PROXY_HOST / NO_PROXY_HOST are declared later,
# just before the first proxy-needed layer, so changing proxy args does
# not invalidate earlier apt/pip layers.
# with-build-proxy: run the given command with the standard proxy variables
# (upper- and lower-case) populated from HTTP_PROXY_HOST / HTTPS_PROXY_HOST /
# NO_PROXY_HOST; each one falls back to the empty string when unset.
# The quoted heredoc delimiter keeps ${...} literal at build time, and
# --chmod makes the script executable without a separate chmod step.
COPY --chmod=0755 <<'EOF' /usr/local/bin/with-build-proxy
#!/usr/bin/env sh
set -eu

export HTTP_PROXY="${HTTP_PROXY_HOST:-}"
export HTTPS_PROXY="${HTTPS_PROXY_HOST:-}"
export http_proxy="${HTTP_PROXY_HOST:-}"
export https_proxy="${HTTPS_PROXY_HOST:-}"
export NO_PROXY="${NO_PROXY_HOST:-}"
export no_proxy="${NO_PROXY_HOST:-}"

exec "$@"
EOF

# without-proxy: run the given command with every proxy-related variable
# removed from its environment.
COPY --chmod=0755 <<'EOF2' /usr/local/bin/without-proxy
#!/usr/bin/env sh
set -eu

unset HTTP_PROXY HTTPS_PROXY http_proxy https_proxy
unset ALL_PROXY all_proxy NO_PROXY no_proxy

exec "$@"
EOF2

# ── TUNA mirrors for GFW-bound networks ─────────────────────────────
# Swap apt sources (Ubuntu/ports + ROS 2) to TUNA. Use `find` + sed
# --follow-symlinks because /etc/apt/sources.list.d/ros2.sources is
# a symlink into /usr/share/ros-apt-source/ in the upstream ros image.
RUN set -eux; \
find /etc/apt/sources.list.d/ -maxdepth 1 \( -name '*.list' -o -name '*.sources' \) -print0 \
| xargs -0 -r sed -i --follow-symlinks \
Expand All @@ -40,119 +69,148 @@ RUN set -eux; \
-e 's|http://ports\.ubuntu\.com/ubuntu-ports/\?|https://mirrors.tuna.tsinghua.edu.cn/ubuntu-ports/|g' \
/etc/apt/sources.list; \
fi

# Default package / model endpoints: TUNA for pip and uv, hf-mirror.com for
# the Hugging Face hub.
ENV PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple \
    UV_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple \
    HF_ENDPOINT=https://hf-mirror.com

# PIP_NO_CACHE_DIR is intentionally NOT set. It turns pip's cache off
# entirely, which would make the `--mount=type=cache,target=/root/.cache/pip`
# mounts on the pip layers dead weight (nothing read from or written to them).
# Because the cache directory is a BuildKit cache mount, it never ends up in
# an image layer, so there is no size reason to disable it.
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# System packages we need at runtime:
#   python3-pip          — install scene deps
#   python3-cv-bridge    — sensor_msgs/Image ↔ numpy (apt has the
#                          rclpy-aware cv_bridge that pip doesn't)
#   python3-numpy        — pinned to 1.x by ROS Humble; matches host
#   ros-humble-tf2-ros   — tf transforms
#   ros-humble-sensor-msgs / nav-msgs already in -ros-base
#
# apt: do NOT use proxy; we already switched to TUNA mirrors.
#
# Cache mounts:
#   /var/cache/apt      — downloaded .deb archives
#   /var/lib/apt/lists  — package indexes; being a cache mount they never
#                         land in the image, so no `rm -rf` cleanup is needed
# Ubuntu-based images ship /etc/apt/apt.conf.d/docker-clean, which deletes
# the archives after every apt operation and would leave the /var/cache/apt
# mount permanently empty; it is removed so the mount is actually reused.
# NOTE: removing docker-clean is persistent — apt runs inside the final
# container will keep their downloaded archives too.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt/lists,sharing=locked <<'EOF'
set -eux
rm -f /etc/apt/apt.conf.d/docker-clean
without-proxy apt-get update
without-proxy apt-get install -y --no-install-recommends \
    python3-cv-bridge \
    python3-numpy \
    python3-pip \
    ros-humble-rmw-zenoh-cpp \
    ros-humble-tf-transformations \
    ros-humble-tf2-ros \
    ros-humble-zenoh-bridge-dds
EOF

# pip: do NOT use proxy.
#
# Bootstrap step: this invocation still runs the distro pip installed by
# python3-pip above (Ubuntu 22.04 ships pip 22.0.x). The `raw` progress-bar
# style only exists from pip 24.1 onwards, so passing it here is rejected as
# an invalid choice; `off` is accepted by both old and new pip.
# The `pip>=24.1` floor makes explicit that the later layers, which do use
# `--progress-bar raw`, depend on this upgrade.
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    without-proxy python3 -m pip install \
        --progress-bar off \
        --timeout 60 \
        --retries 3 \
        --upgrade 'pip>=24.1'

# torch + torchvision — sm_120 (Blackwell / RTX 5090) requires
# torch 2.7+ with CUDA 12.8 kernels. cu124 wheels only cover up to
# sm_90 and will fail with "no kernel image" on Blackwell.
# Aliyun mirror carries cu128 wheels; fall back to upstream PyTorch index.
#
# First attempt: Aliyun PyPI index + Aliyun cu128 wheel directory, no proxy.
# Fallback: the official PyTorch cu128 index via with-build-proxy.
#
# NOTE(review): HTTP_PROXY_HOST / HTTPS_PROXY_HOST / NO_PROXY_HOST are only
# declared as ARGs further down (to keep proxy changes from invalidating the
# apt/pip layers), so at this point with-build-proxy exports empty proxy
# values and the fallback effectively runs without a proxy. If the fallback
# must go through the proxy, the ARG declarations have to move above this
# RUN — at the cost of rebuilding this layer whenever the proxy changes.
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    without-proxy python3 -m pip install \
        --progress-bar raw \
        --timeout 60 \
        --retries 3 \
        -i https://mirrors.aliyun.com/pypi/simple/ \
        --find-links https://mirrors.aliyun.com/pytorch-wheels/cu128/ \
        torch==2.7.1 torchvision==0.22.1 \
    || with-build-proxy python3 -m pip install \
        --progress-bar raw \
        --timeout 60 \
        --retries 3 \
        --index-url https://download.pytorch.org/whl/cu128 \
        torch==2.7.1 torchvision==0.22.1

# Optional: pass from host via `scripts/build.sh` as HTTP_PROXY_HOST /
# HTTPS_PROXY_HOST / NO_PROXY_HOST (not HTTP_PROXY — avoids BuildKit treating
# them as automatic global proxy args). Mapped to standard names only for the
# network-heavy layers below (pip requirements, git, HF, etc.). Use
# http://host.docker.internal:7890 — not 127.0.0.1 (that is the build container).
# Generic scene deps + concept-graphs perception stack.
# pip: do NOT use proxy; use PIP_INDEX_URL=TUNA.

# base
# The requirements file is bind-mounted from the build context for the
# duration of this RUN instead of COPY + rm: a file removed in a later step
# still lives in the earlier COPY layer, whereas a bind mount never creates
# a layer. Editing the file still invalidates this step's cache.
RUN --mount=type=bind,source=requirements/scene-base.txt,target=/tmp/requirements/scene-base.txt \
    --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    without-proxy python3 -m pip install \
        --progress-bar raw \
        --timeout 60 \
        --retries 3 \
        -r /tmp/requirements/scene-base.txt

# core
# The requirements file is bind-mounted from the build context for the
# duration of this RUN instead of COPY + rm: a file removed in a later step
# still lives in the earlier COPY layer, whereas a bind mount never creates
# a layer. Editing the file still invalidates this step's cache.
RUN --mount=type=bind,source=requirements/scene-perception-core.txt,target=/tmp/requirements/scene-perception-core.txt \
    --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    without-proxy python3 -m pip install \
        --progress-bar raw \
        --timeout 60 \
        --retries 3 \
        -r /tmp/requirements/scene-perception-core.txt

# network heavy
# The requirements file is bind-mounted from the build context for the
# duration of this RUN instead of COPY + rm: a file removed in a later step
# still lives in the earlier COPY layer, whereas a bind mount never creates
# a layer. Editing the file still invalidates this step's cache.
RUN --mount=type=bind,source=requirements/scene-perception-heavy.txt,target=/tmp/requirements/scene-perception-heavy.txt \
    --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    without-proxy python3 -m pip install \
        --progress-bar raw \
        --timeout 60 \
        --retries 3 \
        -r /tmp/requirements/scene-perception-heavy.txt

# pytorch3d (optional — see requirements/scene-pytorch3d.txt to decide whether to install it).
# COPY requirements/scene-pytorch3d.txt /tmp/requirements/scene-pytorch3d.txt
# RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
# without-proxy pip install -r /tmp/requirements/scene-pytorch3d.txt \
# --progress-bar raw \
# --timeout 60 \
# --retries 3 \
# && rm /tmp/requirements/scene-pytorch3d.txt

# ── Proxy-needed network layers start here ─────────────────────────
# Declare build args as late as possible to avoid invalidating earlier
# apt/pip layers when proxy values change.
#
# Optional: pass from host via `scripts/build.sh` as HTTP_PROXY_HOST /
# HTTPS_PROXY_HOST / NO_PROXY_HOST (not HTTP_PROXY — avoids BuildKit treating
# them as automatic global proxy args). Use
# http://host.docker.internal:7890 — not 127.0.0.1 (that is the build container).
#
# These are deliberately NOT mapped onto HTTP_PROXY / HTTPS_PROXY / NO_PROXY
# with a global ENV: only commands launched through `with-build-proxy` see a
# proxy, and nothing proxy-related is left in the image environment.
ARG HTTP_PROXY_HOST
ARG HTTPS_PROXY_HOST
ARG NO_PROXY_HOST

# concept-graphs source from the ali-dev branch (smaller, no
# Grounded-SAM / pytorch3d dependency tangle). We need the
# `conceptgraph.slam.utils` and `conceptgraph.slam.mapping` modules
# (depth backproject + per-object pcd + DBSCAN-based merge).
#
# git clone: use proxy. The ghfast.top mirror is tried first, then
# github.com directly. The target directory is wiped before every attempt so
# a partial checkout from a failed clone cannot block the next one; the step
# fails only if every URL fails.
RUN <<'EOF'
set -eux
for repo_url in \
    https://ghfast.top/https://github.com/concept-graphs/concept-graphs.git \
    https://github.com/concept-graphs/concept-graphs.git
do
    rm -rf /opt/concept-graphs
    if with-build-proxy git clone --depth 1 --branch ali-dev \
        "$repo_url" /opt/concept-graphs
    then
        exit 0
    fi
done
exit 1
EOF

# Install the concept-graphs checkout in editable mode so its helpers can be
# edited and tested from inside the container without rebuilding the image.
# --no-deps because concept-graphs's setup.py declares pytorch3d as an
# install_requires, which we explicitly skip on aarch64.
# pip: do NOT use proxy.
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    without-proxy python3 -m pip install \
        --no-deps \
        --editable /opt/concept-graphs

# Bake CV model weights into the image.
# HF_HOME points the Hugging Face cache at /opt/models/hf.
ENV HF_HOME=/opt/models/hf

# `mkdir -p /opt/models/hf` also creates /opt/models.
RUN mkdir -p /opt/models/hf /root/.cache/clip

# Both weight files come from _weights/ in the build context and land in
# /opt/models/ under their original file names.
COPY _weights/yolov8l-world.pt _weights/mobile_sam.pt /opt/models/

# Two CLIP-flavored downloads bake in here:
#   1. open_clip's ViT-B-32 LAION-2B — goes to HF_HOME.
#   2. openai's `clip` ViT-B/32 — goes to /root/.cache/clip.
#      (Separate package; ultralytics' YOLO-World text encoder calls
#      clip.load("ViT-B/32") during set_classes() and would otherwise pull
#      the weights from openai's CDN at first inference.)
#
# Non-pip model downloads: use proxy.
#   - HF_ENDPOINT (the hf-mirror.com default set earlier) is unset for this
#     step only, so the Hugging Face download is not sent to the mirror.
#   - HF_HUB_DOWNLOAD_TIMEOUT is raised to 120 for this step.
#   - The final `ls` prints the resulting directories into the build log.
RUN with-build-proxy sh <<'EOF'
set -eux
unset HF_ENDPOINT
export HF_HUB_DOWNLOAD_TIMEOUT=120

python3 -c "import open_clip; open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k')"
python3 -c "import clip; clip.load('ViT-B/32', device='cpu', download_root='/root/.cache/clip')"

ls -la /opt/models/ /root/.cache/clip/
EOF

# No global proxy ENV was set, so there is no build-time proxy to clear here.

# Fixed env so perception_concept_graphs.py knows where the weights
# live without each deployment having to override.
Expand All @@ -162,22 +220,16 @@ ENV SCENE_YOLO_WORLD_WEIGHTS=/opt/models/yolov8l-world.pt \
SCENE_CLIP_PRETRAINED=laion2b_s34b_b79k

WORKDIR /scene

# --chmod sets the executable bit at copy time, so no extra `RUN chmod`
# layer (which would store a second copy of the file) is needed.
COPY --chmod=0755 entrypoint.sh /entrypoint.sh
COPY no_shm_profile.xml /etc/fastrtps_no_shm.xml

# Force FastRTPS to skip SHM transport.
ENV FASTRTPS_DEFAULT_PROFILES_FILE=/etc/fastrtps_no_shm.xml

# Stick with FastRTPS.
ENV RMW_IMPLEMENTATION=rmw_fastrtps_cpp

# Exec form, declared exactly once.
ENTRYPOINT ["/entrypoint.sh"]
2 changes: 2 additions & 0 deletions system/scene/docker/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# NOTE: This file is a BACKUP of requirements/ !!!

# Scene-container Python deps. Subset of pyproject.toml's
# [project.dependencies] minus what the base image already provides
# (numpy / cv-bridge / rclpy / tf2_ros).
Expand Down
15 changes: 15 additions & 0 deletions system/scene/docker/requirements/scene-base.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# ── Generic scene ────────────────────────────────────────────────
scipy>=1.11
mcp
fastmcp
grpcio
grpcio-tools
protobuf
uvicorn
httpx
Pillow
PyYAML

setuptools==69.5.1
wheel
packaging
30 changes: 30 additions & 0 deletions system/scene/docker/requirements/scene-perception-core.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# ── concept-graphs perception stack ──────────────────────────────
# torch / torchvision are installed by the Dockerfile itself (2.7.1 with
# cu128 wheels — sm_120 (Blackwell) needs CUDA 12.8 kernels), so they are
# not listed here. ROS Humble's python3-numpy is pinned to 1.x so we let pip
# pick a compatible scipy build automatically.
opencv-python-headless>=4.8
ultralytics>=8.3 # YOLO-World v2 + the SAM wrapper that
# accepts `.predict(image, bboxes=...)`
# ultralytics' YOLOWorld imports openai's `clip` (NOT the same as
# open_clip below) for its CLIP text encoder. Without it the model
# load raises "No module named 'clip'". `clip-anytorch` is the fork
# that supports modern torch (the original openai `clip-by-openai`
# uses a TorchScript Node API that's broken on torch 2.5+).
clip-anytorch>=2.6
open_clip_torch>=2.20 # ViT-B-32 LAION-2B; small enough for
# Jetson, x86-5090 happily uses ViT-L/14
# by swapping the env var.
supervision==0.14.0 # sv.Detections is the in-memory container
# gobs_to_detection_list expects.
omegaconf==2.3.0 # concept-graphs' cfg dict format.
hydra-core==1.3.2 # for loading their cfslam yaml.
transformers>=4.40 # tokenizers for CLIP text path
# concept-graphs's general_utils → vlm imports openai unconditionally,
# even though we don't call any VLM helpers from our merge path. Add
# it as a stub install so the slam module imports cleanly.
openai>=1.0
# concept-graphs also imports wandb (optional) and rich; wandb is a
# soft import via OptionalWandB, but rich is mandatory.
rich
matplotlib # used by DetectionList.color_by_instance
seaborn # used by some logging utilities
5 changes: 5 additions & 0 deletions system/scene/docker/requirements/scene-perception-heavy.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Open3D is concept-graphs' point-cloud rep. Pcd ops are CPU-bound
# but we don't accumulate tens of thousands per frame; one per object
# per tick is fine.
open3d>=0.17,<0.19
faiss-cpu==1.7.4
12 changes: 12 additions & 0 deletions system/scene/docker/requirements/scene-pytorch3d.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# pytorch3d was originally pulled in for concept-graphs's
# compute_3d_iou_accurate_batch (used by compute_overlap_matrix_general
# → merge_overlap_objects), but we replaced that path with a voxel-set
# fraction-overlap impl (see perception_concept_graphs.py compute_*_
# overlap_*); the ali-dev branch of concept-graphs does NOT import
# pytorch3d in slam.{utils,mapping,slam_classes} or utils.model_utils.
#
# x86_64-only: miropsota's wheel index has no aarch64 binaries and
# source build would need nvcc, so we skip on Jetson. Runtime path
# does not exercise pytorch3d.
#
# NOTE(review): the pin below names a build for torch 2.5.1 / cu124
# ("+pt2.5.1cu124"), while the scene Dockerfile installs torch==2.7.1 from
# cu128 wheels. Confirm that a pytorch3d build matching the installed torch
# exists (and update the pin) before enabling this file in the Dockerfile.
--extra-index-url https://miropsota.github.io/torch_packages_builder
pytorch3d==0.7.8+pt2.5.1cu124; platform_machine == "x86_64"
Loading
Loading