diff --git a/system/scene/docker/Dockerfile b/system/scene/docker/Dockerfile
index 79399787..a399b7d3 100644
--- a/system/scene/docker/Dockerfile
+++ b/system/scene/docker/Dockerfile
@@ -1,3 +1,4 @@
+# syntax=docker/dockerfile:1.7
 # SPDX-License-Identifier: MulanPSL-2.0
 # system/scene container — ROS Humble base + scene's Python deps.
 #
@@ -17,17 +18,45 @@
 #
 # The image is built on demand by `bash system/scene/scripts/build.sh`
 # the first time, then reused. Rebuild only when this Dockerfile or
-# requirements.txt change.
+# requirements/ change.
 
-# `ros:humble-ros-base` is the slim official image (no GUI / desktop
-# tooling). osrf/ros:* would also work but the user's registry mirror
-# can't always reach the osrf namespace.
 FROM ros:humble-ros-base
 
+# ── Build-time proxy helpers ──────────────────────────────────────
+# Usage:
+#   - apt / pip: use `without-proxy ...`
+#   - git / HuggingFace / OpenAI CDN: use `with-build-proxy ...`
+#
+# Note:
+#   HTTP_PROXY_HOST / HTTPS_PROXY_HOST / NO_PROXY_HOST are build ARGs
+#   declared further down so that changing them leaves earlier layers
+#   cached; above those ARG lines with-build-proxy exports empty values.
+RUN cat > /usr/local/bin/with-build-proxy <<'EOF' \
+ && chmod +x /usr/local/bin/with-build-proxy \
+ && cat > /usr/local/bin/without-proxy <<'EOF2' \
+ && chmod +x /usr/local/bin/without-proxy
+#!/usr/bin/env sh
+set -eu
+
+export HTTP_PROXY="${HTTP_PROXY_HOST:-}"
+export HTTPS_PROXY="${HTTPS_PROXY_HOST:-}"
+export http_proxy="${HTTP_PROXY_HOST:-}"
+export https_proxy="${HTTPS_PROXY_HOST:-}"
+export NO_PROXY="${NO_PROXY_HOST:-}"
+export no_proxy="${NO_PROXY_HOST:-}"
+
+exec "$@"
+EOF
+#!/usr/bin/env sh
+set -eu
+
+unset HTTP_PROXY HTTPS_PROXY http_proxy https_proxy
+unset ALL_PROXY all_proxy NO_PROXY no_proxy
+
+exec "$@"
+EOF2
+
 # ── TUNA mirrors for GFW-bound networks ─────────────────────────────
-# Swap apt sources (Ubuntu/ports + ROS 2) to TUNA. Use `find` + sed
-# --follow-symlinks because /etc/apt/sources.list.d/ros2.sources is
-# a symlink into /usr/share/ros-apt-source/ in the upstream ros image.
 RUN set -eux; \
     find /etc/apt/sources.list.d/ -maxdepth 1 \( -name '*.list' -o -name '*.sources' \) -print0 \
       | xargs -0 -r sed -i --follow-symlinks \
@@ -40,119 +69,148 @@ RUN set -eux; \
             -e 's|http://ports\.ubuntu\.com/ubuntu-ports/\?|https://mirrors.tuna.tsinghua.edu.cn/ubuntu-ports/|g' \
             /etc/apt/sources.list; \
     fi
+
 ENV PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple \
     UV_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple \
     HF_ENDPOINT=https://hf-mirror.com
 
 ENV DEBIAN_FRONTEND=noninteractive \
     PYTHONUNBUFFERED=1 \
-    PIP_NO_CACHE_DIR=1 \
     PIP_DISABLE_PIP_VERSION_CHECK=1
 
-# System packages we need at runtime:
-#   python3-pip          — install scene deps
-#   python3-cv-bridge    — sensor_msgs/Image ↔ numpy (apt has the
-#                          rclpy-aware cv_bridge that pip doesn't)
-#   python3-numpy        — pinned to 1.x by ROS Humble; matches host
-#   ros-humble-tf2-ros   — tf transforms
-#   ros-humble-sensor-msgs / nav-msgs already in -ros-base
-RUN apt-get update \
- && apt-get install -y --no-install-recommends \
-        python3-pip \
-        python3-cv-bridge \
-        python3-numpy \
-        ros-humble-tf2-ros \
-        ros-humble-tf-transformations \
-        ros-humble-rmw-zenoh-cpp \
-        ros-humble-zenoh-bridge-dds \
- && rm -rf /var/lib/apt/lists/*
+# System packages we need at runtime. apt bypasses the proxy (TUNA mirrors).
+# Drop docker-clean first: it wipes /var/cache/apt and defeats the cache mount.
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    without-proxy sh -c ' \
+        rm -f /etc/apt/apt.conf.d/docker-clean && apt-get update && \
+        apt-get install -y --no-install-recommends \
+            python3-pip \
+            python3-cv-bridge \
+            python3-numpy \
+            ros-humble-tf2-ros \
+            ros-humble-tf-transformations \
+            ros-humble-rmw-zenoh-cpp \
+            ros-humble-zenoh-bridge-dds && \
+        rm -rf /var/lib/apt/lists/* \
+    '
+
+# pip bootstrap, no proxy. Distro pip may predate `raw` progress, so use `off`.
+RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
+    without-proxy python3 -m pip install \
+        --progress-bar off \
+        --timeout 60 \
+        --retries 3 \
+        --upgrade pip
 
 # torch + torchvision — sm_120 (Blackwell / RTX 5090) requires
 # torch 2.7+ with CUDA 12.8 kernels. cu124 wheels only cover up to
 # sm_90 and will fail with "no kernel image" on Blackwell.
-# Aliyun mirror carries cu128 wheels; fall back to upstream PyPI.
-RUN pip install --find-links https://mirrors.aliyun.com/pytorch-wheels/cu128/ \
+# Aliyun mirror carries cu128 wheels; fall back to upstream PyTorch index.
+# The fallback runs under with-build-proxy, which needs these ARGs in scope.
+ARG HTTP_PROXY_HOST
+ARG HTTPS_PROXY_HOST
+ARG NO_PROXY_HOST
+RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
+    without-proxy python3 -m pip install \
+        --progress-bar raw --timeout 60 --retries 3 \
+        -i https://mirrors.aliyun.com/pypi/simple/ \
+        --find-links https://mirrors.aliyun.com/pytorch-wheels/cu128/ \
         torch==2.7.1 torchvision==0.22.1 \
- || pip install --index-url https://download.pytorch.org/whl/cu128 \
+    || with-build-proxy python3 -m pip install \
+        --progress-bar raw --timeout 60 --retries 3 \
+        --index-url https://download.pytorch.org/whl/cu128 \
         torch==2.7.1 torchvision==0.22.1
 
-# Optional: pass from host via `scripts/build.sh` as HTTP_PROXY_HOST /
-# HTTPS_PROXY_HOST / NO_PROXY_HOST (not HTTP_PROXY — avoids BuildKit treating
-# them as automatic global proxy args). Mapped to standard names only for the
-# network-heavy layers below (pip requirements, git, HF, etc.). Use
-# http://host.docker.internal:7890 — not 127.0.0.1 (that is the build container).
+# Generic scene deps + concept-graphs perception stack.
+# pip: do NOT use proxy; use PIP_INDEX_URL=TUNA.
+
+# base
+COPY requirements/scene-base.txt /tmp/requirements/scene-base.txt
+RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
+    without-proxy pip install -r /tmp/requirements/scene-base.txt \
+        --progress-bar raw \
+        --timeout 60 \
+        --retries 3 \
+    && rm /tmp/requirements/scene-base.txt
+
+# core
+COPY requirements/scene-perception-core.txt /tmp/requirements/scene-perception-core.txt
+RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
+    without-proxy pip install -r /tmp/requirements/scene-perception-core.txt \
+        --progress-bar raw \
+        --timeout 60 \
+        --retries 3 \
+    && rm /tmp/requirements/scene-perception-core.txt
+
+# network heavy
+COPY requirements/scene-perception-heavy.txt /tmp/requirements/scene-perception-heavy.txt
+RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
+    without-proxy pip install -r /tmp/requirements/scene-perception-heavy.txt \
+        --progress-bar raw \
+        --timeout 60 \
+        --retries 3 \
+    && rm /tmp/requirements/scene-perception-heavy.txt
+
+# pytorch3d: disabled. Its pin (pt2.5.1cu124) does not match torch 2.7.1/cu128; see requirements/scene-pytorch3d.txt.
+# COPY requirements/scene-pytorch3d.txt /tmp/requirements/scene-pytorch3d.txt
+# RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
+#     without-proxy pip install -r /tmp/requirements/scene-pytorch3d.txt \
+#         --progress-bar raw \
+#         --timeout 60 \
+#         --retries 3 \
+#     && rm /tmp/requirements/scene-pytorch3d.txt
+
+# ── Proxy-needed network layers start here ─────────────────────────
+# Declare build args as late as possible to avoid invalidating earlier
+# apt/pip layers when proxy values change.
 ARG HTTP_PROXY_HOST
 ARG HTTPS_PROXY_HOST
 ARG NO_PROXY_HOST
-ENV HTTP_PROXY=${HTTP_PROXY_HOST} \
-    HTTPS_PROXY=${HTTPS_PROXY_HOST} \
-    NO_PROXY=${NO_PROXY_HOST} \
-    http_proxy=${HTTP_PROXY_HOST} \
-    https_proxy=${HTTPS_PROXY_HOST} \
-    no_proxy=${NO_PROXY_HOST}
-
-# Generic scene deps + concept-graphs perception stack (no
-# pytorch3d / chamferdist / gradslam — those are needed only by
-# concept-graphs' full SLAM pipeline, not by detection + merge,
-# which is all we use).
-COPY requirements.txt /tmp/requirements.txt
-RUN pip install -r /tmp/requirements.txt \
- && rm /tmp/requirements.txt
-
-# concept-graphs source from the ali-dev branch (smaller, no
-# Grounded-SAM / pytorch3d dependency tangle). We need the
-# `conceptgraph.slam.utils` and `conceptgraph.slam.mapping` modules
-# (depth backproject + per-object pcd + DBSCAN-based merge). Install
-# editable so we can edit-and-test from inside the container without
-# rebuilding when iterating on their helpers.
-# git clone goes through ghfast.top mirror with fallback to direct.
-# --no-deps because concept-graphs's setup.py declares pytorch3d as
-# an install_requires which we explicitly skip on aarch64
-# (see requirements.txt).
-RUN ( git clone --depth 1 --branch ali-dev \
+
+# concept-graphs source from the ali-dev branch.
+# git clone: use proxy.
+RUN set -eux; \
+    rm -rf /opt/concept-graphs; \
+    with-build-proxy git clone --depth 1 --branch ali-dev \
         https://ghfast.top/https://github.com/concept-graphs/concept-graphs.git \
         /opt/concept-graphs \
-   || git clone --depth 1 --branch ali-dev \
-        https://github.com/concept-graphs/concept-graphs.git \
-        /opt/concept-graphs ) \
- && pip install --no-deps -e /opt/concept-graphs \
- || (echo "concept-graphs install failed — see comment in Dockerfile" \
-     && exit 1)
-
-# Bake CV model weights into the image — robonix invariant: a robot
-# booting on a customer network can't reach github releases /
-# huggingface CDN. ali-dev's recommended stack:
-#   yolov8l-world.pt — 91 MB, open-vocab YOLO via CLIP text encoder
-#   mobile_sam.pt    — 40 MB, MobileSAM for promptable masks
-# Plus open_clip ViT-B-32 LAION-2B for per-object CLIP features (~150 MB,
-# small enough for Jetson; 5090 build can swap to ViT-L/14 via env).
-#
-# YOLO + MobileSAM are pre-downloaded onto the host into docker/_weights/
-# (out-of-band, see scripts/build.sh's `pre_fetch_weights` block) and
-# COPY'd into the image. We tried `RUN curl` from inside buildx but
-# github CDN connections from CN drop mid-stream after ~10 minutes and
-# break a multi-hundred-MB build. Out-of-band download with retries is
-# more robust and lets buildx cache the COPY layer.
-#
-# open_clip ViT-B-32 LAION-2B comes from huggingface via HF_ENDPOINT
-# (hf-mirror.com); much smaller and reliable.
+    || ( \
+        rm -rf /opt/concept-graphs && \
+        with-build-proxy git clone --depth 1 --branch ali-dev \
+            https://github.com/concept-graphs/concept-graphs.git \
+            /opt/concept-graphs \
+    )
+
+# concept-graphs editable install.
+# pip: do NOT use proxy.
+RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
+    without-proxy pip install --no-deps -e /opt/concept-graphs
+
+# Bake CV model weights into the image.
 ENV HF_HOME=/opt/models/hf
+
 RUN mkdir -p /opt/models /opt/models/hf /root/.cache/clip
+
 COPY _weights/yolov8l-world.pt /opt/models/yolov8l-world.pt
 COPY _weights/mobile_sam.pt    /opt/models/mobile_sam.pt
+
 # Two CLIP-flavored downloads bake in here:
-#   1. open_clip's ViT-B-32 LAION-2B (we use this for per-detection
-#      visual-similarity dedup in concept-graphs) — goes to HF_HOME.
-#   2. openai's `clip` ViT-B/32 (separate package; ultralytics'
-#      YOLO-World text encoder calls clip.load("ViT-B/32") during
-#      set_classes() and would otherwise pull 354 MB from openai's
-#      CDN at first inference — robonix invariant violation).
-RUN python3 -c "import open_clip; open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k')" \
- && python3 -c "import clip; clip.load('ViT-B/32', device='cpu', download_root='/root/.cache/clip')" \
- && ls -la /opt/models/ /root/.cache/clip/
-
-# Drop build-time HTTP proxy after all network-heavy build steps (pip/github/git/HF/openai CDN).
-ENV http_proxy= https_proxy= no_proxy= HTTP_PROXY= HTTPS_PROXY= NO_PROXY=
+#   1. open_clip's ViT-B-32 LAION-2B — goes to HF_HOME.
+#   2. openai's `clip` ViT-B/32 — goes to /root/.cache/clip.
+#
+# Non-pip model downloads: via proxy if one is set, else via the HF mirror.
+RUN with-build-proxy sh <<'EOF'
+set -eux
+[ -z "${HTTP_PROXY:-}${HTTPS_PROXY:-}" ] || unset HF_ENDPOINT
+export HF_HUB_DOWNLOAD_TIMEOUT=120
+
+python3 -c "import open_clip; open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k')"
+python3 -c "import clip; clip.load('ViT-B/32', device='cpu', download_root='/root/.cache/clip')"
+
+ls -la /opt/models/ /root/.cache/clip/
+EOF
+
+# No global proxy ENV was set, so there is no build-time proxy to clear here.
 
 # Fixed env so perception_concept_graphs.py knows where the weights
 # live without each deployment having to override.
@@ -162,22 +220,16 @@ ENV SCENE_YOLO_WORLD_WEIGHTS=/opt/models/yolov8l-world.pt \
     SCENE_CLIP_PRETRAINED=laion2b_s34b_b79k
 
 WORKDIR /scene
+
 COPY entrypoint.sh /entrypoint.sh
 COPY no_shm_profile.xml /etc/fastrtps_no_shm.xml
+
 RUN chmod +x /entrypoint.sh
 
-# Force FastRTPS to skip SHM transport — see mapping_rbnx/docker for
-# full reasoning. UDP-only is the only path that data-flows reliably
-# across our containers (sim ↔ scene ↔ mapping) in this layout.
+# Force FastRTPS to skip SHM transport.
 ENV FASTRTPS_DEFAULT_PROFILES_FILE=/etc/fastrtps_no_shm.xml
 
-# Stick with FastRTPS (sim's default RMW). With --network host +
-# --ipc=host the SHM transport shares cleanly between sim and scene,
-# and FastRTPS UDP multicast discovery covers the rest. We tried
-# Zenoh first but `zenoh_bridge_dds` cannot see FastRTPS-only SHM
-# publishers reliably across containers, so this is the path that
-# actually delivers /amcl_pose, /odom, /scanner, /head_front_camera/*
-# into scene's rclpy hub.
+# Stick with FastRTPS.
 ENV RMW_IMPLEMENTATION=rmw_fastrtps_cpp
 
-ENTRYPOINT ["/entrypoint.sh"]
+ENTRYPOINT ["/entrypoint.sh"]
\ No newline at end of file
diff --git a/system/scene/docker/requirements.txt b/system/scene/docker/requirements.txt
index c928be80..c7ad3969 100644
--- a/system/scene/docker/requirements.txt
+++ b/system/scene/docker/requirements.txt
@@ -1,3 +1,5 @@
+# NOTE: Backup copy only. The Dockerfile installs from requirements/*.txt, not from this file.
+
 # Scene-container Python deps. Subset of pyproject.toml's
 # [project.dependencies] minus what the base image already provides
 # (numpy / cv-bridge / rclpy / tf2_ros).
diff --git a/system/scene/docker/requirements/scene-base.txt b/system/scene/docker/requirements/scene-base.txt
new file mode 100644
index 00000000..cce17f6e
--- /dev/null
+++ b/system/scene/docker/requirements/scene-base.txt
@@ -0,0 +1,15 @@
+# ── Generic scene ────────────────────────────────────────────────
+scipy>=1.11
+mcp
+fastmcp
+grpcio
+grpcio-tools
+protobuf
+uvicorn
+httpx
+Pillow
+PyYAML
+
+setuptools==69.5.1
+wheel
+packaging
\ No newline at end of file
diff --git a/system/scene/docker/requirements/scene-perception-core.txt b/system/scene/docker/requirements/scene-perception-core.txt
new file mode 100644
index 00000000..3cc574c5
--- /dev/null
+++ b/system/scene/docker/requirements/scene-perception-core.txt
@@ -0,0 +1,30 @@
+# ── concept-graphs perception stack ──────────────────────────────
+# torch/torchvision are installed by the Dockerfile (2.7.1, cu128 wheels
+# for sm_120 / Blackwell), not here. ROS Humble's python3-numpy is pinned
+# to 1.x so we let pip pick a compatible scipy build automatically.
+opencv-python-headless>=4.8
+ultralytics>=8.3            # YOLO-World v2 + the SAM wrapper that
+                            # accepts `.predict(image, bboxes=...)`
+# ultralytics' YOLOWorld imports openai's `clip` (NOT the same as
+# open_clip below) for its CLIP text encoder. Without it the model
+# load raises "No module named 'clip'". `clip-anytorch` is the fork
+# that supports modern torch (the original openai `clip-by-openai`
+# uses a TorchScript Node API that's broken on torch 2.5+).
+clip-anytorch>=2.6
+open_clip_torch>=2.20       # ViT-B-32 LAION-2B; small enough for
+                            # Jetson, x86-5090 happily uses ViT-L/14
+                            # by swapping the env var.
+supervision==0.14.0         # sv.Detections is the in-memory container
+                            # gobs_to_detection_list expects.
+omegaconf==2.3.0            # concept-graphs' cfg dict format.
+hydra-core==1.3.2           # for loading their cfslam yaml.
+transformers>=4.40          # tokenizers for CLIP text path
+# concept-graphs's general_utils → vlm imports openai unconditionally,
+# even though we don't call any VLM helpers from our merge path. Add
+# it as a stub install so the slam module imports cleanly.
+openai>=1.0
+# concept-graphs also imports wandb (optional) and rich; wandb is a
+# soft import via OptionalWandB, but rich is mandatory.
+rich
+matplotlib                 # used by DetectionList.color_by_instance
+seaborn                    # used by some logging utilities
\ No newline at end of file
diff --git a/system/scene/docker/requirements/scene-perception-heavy.txt b/system/scene/docker/requirements/scene-perception-heavy.txt
new file mode 100644
index 00000000..2ac9ad78
--- /dev/null
+++ b/system/scene/docker/requirements/scene-perception-heavy.txt
@@ -0,0 +1,5 @@
+# Open3D is concept-graphs' point-cloud rep. Pcd ops are CPU-bound
+# but we don't accumulate tens of thousands per frame; one per object
+# per tick is fine.
+open3d>=0.17,<0.19
+faiss-cpu==1.7.4 
\ No newline at end of file
diff --git a/system/scene/docker/requirements/scene-pytorch3d.txt b/system/scene/docker/requirements/scene-pytorch3d.txt
new file mode 100644
index 00000000..568f4e82
--- /dev/null
+++ b/system/scene/docker/requirements/scene-pytorch3d.txt
@@ -0,0 +1,12 @@
+# pytorch3d was originally pulled in for concept-graphs's
+# compute_3d_iou_accurate_batch (used by compute_overlap_matrix_general
+# → merge_overlap_objects), but we replaced that path with a voxel-set
+# fraction-overlap impl (see perception_concept_graphs.py compute_*_
+# overlap_*); the ali-dev branch of concept-graphs does NOT import
+# pytorch3d in slam.{utils,mapping,slam_classes} or utils.model_utils.
+#
+# x86_64-only: miropsota's wheel index has no aarch64 binaries and
+# source build would need nvcc, so we skip on Jetson. Runtime path
+# does not exercise pytorch3d.
+--extra-index-url https://miropsota.github.io/torch_packages_builder
+pytorch3d==0.7.8+pt2.5.1cu124; platform_machine == "x86_64"
\ No newline at end of file
diff --git a/system/scene/scripts/build.sh b/system/scene/scripts/build.sh
index 50cd2d62..e4f2ff73 100755
--- a/system/scene/scripts/build.sh
+++ b/system/scene/scripts/build.sh
@@ -12,14 +12,27 @@
 # CLEAN=1 forces a full rebuild incl. nuking rbnx-build and
 # `docker build --no-cache`.
 #
-# HTTP proxy (Clash): export http_proxy/https_proxy on the host, e.g.
-# http://127.0.0.1:7890. Passes HTTP(S)_PROXY_HOST build-args (not HTTP_PROXY)
-# so BuildKit does not treat them as global proxy args; the Dockerfile maps them
-# to HTTP_PROXY/ENV only for the pip/git/HF-heavy layers. Loopback/localhost
-# in URLs is rewritten to host.docker.internal (+ --add-host=...:host-gateway).
-# Clash should allow LAN and listen on 0.0.0.0.
+# HTTP proxy:
+#   If you need proxy during docker build, export proxy variables on the host:
+#
+#     export http_proxy=http://127.0.0.1:7890
+#     export https_proxy=http://127.0.0.1:7890
+#
+#   This script uses `docker build --network=host`, so 127.0.0.1 inside
+#   the build container refers to the host network namespace on Linux.
+#   Therefore we intentionally do NOT rewrite 127.0.0.1/localhost to
+#   host.docker.internal here.
+#
+#   To force-disable proxy even when your shell has proxy env vars:
+#
+#     RBNX_BUILD_PROXY=0 bash system/scene/scripts/build.sh
+#
+#   To use proxy automatically when env vars exist:
+#
+#     bash system/scene/scripts/build.sh
 
 set -euo pipefail
+
 PKG="${RBNX_PACKAGE_ROOT:-$(cd "$(dirname "$0")/.." && pwd)}"
 cd "$PKG"
 
@@ -36,6 +49,7 @@ mkdir -p "$BUILD/data"
 # ── 1. Codegen (.proto + grpc stubs + MCP dataclasses → rbnx-build/codegen/) ─
 FLAGS=(--mcp)
 [[ "$CLEAN" == "1" ]] && FLAGS+=(--clean)
+
 echo "[build] rbnx codegen ${FLAGS[*]}"
 rbnx codegen -p "$PKG" "${FLAGS[@]}"
 
@@ -45,43 +59,70 @@ rbnx codegen -p "$PKG" "${FLAGS[@]}"
 # with curl --retry-all-errors is much more robust, and the resulting
 # files become a cache-key-stable COPY into the image.
 #
-# RBNX_GH_MIRROR: prefix prepended to github.com URLs (default ghfast.top
-# for GFW-bound networks; set to empty string to disable and hit github
-# directly). If the mirror download fails, we fall back to direct.
+# RBNX_GH_MIRROR:
+#   Prefix prepended to github.com URLs.
+#   Default: https://ghfast.top/
+#   Set to empty string to disable mirror and hit GitHub directly:
+#
+#     RBNX_GH_MIRROR= bash system/scene/scripts/build.sh
+#
 WEIGHTS_DIR="$PKG/docker/_weights"
 mkdir -p "$WEIGHTS_DIR"
+
 GH_MIRROR="${RBNX_GH_MIRROR-https://ghfast.top/}"
+
 fetch_weight() {
     local url="$1"
     local dest="$2"
+
     if [[ -s "$dest" ]]; then
         echo "[build] weight already present: $(basename "$dest")"
         return 0
     fi
+
     local primary="$url"
     if [[ -n "$GH_MIRROR" ]]; then
         primary="${GH_MIRROR%/}/$url"
     fi
+
     echo "[build] downloading $(basename "$dest") from $primary"
-    if curl -fL --connect-timeout 30 --retry 5 --retry-all-errors --retry-delay 5 \
-            -o "$dest" "$primary"; then
+
+    if curl -fL \
+            --connect-timeout 30 \
+            --retry 5 \
+            --retry-all-errors \
+            --retry-delay 5 \
+            -o "$dest" \
+            "$primary"; then
         return 0
     fi
+
     rm -f "$dest"
+
     if [[ "$primary" != "$url" ]]; then
         echo "[build] mirror failed; falling back to direct: $url" >&2
-        if curl -fL --connect-timeout 30 --retry 5 --retry-all-errors --retry-delay 5 \
-                -o "$dest" "$url"; then
+
+        if curl -fL \
+                --connect-timeout 30 \
+                --retry 5 \
+                --retry-all-errors \
+                --retry-delay 5 \
+                -o "$dest" \
+                "$url"; then
             return 0
         fi
+
         rm -f "$dest"
     fi
+
     echo "[build] error: failed to download $url" >&2
     exit 1
 }
+
 fetch_weight \
     "https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8l-world.pt" \
     "$WEIGHTS_DIR/yolov8l-world.pt"
+
 fetch_weight \
     "https://github.com/ChaoningZhang/MobileSAM/raw/master/weights/mobile_sam.pt" \
     "$WEIGHTS_DIR/mobile_sam.pt"
@@ -95,31 +136,58 @@ fi
 DOCKER_BUILD_FLAGS=(--network=host)
 [[ "$CLEAN" == "1" ]] && DOCKER_BUILD_FLAGS+=(--no-cache)
 
-# Proxy → docker build-args (see file-header comment).
+# Proxy → docker build-args.
+#
+# Important:
+#   We pass HTTP_PROXY_HOST / HTTPS_PROXY_HOST instead of Docker's special
+#   HTTP_PROXY / HTTPS_PROXY build args.
+#
+#   This avoids BuildKit treating them as global proxy args implicitly.
+#   The Dockerfile opts in per command via `with-build-proxy` (git clone,
+#   model downloads); apt and mirror-backed pip run under `without-proxy`.
+#
+#   Because we use --network=host, keep 127.0.0.1 unchanged.
+#
+USE_PROXY="${RBNX_BUILD_PROXY:-auto}"
+
 _http_proxy="${HTTP_PROXY:-${http_proxy:-}}"
 _https_proxy="${HTTPS_PROXY:-${https_proxy:-}}"
 _no_proxy="${NO_PROXY:-${no_proxy:-}}"
 
-proxy_for_docker_build() {
-    local url="$1"
-    [[ -z "$url" ]] && { printf ''; return; }
-    case "$url" in
-        *127.0.0.1*) url="${url//127.0.0.1/host.docker.internal}" ;;
-        *localhost*) url="${url//localhost/host.docker.internal}" ;;
-    esac
-    printf '%s' "$url"
-}
+_docker_http=""
+_docker_https=""
 
-_docker_http="$(proxy_for_docker_build "$_http_proxy")"
-_docker_https="$(proxy_for_docker_build "$_https_proxy")"
+case "$USE_PROXY" in
+    0|false|False|FALSE|no|No|NO)
+        echo "[build] proxy disabled by RBNX_BUILD_PROXY=$USE_PROXY"
+        ;;
+    1|true|True|TRUE|yes|Yes|YES|auto)
+        _docker_http="$_http_proxy"
+        _docker_https="$_https_proxy"
 
-if [[ -n "$_docker_http" || -n "$_docker_https" ]]; then
-    echo "[build] docker build: build-arg HTTP_PROXY_HOST=${_docker_http:-<unset>} HTTPS_PROXY_HOST=${_docker_https:-<unset>}"
-    DOCKER_BUILD_FLAGS+=(--add-host=host.docker.internal:host-gateway)
-    [[ -n "$_docker_http" ]] && DOCKER_BUILD_FLAGS+=(--build-arg "HTTP_PROXY_HOST=${_docker_http}")
-    [[ -n "$_docker_https" ]] && DOCKER_BUILD_FLAGS+=(--build-arg "HTTPS_PROXY_HOST=${_docker_https}")
-    [[ -n "$_no_proxy" ]] && DOCKER_BUILD_FLAGS+=(--build-arg "NO_PROXY_HOST=${_no_proxy}")
-fi
+        if [[ -n "$_docker_http" || -n "$_docker_https" ]]; then
+            echo "[build] proxy enabled:"
+            echo "[build]   HTTP_PROXY_HOST=${_docker_http:-<unset>}"
+            echo "[build]   HTTPS_PROXY_HOST=${_docker_https:-<unset>}"
+
+            if [[ "$_docker_http" == *host.docker.internal* || "$_docker_https" == *host.docker.internal* ]]; then
+                echo "[build] warning: proxy URL contains host.docker.internal while using --network=host." >&2
+                echo "[build] warning: for this script, prefer http://127.0.0.1:7890 on Linux." >&2
+            fi
+
+            [[ -n "$_docker_http" ]] && DOCKER_BUILD_FLAGS+=(--build-arg "HTTP_PROXY_HOST=${_docker_http}")
+            [[ -n "$_docker_https" ]] && DOCKER_BUILD_FLAGS+=(--build-arg "HTTPS_PROXY_HOST=${_docker_https}")
+            [[ -n "$_no_proxy" ]] && DOCKER_BUILD_FLAGS+=(--build-arg "NO_PROXY_HOST=${_no_proxy}")
+        else
+            echo "[build] proxy env not set; building without proxy"
+        fi
+        ;;
+    *)
+        echo "[build] error: invalid RBNX_BUILD_PROXY=$USE_PROXY" >&2
+        echo "[build] expected one of: auto, 1, 0, true, false, yes, no" >&2
+        exit 1
+        ;;
+esac
 
 # Skip the rebuild when the image already exists AND its layers are
 # all cached — `docker build` is idempotent so this branch is just an
@@ -131,4 +199,4 @@ fi
 echo "[build] docker build -t $IMG docker/"
 docker build "${DOCKER_BUILD_FLAGS[@]}" -t "$IMG" docker/
 
-echo "[build] done."
+echo "[build] done."
\ No newline at end of file