# Patch: give the nilai-models daemon its own uv environment inside the vLLM image.
#   - docker/vllm.Dockerfile: run `uv sync` with UV_PROJECT_ENVIRONMENT pointing at
#     /daemon/nilai-models/.venv, then upgrade transformers / huggingface-hub with pip.
#   - nilai-models/run.sh: start the daemon with the same UV_PROJECT_ENVIRONMENT.
# NOTE(review): this patch was recovered from a whitespace-collapsed copy. The line
# structure matches the hunk headers (-14,14 +14,22 and -10,7 +10,7), but the
# indentation of context lines is assumed to be four spaces; if it does not match
# the tree, apply with `git apply --ignore-whitespace`.
diff --git a/docker/vllm.Dockerfile b/docker/vllm.Dockerfile
index 0b9742df..1206a788 100644
--- a/docker/vllm.Dockerfile
+++ b/docker/vllm.Dockerfile
@@ -14,14 +14,22 @@ COPY --link . /daemon/
 
 WORKDIR /daemon/nilai-models/
 
+# Install daemon dependencies into an isolated venv, separate from vLLM's /opt/venv.
+# vLLM base image sets VIRTUAL_ENV=/opt/venv and puts it on PATH.
+# UV_PROJECT_ENVIRONMENT ensures uv sync targets the daemon's own .venv.
 RUN apt-get update && \
     apt-get install build-essential -y && \
     pip install uv && \
-    uv sync && \
+    UV_PROJECT_ENVIRONMENT=/daemon/nilai-models/.venv uv sync && \
     apt-get clean && \
     apt-get autoremove && \
     rm -rf /var/lib/apt/lists/*
 
+# Upgrade transformers in vLLM's /opt/venv (where pip/python3 resolve via PATH).
+# Gemma 4 architecture requires transformers>=5.5.0.
+# --no-deps avoids pulling transitive deps that conflict with vLLM's pins.
+RUN pip install 'transformers>=5.5.0' --no-deps && pip install 'huggingface-hub>=1.5.0,<2.0'
+
 # Expose port 8000 for incoming requests
 EXPOSE 8000
 
diff --git a/nilai-models/run.sh b/nilai-models/run.sh
index 67f72f90..bbe1a471 100644
--- a/nilai-models/run.sh
+++ b/nilai-models/run.sh
@@ -10,7 +10,7 @@ start_primary_process() {
 
 start_secondary_process() {
     echo "Starting the secondary process"
-    uv run python3 -m nilai_models.daemon
+    UV_PROJECT_ENVIRONMENT=/daemon/nilai-models/.venv uv run python3 -m nilai_models.daemon
 }
 
 main() {