Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
d727208
fix: tag tritonfrontend wheel with build platform (TRI-983)
mc-nv Apr 21, 2026
af8a6f1
fix: run auditwheel on tritonfrontend wheel (TRI-983)
mc-nv Apr 21, 2026
8630109
fix: tag tritonfrontend wheel with CI build number (TRI-983)
mc-nv Apr 21, 2026
e5bf246
fix: auto-derive python/ABI tags, compose local version, propagate en…
mc-nv Apr 21, 2026
34e599e
fix: source wheel build-tag from --build-id and detect CUDA locally (…
mc-nv Apr 21, 2026
24cf5e0
fix: source NVIDIA_UPSTREAM_VERSION from --upstream-container-version…
mc-nv Apr 21, 2026
63bbec0
Remove system package installation
mc-nv Apr 21, 2026
64e2c82
build: use /opt/venv-tritonserver virtualenv for pip installs
mc-nv Apr 21, 2026
e808b04
build: drop PIP_BREAK_SYSTEM_PACKAGES, isolate patchelf via throwaway…
mc-nv Apr 21, 2026
f1f035d
Fix package installation
mc-nv Apr 21, 2026
5ed69f2
fix: signal has_ext_modules() to make wheel platlib-compliant (TRI-983)
mc-nv Apr 21, 2026
0a5913e
build: venv in production stage, symlink patchelf (TRI-983)
mc-nv Apr 22, 2026
dd6ca0d
fix: robust NVIDIA_UPSTREAM_VERSION lookup for wheel naming (TRI-983)
mc-nv Apr 22, 2026
046ad94
fix: propagate CI_PIPELINE_ID and prefer it as wheel build tag (TRI-983)
mc-nv Apr 22, 2026
2ebd7d1
feat(wheel): use CI_JOB_ID as build tag, add PYPI_RELEASE support
mc-nv Apr 22, 2026
d9115b8
fix(wheel): revert build tag to CI_PIPELINE_ID for consistency
mc-nv Apr 22, 2026
16e5446
fix(wheel): omit build tag on PYPI_RELEASE=true
mc-nv Apr 22, 2026
0e47abb
fix(rhel): use ln -sf to overwrite existing patchelf symlink
mc-nv Apr 22, 2026
ad55303
fix: consolidate tool venvs into /opt/venv-tritonserver
mc-nv Apr 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions Dockerfile.QA
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ RUN apt-get update && \
software-properties-common && \
rm -rf /var/lib/apt/lists/*

RUN pip3 install cmake==4.0.3
RUN python3 -m venv /opt/venv-tritonserver
ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
RUN pip install cmake==4.0.3
ENV CMAKE_POLICY_VERSION_MINIMUM=3.5

# Add densenet_onnx model to example repo
Expand Down Expand Up @@ -348,7 +350,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
RUN rm -f /usr/bin/python && \
ln -s /usr/bin/python3 /usr/bin/python

RUN pip3 install --upgrade "numpy<2" pillow attrdict future "grpcio<1.68" requests gsutil \
RUN pip install --upgrade "numpy<2" pillow attrdict future "grpcio<1.68" requests gsutil \
"awscli<=1.36.40" six "grpcio-channelz<1.68" prettytable virtualenv \
check-jsonschema

Expand Down Expand Up @@ -377,7 +379,7 @@ COPY --chown=1000:1000 --from=sdk /workspace/qa/ qa/
RUN rm -fr qa/L0_copyrights qa/L0_build_variants && \
find qa/pkgs/ -maxdepth 1 -type f -name \
"tritonclient-*-py3-none-any.whl" | xargs printf -- '%s[all]' | \
xargs pip3 install --upgrade
xargs pip install --upgrade

ENV LD_LIBRARY_PATH /opt/tritonserver/qa/clients:${LD_LIBRARY_PATH}

Expand Down
167 changes: 136 additions & 31 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -930,7 +930,7 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):

ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION
ENV PIP_BREAK_SYSTEM_PACKAGES=1 CMAKE_POLICY_VERSION_MINIMUM=3.5
ENV CMAKE_POLICY_VERSION_MINIMUM=3.5
"""
df += """
# Install docker docker buildx
Expand Down Expand Up @@ -994,15 +994,22 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
df += change_default_python_version_rhel(FLAGS.rhel_py_version)
df += """

RUN pip3 install --upgrade pip \\
&& pip3 install --upgrade \\
# Create a dedicated virtualenv so pip installs are isolated from the
# distro-managed system Python. Subsequent RUN steps pick up the
# venv's pip/python via PATH.
RUN python3 -m venv /opt/venv-tritonserver
ENV PATH="/opt/venv-tritonserver/bin:${PATH}"

RUN pip install --upgrade pip \\
&& pip install --upgrade \\
build \\
wheel \\
setuptools \\
docker \\
virtualenv \\
patchelf==0.17.2 \\
cmake==4.0.3
cmake==4.0.3 \\
auditwheel
"""
df += f"""
# Install boost version >= 1.78 for boost::span
Expand Down Expand Up @@ -1047,7 +1054,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):

ARG TRITON_VERSION
ARG TRITON_CONTAINER_VERSION
ENV PIP_BREAK_SYSTEM_PACKAGES=1 CMAKE_POLICY_VERSION_MINIMUM=3.5
ENV CMAKE_POLICY_VERSION_MINIMUM=3.5
"""
# Install the windows- or linux-specific buildbase dependencies
if target_platform() == "windows":
Expand Down Expand Up @@ -1094,9 +1101,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
libb64-dev \\
libgoogle-perftools-dev \\
python3-dev \\
python3-pip \\
python3-wheel \\
python3-setuptools \\
python3-venv \\
rapidjson-dev \\
scons \\
software-properties-common \\
Expand All @@ -1110,12 +1115,21 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
wget \\
&& rm -rf /var/lib/apt/lists/*

RUN pip3 install --upgrade \\
# Create a dedicated virtualenv so pip installs are isolated from the
# distro-managed system Python. Subsequent RUN steps pick up the
# venv's pip/python via PATH.
RUN python3 -m venv /opt/venv-tritonserver
ENV PATH="/opt/venv-tritonserver/bin:${PATH}"

RUN pip install --upgrade \\
build \\
wheel \\
setuptools \\
docker \\
virtualenv \\
patchelf==0.17.2 \\
cmake==4.0.3 \\
auditwheel \\
pybind11[global]
"""

Expand Down Expand Up @@ -1194,7 +1208,6 @@ def create_dockerfile_cibase(ddir, dockerfile_name, argmap):

ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
ENV PIP_BREAK_SYSTEM_PACKAGES=1
"""

with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
Expand Down Expand Up @@ -1241,14 +1254,26 @@ def create_dockerfile_linux(
## Production stage: Create container with just inference server executable
############################################################################
FROM ${BASE_IMAGE}

ENV PIP_BREAK_SYSTEM_PACKAGES=1
"""

df += dockerfile_prepare_container_linux(
argmap, backends, FLAGS.enable_gpu, target_machine()
)

# Create a dedicated virtualenv so the wheel + openai-requirements
# pip installs below run in isolation from the distro-managed
# system Python (replaces the legacy PIP_BREAK_SYSTEM_PACKAGES=1
# escape hatch). If the python-backend branch above already
# created /opt/venv-tritonserver (on top of pyenv / Ubuntu
# python3), re-running `python3 -m venv` is a safe no-op; on
# minimal builds without the python backend this is the first
# creation. Derived images (Dockerfile.QA) inherit the venv via
# PATH.
df += """
RUN python3 -m venv /opt/venv-tritonserver
ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
"""

df += f"""
WORKDIR /opt
COPY --chown=1000:1000 build/install tritonserver
Expand All @@ -1260,7 +1285,7 @@ def create_dockerfile_linux(
find /opt/tritonserver/python -maxdepth 1 -type f -name \\
"tritonfrontend-*.whl" | xargs -I {{}} pip install --upgrade {{}}[{FLAGS.triton_wheels_dependencies_group}]

RUN pip3 install -r python/openai/requirements.txt
RUN pip install -r python/openai/requirements.txt

"""
if not FLAGS.no_core_build:
Expand Down Expand Up @@ -1349,20 +1374,22 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
if target_platform() == "rhel":
df += """
# Common dependencies.
RUN yum install -y \\
RUN dnf install -y \\
git \\
gperf \\
re2-devel \\
openssl-devel \\
libtool \\
libcurl-devel \\
libb64-devel \\
gperftools-devel \\
wget \\
python3.12-pip \\
numactl-devel
libb64-devel \\
libcurl-devel \\
libtool \\
numactl-devel \\
openssl-devel \\
python3.12-venv \\
re2-devel \\
wget

RUN pip3 install patchelf==0.17.2
RUN python3 -m venv /opt/venv-tritonserver
ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
RUN pip install patchelf==0.17.2

"""
else:
Expand All @@ -1387,6 +1414,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
wget \\
{backend_dependencies} \\
python3-pip \\
python3-venv \\
&& rm -rf /var/lib/apt/lists/*
""".format(
backend_dependencies=backend_dependencies
Expand Down Expand Up @@ -1438,8 +1466,14 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
# Requires openssl-devel to be installed first for pyenv build to be successful
df += change_default_python_version_rhel(FLAGS.rhel_py_version)
df += """
RUN pip3 install --upgrade pip \\
&& pip3 install --upgrade \\
# Create a dedicated virtualenv so pip installs are isolated from the
# distro-managed system Python. Built after pyenv has provided the
# desired Python version so the venv inherits that interpreter.
RUN python3 -m venv /opt/venv-tritonserver
ENV PATH="/opt/venv-tritonserver/bin:${PATH}"

RUN pip install --upgrade pip \\
&& pip install --upgrade \\
wheel \\
setuptools \\
\"numpy<2\" \\
Expand All @@ -1451,15 +1485,23 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
RUN apt-get update \\
&& apt-get install -y --no-install-recommends \\
python3 \\
python3-venv \\
libarchive-dev \\
python3-pip \\
python3-wheel \\
python3-setuptools \\
libpython3-dev \\
&& pip3 install --upgrade \\
\"numpy<2\" \\
virtualenv \\
&& rm -rf /var/lib/apt/lists/*

# Create a dedicated virtualenv so pip installs are isolated from the
# distro-managed system Python. Subsequent RUN steps pick up the
# venv's pip/python via PATH.
RUN python3 -m venv /opt/venv-tritonserver
ENV PATH="/opt/venv-tritonserver/bin:${PATH}"

RUN pip install --upgrade \\
\"numpy<2\" \\
virtualenv
"""
if "tensorrtllm" in backends or "vllm" in backends:
df += """
Expand Down Expand Up @@ -1552,10 +1594,12 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine):

COPY --from=min_container /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.9 /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.9

# patchelf is needed to add deps of libcublasLt.so.12 to libtorch_cuda.so
# patchelf is needed to add deps of libcublasLt.so.12 to libtorch_cuda.so.
RUN apt-get update \\
&& apt-get install -y --no-install-recommends openmpi-bin
RUN pip3 install patchelf==0.17.2
&& apt-get install -y --no-install-recommends openmpi-bin python3-venv
RUN python3 -m venv /opt/venv-tritonserver
ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
RUN pip install patchelf==0.17.2

ENV LD_LIBRARY_PATH /usr/local/cuda/targets/{cuda_arch}-linux/lib:/usr/local/cuda/lib64/stubs:${{LD_LIBRARY_PATH}}
""".format(
Expand Down Expand Up @@ -1806,6 +1850,53 @@ def create_docker_build_script(script_name, container_install_dir, container_ci_
"tritonserver_builder",
]

# Propagate wheel-naming context into the build container so
# build_wheel.py can compose the full wheel filename. See
# TRI-983. Both CLI flags and host env vars are checked so the
# value is defined in CI and local builds alike:
#
# * CI_PIPELINE_ID — GitLab pipeline ID; shared across all jobs
# in one pipeline so tritonserver and tritonfrontend wheels
# from the same release carry the same PEP 427 build tag.
# In CI, pass `--build-id=${CI_PIPELINE_ID}` to build.py.
# * NVIDIA_BUILD_ID — from --build-id; the primary vehicle for
# CI_PIPELINE_ID into the container. build_wheel.py falls back
# to this when CI_PIPELINE_ID is not exported directly.
# * NVIDIA_UPSTREAM_VERSION — primarily from
# --upstream-container-version (CI:
# `--upstream-container-version=${NVIDIA_UPSTREAM_VERSION}`;
# local: DEFAULT_TRITON_VERSION_MAP default). Falls back to
# the host env var when the CLI flag is empty so the
# +nv<X> local-version segment is still applied even if
# someone invokes build.py with `--upstream-container-version=`.
# * PYPI_RELEASE — when "true", build_wheel.py omits the
# +nv<X>.cu<Y> local-version suffix so the resulting wheel
# can be uploaded to PyPI (which rejects local versions).
#
# CUDA_VERSION is intentionally NOT propagated: the CUDA base
# image already sets it as an ENV inside the container, and
# the host/CI runner does not. Passing "-e CUDA_VERSION" with
# an empty host value would override (and erase) the
# container's value. build_wheel.py reads CUDA_VERSION from
# the container-local env (with a /usr/local/cuda/version.json
# fallback), which is where it is reliably set.
ci_pipeline_id = os.environ.get("CI_PIPELINE_ID")
if ci_pipeline_id:
runargs += ["-e", f"CI_PIPELINE_ID={ci_pipeline_id}"]
if FLAGS.build_id is not None:
runargs += ["-e", f"NVIDIA_BUILD_ID={FLAGS.build_id}"]
upstream_version = FLAGS.upstream_container_version or os.environ.get(
"NVIDIA_UPSTREAM_VERSION"
)
if upstream_version:
runargs += [
"-e",
f"NVIDIA_UPSTREAM_VERSION={upstream_version}",
]
pypi_release = os.environ.get("PYPI_RELEASE")
if pypi_release:
runargs += ["-e", f"PYPI_RELEASE={pypi_release}"]

if not FLAGS.no_container_interactive:
runargs += ["-it"]

Expand Down Expand Up @@ -2849,6 +2940,20 @@ def enable_all():

log("container version {}".format(FLAGS.container_version))
log("upstream container version {}".format(FLAGS.upstream_container_version))
# Explicit visibility for wheel-naming inputs (see TRI-983). If
# these are empty here, the wheel filename will lack the expected
# build-tag / local-version segments and the log below tells us
# which link in the chain dropped the value.
log(
"wheel-naming inputs: --build-id={!r}, --upstream-container-version={!r}, "
"CI_PIPELINE_ID={!r}, env NVIDIA_UPSTREAM_VERSION={!r}, PYPI_RELEASE={!r}".format(
FLAGS.build_id,
FLAGS.upstream_container_version,
os.environ.get("CI_PIPELINE_ID"),
os.environ.get("NVIDIA_UPSTREAM_VERSION"),
os.environ.get("PYPI_RELEASE"),
)
)

for ep in FLAGS.endpoint:
log(f'endpoint "{ep}"')
Expand Down
Loading
Loading