1- # Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1+ # Copyright 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22#
33# Redistribution and use in source and binary forms, with or without
44# modification, are permitted provided that the following conditions
2929#
3030
3131# Base image on the minimum Triton container
32- ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.07-py3-min
32+ ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:26.02-py3-min
3333
3434ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
35- ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo
3635ARG TRITON_REPO_ORGANIZATION=http://github.com/triton-inference-server
3736ARG TRITON_COMMON_REPO_TAG=main
3837ARG TRITON_CORE_REPO_TAG=main
@@ -41,9 +40,8 @@ ARG TRITON_THIRD_PARTY_REPO_TAG=main
4140ARG TRITON_ENABLE_GPU=ON
4241ARG JAVA_BINDINGS_MAVEN_VERSION=3.8.4
4342ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG=1.5.8
44- ARG TRITON_PERF_ANALYZER_BUILD=1
4543# DCGM version to install for Model Analyzer
46- ARG DCGM_VERSION=4.2.3-2
44+ ARG DCGM_VERSION=4.5.2-1
4745
4846ARG NVIDIA_TRITON_SERVER_SDK_VERSION=unknown
4947ARG NVIDIA_BUILD_ID=unknown
@@ -97,7 +95,6 @@ RUN rm -f /usr/bin/python && \
9795# Build the client library and examples
9896ARG TRITON_REPO_ORGANIZATION
9997ARG TRITON_CLIENT_REPO_SUBDIR
100- ARG TRITON_PA_REPO_SUBDIR
10198ARG TRITON_COMMON_REPO_TAG
10299ARG TRITON_CORE_REPO_TAG
103100ARG TRITON_CLIENT_REPO_TAG
@@ -106,14 +103,10 @@ ARG TRITON_ENABLE_GPU
106103ARG JAVA_BINDINGS_MAVEN_VERSION
107104ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG
108105ARG TARGETPLATFORM
109- ARG TRITON_PERF_ANALYZER_BUILD
110-
111- ENV TRITON_PERF_ANALYZER_BUILD=${TRITON_PERF_ANALYZER_BUILD}
112106
113107WORKDIR /workspace
114108COPY TRITON_VERSION .
115109COPY ${TRITON_CLIENT_REPO_SUBDIR} client
116- COPY ${TRITON_PA_REPO_SUBDIR} perf_analyzer
117110
118111WORKDIR /workspace/client_build
119112RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
@@ -124,63 +117,11 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
124117 -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
125118 -DTRITON_ENABLE_PERF_ANALYZER=OFF \
126119 -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \
127- -DTRITON_ENABLE_PYTHON_HTTP=OFF -DTRITON_ENABLE_PYTHON_GRPC=OFF \
120+ -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \
128121 -DTRITON_ENABLE_JAVA_HTTP=ON \
129122 -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
130123 -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
131- RUN cmake --build . -v --parallel --target cc-clients java-clients
132-
133- # TODO: PA will rebuild the CC clients since it depends on it.
134- # This should be optimized so that we do not have to build
135- # the CC clients twice. Similarly, because the SDK expectation is
136- # that PA is packaged with the python client, we hold off on building
137- # the python client until now. Post-migration we should focus
138- # effort on de-tangling these flows.
139- WORKDIR /workspace/pa_build
140- # NOTE: If TRITON_PERF_ANALYZER_BUILD=0, the Performance Analyzer (PA) binaries must already exist
141- # in the path specified by the ARG TRITON_PA_REPO_SUBDIR.
142- RUN if [ "$TRITON_PERF_ANALYZER_BUILD" = "1" ]; then \
143- cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
144- -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
145- -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
146- -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
147- -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
148- -DTRITON_CLIENT_REPO_TAG=${TRITON_CLIENT_REPO_TAG} \
149- -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
150- -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON \
151- -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON \
152- -DTRITON_ENABLE_PERF_ANALYZER_TS=ON \
153- -DTRITON_ENABLE_PERF_ANALYZER_OPENAI=ON \
154- -DTRITON_ENABLE_CC_HTTP=ON \
155- -DTRITON_ENABLE_CC_GRPC=ON \
156- -DTRITON_ENABLE_PYTHON_HTTP=ON \
157- -DTRITON_ENABLE_PYTHON_GRPC=ON \
158- -DTRITON_PACKAGE_PERF_ANALYZER=ON \
159- -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
160- /workspace/perf_analyzer && \
161- cmake --build . -v --parallel --target perf-analyzer python-clients && \
162- pip3 install build && \
163- cd /workspace/perf_analyzer/genai-perf && \
164- python3 -m build --wheel --outdir /workspace/install/python; \
165- else \
166- ls /workspace/perf_analyzer/ && \
167- tar -xzf /workspace/perf_analyzer/perf_analyzer*.tar.gz -C /workspace/install/bin && \
168- echo "Perf Analyzer binaries was extracted and not build" && \
169- cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
170- -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
171- -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
172- -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
173- -DTRITON_CLIENT_REPO_TAG=${TRITON_CLIENT_REPO_TAG} \
174- -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
175- -DTRITON_ENABLE_PYTHON_HTTP=ON \
176- -DTRITON_ENABLE_PYTHON_GRPC=ON \
177- -DTRITON_PACKAGE_PERF_ANALYZER=ON \
178- -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
179- /workspace/perf_analyzer && \
180- cmake --build . -v --parallel --target python-clients && \
181- mkdir -p /workspace/install/python && \
182- cp /workspace/perf_analyzer/genai_perf-*.whl /workspace/install/python/; \
183- fi
124+ RUN cmake --build . -v --parallel --target cc-clients java-clients python-clients
184125
185126# Install Java API Bindings
186127RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
@@ -226,7 +167,6 @@ RUN apt-get update && \
226167 python3-pip \
227168 python3-setuptools \
228169 python3-wheel \
229- software-properties-common \
230170 vim \
231171 wget && \
232172 pip3 install "grpcio<1.68" "grpcio-tools<1.68"
@@ -235,7 +175,6 @@ WORKDIR /workspace
235175COPY TRITON_VERSION .
236176COPY NVIDIA_Deep_Learning_Container_License.pdf .
237177COPY --from=sdk_build /workspace/client/ client/
238- COPY --from=sdk_build /workspace/perf_analyzer/ perf_analyzer/
239178COPY --from=sdk_build /workspace/install/ install/
240179RUN cd install && \
241180 export VERSION=`cat /workspace/TRITON_VERSION` && \
@@ -253,8 +192,6 @@ COPY --from=sdk_build /workspace/client/src/python/library/tests/* qa/python_cli
253192# Install an image needed by the quickstart and other documentation.
254193COPY qa/images/mug.jpg images/mug.jpg
255194
256- RUN pip3 install install/python/genai_perf-*.whl
257-
258195# Install the dependencies needed to run the client examples. These
259196# are not needed for building but including them allows this image to
260197# be used to run the client examples.
@@ -263,6 +200,9 @@ RUN pip3 install --upgrade "numpy<2" pillow attrdict && \
263200 "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \
264201 xargs pip3 install --upgrade
265202
203+ # Install GenAI-Perf
204+ RUN pip3 install genai-perf
205+
266206# Install DCGM
267207RUN if [ "$TRITON_ENABLE_GPU" = "ON" ]; then \
268208 [ "$(uname -m)" != "x86_64" ] && arch="sbsa" || arch="x86_64" && \
@@ -279,6 +219,11 @@ RUN if [ "$TRITON_ENABLE_GPU" = "ON" ]; then \
279219RUN rm -f /usr/bin/python && \
280220 ln -s /usr/bin/python3 /usr/bin/python
281221
222+ # Install Model Analyzer
223+ ARG TRITON_MODEL_ANALYZER_REPO_TAG
224+ ARG TRITON_MODEL_ANALYZER_REPO="${TRITON_REPO_ORGANIZATION}/model_analyzer@${TRITON_MODEL_ANALYZER_REPO_TAG}"
225+ RUN pip3 install "git+${TRITON_MODEL_ANALYZER_REPO}"
226+
282227# Entrypoint Banner
283228ENV NVIDIA_PRODUCT_NAME="Triton Server SDK"
284229COPY docker/entrypoint.d/ /opt/nvidia/entrypoint.d/
0 commit comments