From 9d718da1fcf6cf48ee72731e25f2aa49aa55137e Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Wed, 29 Apr 2026 09:52:11 -0700 Subject: [PATCH 1/6] fix: tag tritonfrontend wheel with arch-specific platform tag (TRI-983) Replace the legacy bdist_wheel.get_tag() override that hard-coded py3-none-any with BinaryDistribution.has_ext_modules()=True, mirroring the fix applied to the tritonserver wheel in core/. Add _repair_wheel_with_auditwheel() to build_wheel.py so the wheel is upgraded from linux_ to manylinux_2_X_ for PyPI compatibility. The container image still receives the linux_ wheel from generic/wheel/dist/ (correct for in-container pip install); the manylinux wheel is written to generic/ for future PyPI publishing. Also replace the deprecated distutils.dir_util.copy_tree with shutil.copytree (symlinks=True, dirs_exist_ok=True). --- src/python/build_wheel.py | 70 ++++++++++++++++++++++++++++++++++++--- src/python/setup.py | 31 ++++++----------- 2 files changed, 76 insertions(+), 25 deletions(-) diff --git a/src/python/build_wheel.py b/src/python/build_wheel.py index 875dd32a70..e8e91812f1 100755 --- a/src/python/build_wheel.py +++ b/src/python/build_wheel.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -32,7 +32,6 @@ import shutil import subprocess import sys -from distutils.dir_util import copy_tree from tempfile import mkstemp @@ -51,7 +50,7 @@ def touch(path): def cpdir(src, dest): - copy_tree(src, dest, preserve_symlinks=1) + shutil.copytree(src, dest, symlinks=True, dirs_exist_ok=True) def sed(pattern, replace, source, dest=None): @@ -70,6 +69,69 @@ def sed(pattern, replace, source, dest=None): shutil.copyfile(name, source) +def _repair_wheel_with_auditwheel(whl_dir, dest_dir): + """Upgrade a linux_ wheel to manylinux_2_X_. + + Ports the pattern established for tritonclient in TRI-286: + 1. auditwheel repair — auto-discovers the minimum manylinux tag + by inspecting glibc symbol requirements of the embedded .so. + 2. python -m wheel tags fallback — used when auditwheel reports + "no ELF" (the wheel has no native extension, e.g. a downstream + build disabled bindings). Mirrors the documented fallback. + 3. No-op with warning — when auditwheel is not installed in the + build image, keep the linux_ wheel as-is so the build + does not regress. + """ + if shutil.which("auditwheel") is None: + print( + "=== WARNING: auditwheel not found on PATH; keeping linux_ " + "wheel as-is. 
Install auditwheel in the build image to produce " + "PyPI-acceptable manylinux_2_X_ wheels.", + file=sys.stderr, + ) + shutil.copytree(os.path.join(whl_dir, "dist"), dest_dir, dirs_exist_ok=True) + return + + dist_dir = os.path.join(whl_dir, "dist") + wheels = [ + os.path.join(dist_dir, w) for w in os.listdir(dist_dir) if w.endswith(".whl") + ] + fail_if(not wheels, "no wheel produced by the build") + + for wheel_path in wheels: + print(f"=== Running auditwheel repair on {wheel_path}") + r = subprocess.run( + ["auditwheel", "repair", wheel_path, "--wheel-dir", dest_dir], + capture_output=True, + text=True, + ) + if r.returncode != 0 and "no ELF" in r.stderr: + arch = os.uname().machine + manylinux_tag = f"manylinux_2_28_{arch}" + print( + f"=== Pure-Python wheel detected; falling back to wheel tags " + f"({manylinux_tag})" + ) + copied = os.path.join(dest_dir, os.path.basename(wheel_path)) + shutil.copy(wheel_path, copied) + r2 = subprocess.run( + [ + "python3", + "-m", + "wheel", + "tags", + "--platform-tag", + manylinux_tag, + "--remove", + copied, + ] + ) + fail_if(r2.returncode != 0, "wheel tags fallback failed") + elif r.returncode != 0: + sys.stderr.write(r.stderr) + fail_if(True, "auditwheel repair failed") + + def main(): parser = argparse.ArgumentParser() @@ -125,7 +187,7 @@ def main(): p.wait() fail_if(p.returncode != 0, "setup.py failed") - cpdir("dist", FLAGS.dest_dir) + _repair_wheel_with_auditwheel(FLAGS.whl_dir, FLAGS.dest_dir) print(f"=== Output wheel file is in: {FLAGS.dest_dir}") touch(os.path.join(FLAGS.dest_dir, "stamp.whl")) diff --git a/src/python/setup.py b/src/python/setup.py index 2c7c12a9ee..12cf7b9afb 100755 --- a/src/python/setup.py +++ b/src/python/setup.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -26,34 +26,23 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import os -import sys -from setuptools import find_packages, setup - -if "--plat-name" in sys.argv: - PLATFORM_FLAG = sys.argv[sys.argv.index("--plat-name") + 1] -else: - PLATFORM_FLAG = "any" +from setuptools import Distribution, find_packages, setup if "VERSION" not in os.environ: raise Exception("envvar VERSION must be specified") VERSION = os.environ["VERSION"] -try: - from wheel.bdist_wheel import bdist_wheel as _bdist_wheel - - class bdist_wheel(_bdist_wheel): - def finalize_options(self): - _bdist_wheel.finalize_options(self) - self.root_is_pure = False - def get_tag(self): - pyver, abi, plat = "py3", "none", PLATFORM_FLAG - return pyver, abi, plat +# The wheel ships an arch-specific pybind11 extension bundled via +# package_data. Without has_ext_modules()=True setuptools marks the +# wheel pure-Python (py3-none-any), which auditwheel rejects. +# See TRI-983. 
+class BinaryDistribution(Distribution): + def has_ext_modules(self): + return True -except ImportError: - bdist_wheel = None this_directory = os.path.abspath(os.path.dirname(__file__)) @@ -105,7 +94,7 @@ def get_tag(self): "": platform_package_data, }, zip_safe=False, - cmdclass={"bdist_wheel": bdist_wheel}, + distclass=BinaryDistribution, data_files=data_files, install_requires=["tritonserver", "pydantic==2.10.6"], extras_require={"GPU": gpu_extras, "test": test_extras, "all": all_extras}, From 1ed7ed089db3c0dfc73d7b8df32d410127e986ba Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Wed, 29 Apr 2026 11:30:33 -0700 Subject: [PATCH 2/6] fix: add build-tag and _compose_version to tritonfrontend wheel (TRI-983) Port _detect_cuda_version() and _compose_version() from core/python/ build_wheel.py so the tritonfrontend wheel gets the same +nv{release}.cu{cudaXY} local-version segment as tritonserver. Add PEP 427 build-tag sourcing (CI_PIPELINE_ID / NVIDIA_BUILD_ID / BUILD_NUMBER) passed as --build-number= to setup.py bdist_wheel, mirroring the logic in core/python/build_wheel.py. --- src/python/build_wheel.py | 80 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/src/python/build_wheel.py b/src/python/build_wheel.py index e8e91812f1..e3a597c89d 100755 --- a/src/python/build_wheel.py +++ b/src/python/build_wheel.py @@ -69,6 +69,62 @@ def sed(pattern, replace, source, dest=None): shutil.copyfile(name, source) +def _detect_cuda_version(): + """Detect the CUDA toolkit version visible to the build. + + Prefers the CUDA_VERSION env var (set by official NVIDIA base + images); falls back to parsing /usr/local/cuda/version.json which + is the canonical location for the installed toolkit. Returns the + raw string (e.g. "13.2.1") or None when CUDA is not available. 
+ """ + v = os.environ.get("CUDA_VERSION") + if v: + return v + try: + import json as _json + + with open("/usr/local/cuda/version.json") as f: + data = _json.load(f) + return data.get("cuda", {}).get("version") + except (OSError, ValueError, KeyError): + return None + + +def _compose_version(base_version): + """Compose the full wheel version string. + + Appends a PEP 440 local-version segment describing the NVIDIA + container release and CUDA toolkit so consumers can tell an + nv26.04 wheel from an nv26.05 wheel and a cu132 wheel from a + cu128 wheel. All sources are optional; local non-CI builds return + the version unchanged. + """ + nv = ( + os.environ.get("NVIDIA_UPSTREAM_VERSION") + or os.environ.get("NVIDIA_TRITON_SERVER_VERSION") + or os.environ.get("TRITON_CONTAINER_VERSION") + ) + cuda = _detect_cuda_version() + print( + f"=== Wheel local-version inputs: " + f"NVIDIA_UPSTREAM_VERSION={os.environ.get('NVIDIA_UPSTREAM_VERSION')!r} " + f"NVIDIA_TRITON_SERVER_VERSION={os.environ.get('NVIDIA_TRITON_SERVER_VERSION')!r} " + f"TRITON_CONTAINER_VERSION={os.environ.get('TRITON_CONTAINER_VERSION')!r} " + f"-> nv={nv!r}, cuda={cuda!r}", + file=sys.stderr, + ) + local = [] + if nv: + local.append(f"nv{nv}") + if cuda: + parts = cuda.split(".") + if len(parts) >= 2 and parts[0].isdigit() and parts[1].isdigit(): + local.append(f"cu{parts[0]}{parts[1]}") + if local: + return f"{base_version}+{'.'.join(local)}" + return base_version + + def _repair_wheel_with_auditwheel(whl_dir, dest_dir): """Upgrade a linux_ wheel to manylinux_2_X_. @@ -179,9 +235,31 @@ def main(): os.chdir(FLAGS.whl_dir) print("=== Building wheel") args = ["python3", "setup.py", "bdist_wheel"] + # PEP 427 build tag: lets two wheels of the same version coexist + # (e.g. reruns of the same CI pipeline). Sources, first non-empty + # and usable wins: + # CI_PIPELINE_ID - GitLab pipeline-scoped ID (preferred). + # NVIDIA_BUILD_ID - from build.py's --build-id flag. + # BUILD_NUMBER - generic CI systems. 
+ # PEP 427 requires the build tag to start with a digit. + build_tag = ( + os.environ.get("CI_PIPELINE_ID") + or os.environ.get("NVIDIA_BUILD_ID") + or os.environ.get("BUILD_NUMBER") + ) + print( + f"=== Wheel build-tag inputs: " + f"CI_PIPELINE_ID={os.environ.get('CI_PIPELINE_ID')!r} " + f"NVIDIA_BUILD_ID={os.environ.get('NVIDIA_BUILD_ID')!r} " + f"BUILD_NUMBER={os.environ.get('BUILD_NUMBER')!r} " + f"-> build-tag={build_tag!r}", + file=sys.stderr, + ) + if build_tag and build_tag != "" and build_tag[:1].isdigit(): + args += [f"--build-number={build_tag}"] wenv = os.environ.copy() - wenv["VERSION"] = FLAGS.triton_version + wenv["VERSION"] = _compose_version(FLAGS.triton_version) wenv["TRITON_PYBIND"] = PYBIND_LIB p = subprocess.Popen(args, env=wenv) p.wait() From 19b57124150103bd463aea6f74f97f4b37ece36e Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Wed, 29 Apr 2026 11:41:45 -0700 Subject: [PATCH 3/6] fix: bake CI_PIPELINE_ID into buildbase Dockerfile when set (TRI-983) build_wheel.py prefers CI_PIPELINE_ID over NVIDIA_BUILD_ID as the PEP 427 wheel build tag, but CI_PIPELINE_ID was never forwarded into the build container. NVIDIA_BUILD_ID is baked in as ENV from build.py --build-id, so add the same treatment for CI_PIPELINE_ID: read it from the host environment and emit ENV CI_PIPELINE_ID in both the Linux and Windows buildbase Dockerfiles, but only when the variable is non-empty so local builds are unaffected. 
--- build.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/build.py b/build.py index cb3490bf0d..79e88c80f5 100755 --- a/build.py +++ b/build.py @@ -1512,6 +1512,8 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach """.format( argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_REF"] ) + if argmap.get("CI_PIPELINE_ID"): + df += "ENV CI_PIPELINE_ID {}\n".format(argmap["CI_PIPELINE_ID"]) return df @@ -1645,6 +1647,8 @@ def create_dockerfile_windows( """.format( argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_REF"] ) + if argmap.get("CI_PIPELINE_ID"): + df += "ENV CI_PIPELINE_ID {}\n".format(argmap["CI_PIPELINE_ID"]) with open(os.path.join(ddir, dockerfile_name), "w") as dfile: dfile.write(df) @@ -1678,6 +1682,7 @@ def create_build_dockerfiles( dockerfileargmap = { "NVIDIA_BUILD_REF": "" if FLAGS.build_sha is None else FLAGS.build_sha, "NVIDIA_BUILD_ID": "" if FLAGS.build_id is None else FLAGS.build_id, + "CI_PIPELINE_ID": os.environ.get("CI_PIPELINE_ID", ""), "TRITON_VERSION": FLAGS.version, "TRITON_CONTAINER_VERSION": FLAGS.container_version, "BASE_IMAGE": base_image, From 83911033af28b5e5fc73fbbe25c5b7770498c7a2 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Wed, 29 Apr 2026 11:42:49 -0700 Subject: [PATCH 4/6] Revert "fix: bake CI_PIPELINE_ID into buildbase Dockerfile when set (TRI-983)" This reverts commit 19b57124150103bd463aea6f74f97f4b37ece36e. 
--- build.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/build.py b/build.py index 79e88c80f5..cb3490bf0d 100755 --- a/build.py +++ b/build.py @@ -1512,8 +1512,6 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach """.format( argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_REF"] ) - if argmap.get("CI_PIPELINE_ID"): - df += "ENV CI_PIPELINE_ID {}\n".format(argmap["CI_PIPELINE_ID"]) return df @@ -1647,8 +1645,6 @@ def create_dockerfile_windows( """.format( argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_ID"], argmap["NVIDIA_BUILD_REF"] ) - if argmap.get("CI_PIPELINE_ID"): - df += "ENV CI_PIPELINE_ID {}\n".format(argmap["CI_PIPELINE_ID"]) with open(os.path.join(ddir, dockerfile_name), "w") as dfile: dfile.write(df) @@ -1682,7 +1678,6 @@ def create_build_dockerfiles( dockerfileargmap = { "NVIDIA_BUILD_REF": "" if FLAGS.build_sha is None else FLAGS.build_sha, "NVIDIA_BUILD_ID": "" if FLAGS.build_id is None else FLAGS.build_id, - "CI_PIPELINE_ID": os.environ.get("CI_PIPELINE_ID", ""), "TRITON_VERSION": FLAGS.version, "TRITON_CONTAINER_VERSION": FLAGS.container_version, "BASE_IMAGE": base_image, From f3e3e644e290facd68ae6106113603e8aafc22f3 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Wed, 29 Apr 2026 11:51:49 -0700 Subject: [PATCH 5/6] Adding environment variable to the build-base image --- build.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/build.py b/build.py index cb3490bf0d..557f55ae77 100755 --- a/build.py +++ b/build.py @@ -972,6 +972,13 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap): xz-devel \\ zlib-devel """ + if argmap["NVIDIA_BUILD_ID"] is not None: + df += """ +ENV BUILD_NUMBER={} +""".format( + argmap["NVIDIA_BUILD_ID"] + ) + if os.getenv("CCACHE_REMOTE_ONLY") and os.getenv("CCACHE_REMOTE_STORAGE"): df += """ RUN curl -k -s -L https://github.com/ccache/ccache/archive/refs/tags/v4.10.2.tar.gz -o /tmp/ccache.tar.gz \\ @@ -1049,6 +1056,12 @@ 
def create_dockerfile_buildbase(ddir, dockerfile_name, argmap): ARG TRITON_CONTAINER_VERSION ENV PIP_BREAK_SYSTEM_PACKAGES=1 CMAKE_POLICY_VERSION_MINIMUM=3.5 """ + if argmap["NVIDIA_BUILD_ID"] is not None: + df += """ +ENV BUILD_NUMBER={} +""".format( + argmap["NVIDIA_BUILD_ID"] + ) # Install the windows- or linux-specific buildbase dependencies if target_platform() == "windows": df += """ From 11d1b3b00e5845bfb1974ec9672dc132b4d390d3 Mon Sep 17 00:00:00 2001 From: "M. Chornyi" <99709299+mc-nv@users.noreply.github.com> Date: Mon, 4 May 2026 19:22:50 +0000 Subject: [PATCH 6/6] Add return values. --- src/python/build_wheel.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/python/build_wheel.py b/src/python/build_wheel.py index e3a597c89d..27edac6ec5 100755 --- a/src/python/build_wheel.py +++ b/src/python/build_wheel.py @@ -69,13 +69,15 @@ def sed(pattern, replace, source, dest=None): shutil.copyfile(name, source) -def _detect_cuda_version(): +def _detect_cuda_version() -> str | None: """Detect the CUDA toolkit version visible to the build. Prefers the CUDA_VERSION env var (set by official NVIDIA base images); falls back to parsing /usr/local/cuda/version.json which - is the canonical location for the installed toolkit. Returns the - raw string (e.g. "13.2.1") or None when CUDA is not available. + is the canonical location for the installed toolkit. + + Returns: + str or None: The CUDA version as a string (e.g. "13.2.1"), or None if CUDA is not available. """ v = os.environ.get("CUDA_VERSION") if v: