diff --git a/Dockerfile.QA b/Dockerfile.QA
index a8ffbd8a19..d605b12c13 100644
--- a/Dockerfile.QA
+++ b/Dockerfile.QA
@@ -66,7 +66,9 @@ RUN apt-get update && \
             software-properties-common && \
     rm -rf /var/lib/apt/lists/*
 
-RUN pip3 install cmake==4.0.3
+RUN python3 -m venv /opt/venv-tritonserver
+ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
+RUN pip install cmake==4.0.3
 ENV CMAKE_POLICY_VERSION_MINIMUM=3.5
 
 # Add densenet_onnx model to example repo
@@ -348,7 +350,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 RUN rm -f /usr/bin/python && \
     ln -s /usr/bin/python3 /usr/bin/python
 
-RUN pip3 install --upgrade "numpy<2" pillow attrdict future "grpcio<1.68" requests gsutil \
+RUN pip install --upgrade "numpy<2" pillow attrdict future "grpcio<1.68" requests gsutil \
             "awscli<=1.36.40" six "grpcio-channelz<1.68" prettytable virtualenv \
             check-jsonschema
 
@@ -377,7 +379,7 @@ COPY --chown=1000:1000 --from=sdk /workspace/qa/ qa/
 RUN rm -fr qa/L0_copyrights qa/L0_build_variants && \
     find qa/pkgs/ -maxdepth 1 -type f -name \
     "tritonclient-*-py3-none-any.whl" | xargs printf -- '%s[all]' | \
-    xargs pip3 install --upgrade
+    xargs pip install --upgrade
 
 ENV LD_LIBRARY_PATH /opt/tritonserver/qa/clients:${LD_LIBRARY_PATH}
 
diff --git a/build.py b/build.py
index 774728f189..a60525e6a5 100755
--- a/build.py
+++ b/build.py
@@ -930,7 +930,7 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
 ARG TRITON_VERSION
 ARG TRITON_CONTAINER_VERSION
 
-ENV PIP_BREAK_SYSTEM_PACKAGES=1 CMAKE_POLICY_VERSION_MINIMUM=3.5
+ENV CMAKE_POLICY_VERSION_MINIMUM=3.5
 """
     df += """
 # Install docker docker buildx
@@ -994,15 +994,22 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
     df += change_default_python_version_rhel(FLAGS.rhel_py_version)
     df += """
 
-RUN pip3 install --upgrade pip \\
-    && pip3 install --upgrade \\
+# Create a dedicated virtualenv so pip installs are isolated from the
+# distro-managed system Python. Subsequent RUN steps pick up the
+# venv's pip/python via PATH.
+RUN python3 -m venv /opt/venv-tritonserver
+ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
+
+RUN pip install --upgrade pip \\
+    && pip install --upgrade \\
         build \\
         wheel \\
         setuptools \\
         docker \\
         virtualenv \\
         patchelf==0.17.2 \\
-        cmake==4.0.3
+        cmake==4.0.3 \\
+        auditwheel
 """
     df += f"""
 # Install boost version >= 1.78 for boost::span
@@ -1047,7 +1054,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
 ARG TRITON_VERSION
 ARG TRITON_CONTAINER_VERSION
 
-ENV PIP_BREAK_SYSTEM_PACKAGES=1 CMAKE_POLICY_VERSION_MINIMUM=3.5
+ENV CMAKE_POLICY_VERSION_MINIMUM=3.5
 """
     # Install the windows- or linux-specific buildbase dependencies
     if target_platform() == "windows":
@@ -1094,9 +1101,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
             libb64-dev \\
             libgoogle-perftools-dev \\
             python3-dev \\
-            python3-pip \\
-            python3-wheel \\
-            python3-setuptools \\
+            python3-venv \\
             rapidjson-dev \\
             scons \\
             software-properties-common \\
@@ -1110,12 +1115,21 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
             wget \\
             && rm -rf /var/lib/apt/lists/*
 
-RUN pip3 install --upgrade \\
+# Create a dedicated virtualenv so pip installs are isolated from the
+# distro-managed system Python. Subsequent RUN steps pick up the
+# venv's pip/python via PATH.
+RUN python3 -m venv /opt/venv-tritonserver
+ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
+
+RUN pip install --upgrade \\
         build \\
+        wheel \\
+        setuptools \\
         docker \\
         virtualenv \\
         patchelf==0.17.2 \\
         cmake==4.0.3 \\
+        auditwheel \\
         pybind11[global]
 """
 
@@ -1194,7 +1208,6 @@ def create_dockerfile_cibase(ddir, dockerfile_name, argmap):
 ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
 ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
 
-ENV PIP_BREAK_SYSTEM_PACKAGES=1
 """
 
     with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
@@ -1241,14 +1254,26 @@ def create_dockerfile_linux(
 ## Production stage: Create container with just inference server executable
 ############################################################################
 FROM ${BASE_IMAGE}
-
-ENV PIP_BREAK_SYSTEM_PACKAGES=1
 """
 
     df += dockerfile_prepare_container_linux(
        argmap, backends, FLAGS.enable_gpu, target_machine()
    )
 
+    # Create a dedicated virtualenv so the wheel + openai-requirements
+    # pip installs below run in isolation from the distro-managed
+    # system Python (replaces the legacy PIP_BREAK_SYSTEM_PACKAGES=1
+    # escape hatch). If the python-backend branch above already
+    # created /opt/venv-tritonserver (on top of pyenv / Ubuntu
+    # python3), re-running `python3 -m venv` is safe to repeat: it
+    # leaves the existing environment and its packages intact. On
+    # minimal builds without the python backend this is the first
+    # creation. Derived images (Dockerfile.QA) inherit the venv via
+    # PATH.
+    df += """
+RUN python3 -m venv /opt/venv-tritonserver
+ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
+"""
+
     df += f"""
 WORKDIR /opt
 COPY --chown=1000:1000 build/install tritonserver
@@ -1260,7 +1285,7 @@ def create_dockerfile_linux(
     find /opt/tritonserver/python -maxdepth 1 -type f -name \\
     "tritonfrontend-*.whl" | xargs -I {{}} pip install --upgrade {{}}[{FLAGS.triton_wheels_dependencies_group}]
 
-RUN pip3 install -r python/openai/requirements.txt
+RUN pip install -r python/openai/requirements.txt
 """
 
     if not FLAGS.no_core_build:
@@ -1349,20 +1374,22 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     if target_platform() == "rhel":
         df += """
 # Common dependencies.
-RUN yum install -y \\
+RUN dnf install -y \\
         git \\
         gperf \\
-        re2-devel \\
-        openssl-devel \\
-        libtool \\
-        libcurl-devel \\
-        libb64-devel \\
         gperftools-devel \\
-        wget \\
-        python3.12-pip \\
-        numactl-devel
+        libb64-devel \\
+        libcurl-devel \\
+        libtool \\
+        numactl-devel \\
+        openssl-devel \\
+        python3.12-venv \\
+        re2-devel \\
+        wget
 
-RUN pip3 install patchelf==0.17.2
+RUN python3 -m venv /opt/venv-tritonserver
+ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
+RUN pip install patchelf==0.17.2
 """
 
     else:
@@ -1387,6 +1414,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
             wget \\
             {backend_dependencies} \\
             python3-pip \\
+            python3-venv \\
             && rm -rf /var/lib/apt/lists/*
 """.format(
        backend_dependencies=backend_dependencies
@@ -1438,8 +1466,14 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
         # Requires openssl-devel to be installed first for pyenv build to be successful
         df += change_default_python_version_rhel(FLAGS.rhel_py_version)
         df += """
-RUN pip3 install --upgrade pip \\
-    && pip3 install --upgrade \\
+# Create a dedicated virtualenv so pip installs are isolated from the
+# distro-managed system Python. Built after pyenv has provided the
+# desired Python version so the venv inherits that interpreter.
+RUN python3 -m venv /opt/venv-tritonserver
+ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
+
+RUN pip install --upgrade pip \\
+    && pip install --upgrade \\
         wheel \\
         setuptools \\
         \"numpy<2\" \\
@@ -1451,15 +1485,23 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
 RUN apt-get update \\
     && apt-get install -y --no-install-recommends \\
             python3 \\
+            python3-venv \\
             libarchive-dev \\
             python3-pip \\
             python3-wheel \\
             python3-setuptools \\
             libpython3-dev \\
-    && pip3 install --upgrade \\
-        \"numpy<2\" \\
-        virtualenv \\
     && rm -rf /var/lib/apt/lists/*
+
+# Create a dedicated virtualenv so pip installs are isolated from the
+# distro-managed system Python. Subsequent RUN steps pick up the
+# venv's pip/python via PATH.
+RUN python3 -m venv /opt/venv-tritonserver
+ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
+
+RUN pip install --upgrade \\
+    \"numpy<2\" \\
+    virtualenv
 """
 
     if "tensorrtllm" in backends or "vllm" in backends:
         df += """
@@ -1552,10 +1594,12 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine):
 COPY --from=min_container /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.9 /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.9
 
-# patchelf is needed to add deps of libcublasLt.so.12 to libtorch_cuda.so
+# patchelf is needed to add deps of libcublasLt.so.12 to libtorch_cuda.so.
 RUN apt-get update \\
-    && apt-get install -y --no-install-recommends openmpi-bin
-RUN pip3 install patchelf==0.17.2
+    && apt-get install -y --no-install-recommends openmpi-bin python3-venv
+RUN python3 -m venv /opt/venv-tritonserver
+ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
+RUN pip install patchelf==0.17.2
 
 ENV LD_LIBRARY_PATH /usr/local/cuda/targets/{cuda_arch}-linux/lib:/usr/local/cuda/lib64/stubs:${{LD_LIBRARY_PATH}}
 """.format(
@@ -1806,6 +1850,53 @@ def create_docker_build_script(script_name, container_install_dir, container_ci_
             "tritonserver_builder",
         ]
 
+        # Propagate wheel-naming context into the build container so
+        # build_wheel.py can compose the full wheel filename. See
+        # TRI-983. Both CLI flags and host env vars are checked so the
+        # value is defined in CI and local builds alike:
+        #
+        # * CI_PIPELINE_ID — GitLab pipeline ID; shared across all jobs
+        #   in one pipeline so tritonserver and tritonfrontend wheels
+        #   from the same release carry the same PEP 427 build tag.
+        #   In CI, pass `--build-id=${CI_PIPELINE_ID}` to build.py.
+        # * NVIDIA_BUILD_ID — from --build-id; the primary vehicle for
+        #   CI_PIPELINE_ID into the container. build_wheel.py falls back
+        #   to this when CI_PIPELINE_ID is not exported directly.
+        # * NVIDIA_UPSTREAM_VERSION — primarily from
+        #   --upstream-container-version (CI:
+        #   `--upstream-container-version=${NVIDIA_UPSTREAM_VERSION}`;
+        #   local: DEFAULT_TRITON_VERSION_MAP default). Falls back to
+        #   the host env var when the CLI flag is empty so the
+        #   +nv<version> local-version segment is still applied even if
+        #   someone invokes build.py with `--upstream-container-version=`.
+        # * PYPI_RELEASE — when "true", build_wheel.py omits the
+        #   +nv<version>.cu<version> local-version suffix so the wheel
+        #   can be uploaded to PyPI (which rejects local versions).
+        #
+        # CUDA_VERSION is intentionally NOT propagated: the CUDA base
+        # image already sets it as an ENV inside the container, and
+        # the host/CI runner does not. Passing "-e CUDA_VERSION" with
+        # an empty host value would override (and erase) the
+        # container's value.
+        # build_wheel.py reads CUDA_VERSION from the container-local
+        # env (with a /usr/local/cuda/version.json fallback), which is
+        # where it is reliably set.
+        ci_pipeline_id = os.environ.get("CI_PIPELINE_ID")
+        if ci_pipeline_id:
+            runargs += ["-e", f"CI_PIPELINE_ID={ci_pipeline_id}"]
+        if FLAGS.build_id is not None:
+            runargs += ["-e", f"NVIDIA_BUILD_ID={FLAGS.build_id}"]
+        upstream_version = FLAGS.upstream_container_version or os.environ.get(
+            "NVIDIA_UPSTREAM_VERSION"
+        )
+        if upstream_version:
+            runargs += [
+                "-e",
+                f"NVIDIA_UPSTREAM_VERSION={upstream_version}",
+            ]
+        pypi_release = os.environ.get("PYPI_RELEASE")
+        if pypi_release:
+            runargs += ["-e", f"PYPI_RELEASE={pypi_release}"]
+
         if not FLAGS.no_container_interactive:
             runargs += ["-it"]
 
@@ -2849,6 +2940,20 @@ def enable_all():
     log("container version {}".format(FLAGS.container_version))
     log("upstream container version {}".format(FLAGS.upstream_container_version))
 
+    # Explicit visibility for wheel-naming inputs (see TRI-983). If
+    # these are empty here, the wheel filename will lack the expected
+    # build-tag / local-version segments and the log below tells us
+    # which link in the chain dropped the value.
+    log(
+        "wheel-naming inputs: --build-id={!r}, --upstream-container-version={!r}, "
+        "CI_PIPELINE_ID={!r}, env NVIDIA_UPSTREAM_VERSION={!r}, PYPI_RELEASE={!r}".format(
+            FLAGS.build_id,
+            FLAGS.upstream_container_version,
+            os.environ.get("CI_PIPELINE_ID"),
+            os.environ.get("NVIDIA_UPSTREAM_VERSION"),
+            os.environ.get("PYPI_RELEASE"),
+        )
+    )
+
     for ep in FLAGS.endpoint:
         log(f'endpoint "{ep}"')
 
diff --git a/src/python/build_wheel.py b/src/python/build_wheel.py
index 875dd32a70..ef358d0689 100755
--- a/src/python/build_wheel.py
+++ b/src/python/build_wheel.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -32,6 +32,7 @@
 import shutil
 import subprocess
 import sys
+import sysconfig
 from distutils.dir_util import copy_tree
 from tempfile import mkstemp
 
@@ -54,6 +55,100 @@ def cpdir(src, dest):
     copy_tree(src, dest, preserve_symlinks=1)
 
 
+def _detect_cuda_version():
+    """Detect the CUDA toolkit version visible to the build.
+
+    Prefers the CUDA_VERSION env var (set by official NVIDIA base
+    images); falls back to parsing /usr/local/cuda/version.json, which
+    is the canonical location for the installed toolkit. Returns the
+    raw string (e.g. "13.2.1") or None when CUDA is not available.
+
+    CUDA_VERSION is only reliably set inside the build container (the
+    CUDA base image exports it) and must not be propagated from the
+    host — see the matching comment in build.py's docker-run
+    invocation.
+    """
+    v = os.environ.get("CUDA_VERSION")
+    if v:
+        return v
+    try:
+        import json as _json
+
+        with open("/usr/local/cuda/version.json") as f:
+            data = _json.load(f)
+        return data.get("cuda", {}).get("version")
+    except (OSError, ValueError, KeyError):
+        return None
+
+
+def _compose_version(base_version):
+    """Compose the full wheel version string.
+
+    The base version comes from TRITON_VERSION and may already include a
+    PEP 440 pre-release suffix (e.g. "2.69.0.dev0").
+    Append a PEP 440 local-version segment describing the NVIDIA
+    container release and CUDA toolkit the wheel was built against, so
+    consumers can tell an nv26.04 wheel from an nv26.05 wheel (same
+    upstream Triton version) and a cu132 wheel from a cu128 wheel. The
+    local-version segment is informational: per PEP 440, a public pin
+    such as "tritonfrontend==2.69.0.dev0" still matches the suffixed
+    version, because a specifier without a local segment ignores the
+    candidate's local segment.
+
+    When PYPI_RELEASE=true the local-version suffix is omitted entirely:
+    PyPI rejects uploads whose version contains a '+' local segment, so
+    public release builds must use the bare version.
+
+    Sources for the NVIDIA upstream version (first non-empty wins):
+        NVIDIA_UPSTREAM_VERSION      - propagated by build.py via
+                                       `docker run -e` from
+                                       FLAGS.upstream_container_version.
+        NVIDIA_TRITON_SERVER_VERSION - set as ENV in the buildbase image
+                                       at image-build time from the
+                                       TRITON_CONTAINER_VERSION ARG
+                                       (survives even if the docker-run
+                                       `-e` forwarding is not applied).
+        TRITON_CONTAINER_VERSION     - set as ENV in some downstream
+                                       images; same value as above in CI.
+    Source for the CUDA toolkit version:
+        CUDA_VERSION / version.json  - discovered by _detect_cuda_version().
+
+    All sources are optional; if none is present the version is returned
+    unchanged so local non-CI builds stay stable. Each detection
+    outcome is logged to stderr so any future gap is self-announcing
+    in the build log rather than surfacing only as a missing suffix in
+    the wheel filename.
+    """
+    if os.environ.get("PYPI_RELEASE", "").lower() in ("1", "true", "yes"):
+        print(
+            "=== PYPI_RELEASE set: omitting local-version suffix for PyPI compatibility",
+            file=sys.stderr,
+        )
+        return base_version
+    nv = (
+        os.environ.get("NVIDIA_UPSTREAM_VERSION")
+        or os.environ.get("NVIDIA_TRITON_SERVER_VERSION")
+        or os.environ.get("TRITON_CONTAINER_VERSION")
+    )
+    cuda = _detect_cuda_version()
+    print(
+        f"=== Wheel local-version inputs: "
+        f"NVIDIA_UPSTREAM_VERSION={os.environ.get('NVIDIA_UPSTREAM_VERSION')!r} "
+        f"NVIDIA_TRITON_SERVER_VERSION={os.environ.get('NVIDIA_TRITON_SERVER_VERSION')!r} "
+        f"TRITON_CONTAINER_VERSION={os.environ.get('TRITON_CONTAINER_VERSION')!r} "
+        f"-> nv={nv!r}, cuda={cuda!r}",
+        file=sys.stderr,
+    )
+    local = []
+    if nv:
+        local.append(f"nv{nv}")
+    if cuda:
+        # "13.2" / "13.2.0" / "13.2.1" -> "cu132"
+        parts = cuda.split(".")
+        if len(parts) >= 2 and parts[0].isdigit() and parts[1].isdigit():
+            local.append(f"cu{parts[0]}{parts[1]}")
+    if local:
+        return f"{base_version}+{'.'.join(local)}"
+    return base_version
+
+
 def sed(pattern, replace, source, dest=None):
     name = None
     if dest:
@@ -115,21 +210,134 @@ def main():
     shutil.copyfile("setup.py", os.path.join(FLAGS.whl_dir, "setup.py"))
 
     os.chdir(FLAGS.whl_dir)
+
+    # The wheel ships an arch-specific CPython extension
+    # (tritonfrontend/_c/<module>.so). Pass --plat-name so the wheel is
+    # tagged with the current platform (e.g. linux_x86_64 / linux_aarch64)
+    # instead of the misleading "none-any".
+    plat_name = sysconfig.get_platform().replace("-", "_").replace(".", "_")
     print("=== Building wheel")
-    args = ["python3", "setup.py", "bdist_wheel"]
+    args = ["python3", "setup.py", "bdist_wheel", "--plat-name", plat_name]
+
+    # PEP 427 "build tag": an optional segment between version and
+    # python-tag that lets two wheels of the same version coexist
+    # (e.g. re-runs of the same pipeline). Sources, first non-empty
+    # and usable wins:
+    #   CI_PIPELINE_ID  - GitLab pipeline ID; shared by all jobs in one
+    #                     pipeline so tritonserver and tritonfrontend
+    #                     wheels from the same release carry the same
+    #                     tag.
+    #                     In CI, build.py is invoked with
+    #                     `--build-id=${CI_PIPELINE_ID}`.
+    #   NVIDIA_BUILD_ID - set from build.py's --build-id flag; primary
+    #                     vehicle for CI_PIPELINE_ID into the container.
+    #   BUILD_NUMBER    - generic CI systems that use this instead.
+    # PEP 427 requires the build tag to start with a digit. Skip the
+    # slot when the value is empty (the default for local builds
+    # without --build-id) or does not start with a digit.
+    if os.environ.get("PYPI_RELEASE", "").lower() in ("1", "true", "yes"):
+        build_tag = None
+    else:
+        build_tag = (
+            os.environ.get("CI_PIPELINE_ID")
+            or os.environ.get("NVIDIA_BUILD_ID")
+            or os.environ.get("BUILD_NUMBER")
+        )
+    print(
+        f"=== Wheel build-tag inputs: "
+        f"PYPI_RELEASE={os.environ.get('PYPI_RELEASE')!r} "
+        f"CI_PIPELINE_ID={os.environ.get('CI_PIPELINE_ID')!r} "
+        f"NVIDIA_BUILD_ID={os.environ.get('NVIDIA_BUILD_ID')!r} "
+        f"BUILD_NUMBER={os.environ.get('BUILD_NUMBER')!r} "
+        f"-> build-tag={build_tag!r}",
+        file=sys.stderr,
+    )
+    if build_tag and build_tag[:1].isdigit():
+        args += ["--build-number", build_tag]
 
     wenv = os.environ.copy()
-    wenv["VERSION"] = FLAGS.triton_version
+    wenv["VERSION"] = _compose_version(FLAGS.triton_version)
     wenv["TRITON_PYBIND"] = PYBIND_LIB
     p = subprocess.Popen(args, env=wenv)
     p.wait()
     fail_if(p.returncode != 0, "setup.py failed")
 
-    cpdir("dist", FLAGS.dest_dir)
+    # Post-process with auditwheel so the wheel is tagged with a proper
+    # manylinux_2_X_<arch> platform (required by canonical PyPI). When
+    # auditwheel is unavailable in the build image we keep the
+    # linux_<arch> wheel and emit a warning; the Poetry/pip lock-file
+    # problem is already solved by the distinct filename, and the tag can
+    # be fixed up in a follow-up publish step if needed.
+    _repair_wheel_with_auditwheel(FLAGS.whl_dir, FLAGS.dest_dir)
+
     print(f"=== Output wheel file is in: {FLAGS.dest_dir}")
     touch(os.path.join(FLAGS.dest_dir, "stamp.whl"))
 
 
+def _repair_wheel_with_auditwheel(whl_dir, dest_dir):
+    """Upgrade a linux_<arch> wheel to manylinux_2_X_<arch>.
+
+    Ports the pattern established for tritonclient in TRI-286:
+      1. auditwheel repair — auto-discovers the minimum manylinux tag
+         by inspecting glibc symbol requirements of the embedded .so.
+      2. python -m wheel tags fallback — used when auditwheel reports
+         "no ELF" (the wheel has no native extension, e.g. a downstream
+         build disabled bindings). Mirrors the documented fallback.
+      3. No-op with warning — when auditwheel is not installed in the
+         build image, keep the linux_<arch> wheel as-is so the build
+         does not regress.
+    """
+    if shutil.which("auditwheel") is None:
+        print(
+            "=== WARNING: auditwheel not found on PATH; keeping linux_<arch> "
+            "wheel as-is. Install auditwheel in the build image to produce "
+            "PyPI-acceptable manylinux_2_X_<arch> wheels.",
+            file=sys.stderr,
+        )
+        cpdir("dist", dest_dir)
+        return
+
+    dist_dir = os.path.join(whl_dir, "dist")
+    wheels = [
+        os.path.join(dist_dir, w) for w in os.listdir(dist_dir) if w.endswith(".whl")
+    ]
+    fail_if(not wheels, "no wheel produced by setup.py")
+
+    for wheel_path in wheels:
+        print(f"=== Running auditwheel repair on {wheel_path}")
+        r = subprocess.run(
+            ["auditwheel", "repair", wheel_path, "--wheel-dir", dest_dir],
+            capture_output=True,
+            text=True,
+        )
+        # `auditwheel` logs via Python's logging module, which writes to
+        # stderr — the "no ELF" sentinel only appears there, not in
+        # stdout. See TRI-286 root-cause write-up.
+        if r.returncode != 0 and "no ELF" in r.stderr:
+            arch = os.uname().machine
+            manylinux_tag = f"manylinux_2_28_{arch}"
+            print(
+                f"=== Pure-Python wheel detected; falling back to wheel tags "
+                f"({manylinux_tag})"
+            )
+            copied = os.path.join(dest_dir, os.path.basename(wheel_path))
+            shutil.copy(wheel_path, copied)
+            # `wheel tags --remove` replaces the linux_<arch> wheel in
+            # dest_dir with the correctly-tagged manylinux one.
+            r2 = subprocess.run(
+                [
+                    "python3",
+                    "-m",
+                    "wheel",
+                    "tags",
+                    "--platform-tag",
+                    manylinux_tag,
+                    "--remove",
+                    copied,
+                ]
+            )
+            fail_if(r2.returncode != 0, "wheel tags fallback failed")
+        elif r.returncode != 0:
+            sys.stderr.write(r.stderr)
+            fail_if(True, "auditwheel repair failed")
+
+
 if __name__ == "__main__":
     main()
diff --git a/src/python/setup.py b/src/python/setup.py
index 2c7c12a9ee..c7536c9d63 100755
--- a/src/python/setup.py
+++ b/src/python/setup.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -26,34 +26,35 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
-import sys
 
-from setuptools import find_packages, setup
-
-if "--plat-name" in sys.argv:
-    PLATFORM_FLAG = sys.argv[sys.argv.index("--plat-name") + 1]
-else:
-    PLATFORM_FLAG = "any"
+from setuptools import Distribution, find_packages, setup
 
 if "VERSION" not in os.environ:
     raise Exception("envvar VERSION must be specified")
 VERSION = os.environ["VERSION"]
 
-try:
-    from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
-
-    class bdist_wheel(_bdist_wheel):
-        def finalize_options(self):
-            _bdist_wheel.finalize_options(self)
-            self.root_is_pure = False
-
-        def get_tag(self):
-            pyver, abi, plat = "py3", "none", PLATFORM_FLAG
-            return pyver, abi, plat
+# The wheel bundles a CPython-ABI-specific binding
+# (tritonfrontend/_c/<module>.so, filename encodes e.g. "cpython-312-..."),
+# so the wheel is only loadable under the matching interpreter and arch.
+# The binding is copied into package_data at build time rather than
+# declared via setup(ext_modules=...), so setuptools would otherwise
+# treat the distribution as pure-Python and emit "Root-Is-Purelib: true"
+# in the WHEEL metadata — which auditwheel rejects when it finds the
+# .so embedded in the purelib tree.
+#
+# Signaling has_ext_modules()=True via a custom Distribution subclass
+# is the canonical way to tell setuptools the wheel is binary without
+# triggering a fake compilation step. setuptools then:
+#   - sets Root-Is-Purelib to false (required for auditwheel repair),
+#   - auto-derives the correct cp<ver>-cp<ver>-linux_<arch> tag from
+#     the current interpreter and sysconfig.get_platform().
+# See TRI-983.
+class BinaryDistribution(Distribution):
+    def has_ext_modules(self):
+        return True
 
-except ImportError:
-    bdist_wheel = None
 
 this_directory = os.path.abspath(os.path.dirname(__file__))
@@ -105,7 +106,7 @@ def get_tag(self):
         "": platform_package_data,
     },
     zip_safe=False,
-    cmdclass={"bdist_wheel": bdist_wheel},
+    distclass=BinaryDistribution,
     data_files=data_files,
     install_requires=["tritonserver", "pydantic==2.10.6"],
     extras_require={"GPU": gpu_extras, "test": test_extras, "all": all_extras},
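For reference, the pieces above compose like this. The snippet below is an illustrative sanity check, not part of the patch: it assumes the third-party `packaging` library is available, and the version, build tag, and filename are hypothetical examples (CI_PIPELINE_ID=12345678, NVIDIA_UPSTREAM_VERSION=26.04, CUDA 13.2.x).

# Illustrative only; hypothetical values; requires `pip install packaging`.
from packaging.specifiers import SpecifierSet
from packaging.utils import parse_wheel_filename
from packaging.version import Version

# What _compose_version() would emit for TRITON_VERSION=2.69.0.dev0,
# NVIDIA_UPSTREAM_VERSION=26.04, CUDA_VERSION=13.2.1:
v = Version("2.69.0.dev0+nv26.04.cu132")

# PEP 440: a specifier without a local segment ignores the candidate's
# local segment, so existing public pins keep matching the nv wheels.
assert v in SpecifierSet("==2.69.0.dev0")

# PEP 440 normalization gotcha: the numeric local segment "04" drops its
# leading zero, so "+nv26.04" round-trips as "+nv26.4".
assert v.local == "nv26.4.cu132"

# PEP 427 filename with the build tag taken from CI_PIPELINE_ID=12345678:
name, ver, build, _tags = parse_wheel_filename(
    "tritonfrontend-2.69.0.dev0+nv26.04.cu132-12345678-cp312-cp312-manylinux_2_28_x86_64.whl"
)
assert name == "tritonfrontend" and build == (12345678, "")
assert str(ver) == "2.69.0.dev0+nv26.4.cu132"  # normalized form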