diff --git a/Dockerfile.QA b/Dockerfile.QA
index a8ffbd8a19..d605b12c13 100644
--- a/Dockerfile.QA
+++ b/Dockerfile.QA
@@ -66,7 +66,9 @@ RUN apt-get update && \
             software-properties-common && \
     rm -rf /var/lib/apt/lists/*
 
-RUN pip3 install cmake==4.0.3
+RUN python3 -m venv /opt/venv-tritonserver
+ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
+RUN pip install cmake==4.0.3
 ENV CMAKE_POLICY_VERSION_MINIMUM=3.5
 
 # Add densenet_onnx model to example repo
@@ -348,7 +350,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 RUN rm -f /usr/bin/python && \
     ln -s /usr/bin/python3 /usr/bin/python
 
-RUN pip3 install --upgrade "numpy<2" pillow attrdict future "grpcio<1.68" requests gsutil \
+RUN pip install --upgrade "numpy<2" pillow attrdict future "grpcio<1.68" requests gsutil \
             "awscli<=1.36.40" six "grpcio-channelz<1.68" prettytable virtualenv \
             check-jsonschema
 
@@ -377,7 +379,7 @@ COPY --chown=1000:1000 --from=sdk /workspace/qa/ qa/
 RUN rm -fr qa/L0_copyrights qa/L0_build_variants && \
     find qa/pkgs/ -maxdepth 1 -type f -name \
     "tritonclient-*-py3-none-any.whl" | xargs printf -- '%s[all]' | \
-    xargs pip3 install --upgrade
+    xargs pip install --upgrade
 
 ENV LD_LIBRARY_PATH /opt/tritonserver/qa/clients:${LD_LIBRARY_PATH}
 
diff --git a/build.py b/build.py
index 774728f189..a60525e6a5 100755
--- a/build.py
+++ b/build.py
@@ -930,7 +930,7 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
 ARG TRITON_VERSION
 ARG TRITON_CONTAINER_VERSION
 
-ENV PIP_BREAK_SYSTEM_PACKAGES=1 CMAKE_POLICY_VERSION_MINIMUM=3.5
+ENV CMAKE_POLICY_VERSION_MINIMUM=3.5
 """
     df += """
 # Install docker docker buildx
@@ -994,15 +994,22 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap):
     df += change_default_python_version_rhel(FLAGS.rhel_py_version)
     df += """
 
-RUN pip3 install --upgrade pip \\
-    && pip3 install --upgrade \\
+# Create a dedicated virtualenv so pip installs are isolated from the
+# distro-managed system Python. Subsequent RUN steps pick up the
+# venv's pip/python via PATH.
+RUN python3 -m venv /opt/venv-tritonserver
+ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
+
+RUN pip install --upgrade pip \\
+    && pip install --upgrade \\
         build \\
         wheel \\
         setuptools \\
         docker \\
         virtualenv \\
         patchelf==0.17.2 \\
-        cmake==4.0.3
+        cmake==4.0.3 \\
+        auditwheel
 """
     df += f"""
 # Install boost version >= 1.78 for boost::span
@@ -1047,7 +1054,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
 ARG TRITON_VERSION
 ARG TRITON_CONTAINER_VERSION
 
-ENV PIP_BREAK_SYSTEM_PACKAGES=1 CMAKE_POLICY_VERSION_MINIMUM=3.5
+ENV CMAKE_POLICY_VERSION_MINIMUM=3.5
 """
     # Install the windows- or linux-specific buildbase dependencies
     if target_platform() == "windows":
@@ -1094,9 +1101,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
             libb64-dev \\
             libgoogle-perftools-dev \\
             python3-dev \\
-            python3-pip \\
-            python3-wheel \\
-            python3-setuptools \\
+            python3-venv \\
             rapidjson-dev \\
             scons \\
             software-properties-common \\
@@ -1110,12 +1115,21 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
             wget \\
             && rm -rf /var/lib/apt/lists/*
 
-RUN pip3 install --upgrade \\
+# Create a dedicated virtualenv so pip installs are isolated from the
+# distro-managed system Python. Subsequent RUN steps pick up the
+# venv's pip/python via PATH.
+RUN python3 -m venv /opt/venv-tritonserver
+ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
+
+RUN pip install --upgrade \\
         build \\
+        wheel \\
+        setuptools \\
         docker \\
         virtualenv \\
         patchelf==0.17.2 \\
         cmake==4.0.3 \\
+        auditwheel \\
         pybind11[global]
 """
 
@@ -1194,7 +1208,6 @@ def create_dockerfile_cibase(ddir, dockerfile_name, argmap):
 ENV TRITON_SERVER_VERSION ${TRITON_VERSION}
 ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION}
 
-ENV PIP_BREAK_SYSTEM_PACKAGES=1
 """
 
     with open(os.path.join(ddir, dockerfile_name), "w") as dfile:
@@ -1241,14 +1254,26 @@ def create_dockerfile_linux(
 ## Production stage: Create container with just inference server executable
 ############################################################################
 FROM ${BASE_IMAGE}
-
-ENV PIP_BREAK_SYSTEM_PACKAGES=1
 """
 
     df += dockerfile_prepare_container_linux(
        argmap, backends, FLAGS.enable_gpu, target_machine()
    )
 
+    # Create a dedicated virtualenv so the wheel + openai-requirements
+    # pip installs below run in isolation from the distro-managed
+    # system Python (replaces the legacy PIP_BREAK_SYSTEM_PACKAGES=1
+    # escape hatch). If the python-backend branch above already
+    # created /opt/venv-tritonserver (on top of pyenv / Ubuntu
+    # python3), re-running `python3 -m venv` is safe to repeat: it
+    # leaves the existing environment and its packages intact. On
+    # minimal builds without the python backend this is the first
+    # creation. Derived images (Dockerfile.QA) inherit the venv via
+    # PATH.
+    df += """
+RUN python3 -m venv /opt/venv-tritonserver
+ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
+"""
+
     df += f"""
 WORKDIR /opt
 COPY --chown=1000:1000 build/install tritonserver
@@ -1260,7 +1285,7 @@ def create_dockerfile_linux(
     find /opt/tritonserver/python -maxdepth 1 -type f -name \\
     "tritonfrontend-*.whl" | xargs -I {{}} pip install --upgrade {{}}[{FLAGS.triton_wheels_dependencies_group}]
 
-RUN pip3 install -r python/openai/requirements.txt
+RUN pip install -r python/openai/requirements.txt
 """
 
     if not FLAGS.no_core_build:
@@ -1349,20 +1374,22 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
     if target_platform() == "rhel":
         df += """
 # Common dependencies.
-RUN yum install -y \\
+RUN dnf install -y \\
         git \\
         gperf \\
-        re2-devel \\
-        openssl-devel \\
-        libtool \\
-        libcurl-devel \\
-        libb64-devel \\
         gperftools-devel \\
-        wget \\
-        python3.12-pip \\
-        numactl-devel
+        libb64-devel \\
+        libcurl-devel \\
+        libtool \\
+        numactl-devel \\
+        openssl-devel \\
+        python3.12-venv \\
+        re2-devel \\
+        wget
 
-RUN pip3 install patchelf==0.17.2
+RUN python3 -m venv /opt/venv-tritonserver
+ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
+RUN pip install patchelf==0.17.2
 """
 
     else:
@@ -1387,6 +1414,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
             wget \\
             {backend_dependencies} \\
             python3-pip \\
+            python3-venv \\
             && rm -rf /var/lib/apt/lists/*
 """.format(
        backend_dependencies=backend_dependencies
@@ -1438,8 +1466,14 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
         # Requires openssl-devel to be installed first for pyenv build to be successful
         df += change_default_python_version_rhel(FLAGS.rhel_py_version)
         df += """
-RUN pip3 install --upgrade pip \\
-    && pip3 install --upgrade \\
+# Create a dedicated virtualenv so pip installs are isolated from the
+# distro-managed system Python. Built after pyenv has provided the
+# desired Python version so the venv inherits that interpreter.
+RUN python3 -m venv /opt/venv-tritonserver
+ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
+
+RUN pip install --upgrade pip \\
+    && pip install --upgrade \\
         wheel \\
         setuptools \\
         \"numpy<2\" \\
@@ -1451,15 +1485,23 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
 RUN apt-get update \\
     && apt-get install -y --no-install-recommends \\
             python3 \\
+            python3-venv \\
             libarchive-dev \\
             python3-pip \\
             python3-wheel \\
             python3-setuptools \\
             libpython3-dev \\
-    && pip3 install --upgrade \\
-        \"numpy<2\" \\
-        virtualenv \\
     && rm -rf /var/lib/apt/lists/*
+
+# Create a dedicated virtualenv so pip installs are isolated from the
+# distro-managed system Python. Subsequent RUN steps pick up the
+# venv's pip/python via PATH.
+RUN python3 -m venv /opt/venv-tritonserver
+ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
+
+RUN pip install --upgrade \\
+    \"numpy<2\" \\
+    virtualenv
 """
 
     if "tensorrtllm" in backends or "vllm" in backends:
         df += """
@@ -1552,10 +1594,12 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine):
 COPY --from=min_container /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.9 /usr/lib/{libs_arch}-linux-gnu/libcudnn.so.9
 
-# patchelf is needed to add deps of libcublasLt.so.12 to libtorch_cuda.so
+# patchelf is needed to add deps of libcublasLt.so.12 to libtorch_cuda.so.
 RUN apt-get update \\
-    && apt-get install -y --no-install-recommends openmpi-bin
-RUN pip3 install patchelf==0.17.2
+    && apt-get install -y --no-install-recommends openmpi-bin python3-venv
+RUN python3 -m venv /opt/venv-tritonserver
+ENV PATH="/opt/venv-tritonserver/bin:${PATH}"
+RUN pip install patchelf==0.17.2
 
 ENV LD_LIBRARY_PATH /usr/local/cuda/targets/{cuda_arch}-linux/lib:/usr/local/cuda/lib64/stubs:${{LD_LIBRARY_PATH}}
 """.format(
@@ -1806,6 +1850,53 @@ def create_docker_build_script(script_name, container_install_dir, container_ci_
             "tritonserver_builder",
         ]
 
+        # Propagate wheel-naming context into the build container so
+        # build_wheel.py can compose the full wheel filename. See
+        # TRI-983. Both CLI flags and host env vars are checked so the
+        # value is defined in CI and local builds alike:
+        #
+        # * CI_PIPELINE_ID — GitLab pipeline ID; shared across all jobs
+        #   in one pipeline so tritonserver and tritonfrontend wheels
+        #   from the same release carry the same PEP 427 build tag.
+        #   In CI, pass `--build-id=${CI_PIPELINE_ID}` to build.py.
+        # * NVIDIA_BUILD_ID — from --build-id; the primary vehicle for
+        #   CI_PIPELINE_ID into the container. build_wheel.py falls back
+        #   to this when CI_PIPELINE_ID is not exported directly.
+        # * NVIDIA_UPSTREAM_VERSION — primarily from
+        #   --upstream-container-version (CI:
+        #   `--upstream-container-version=${NVIDIA_UPSTREAM_VERSION}`;
+        #   local: DEFAULT_TRITON_VERSION_MAP default). Falls back to
+        #   the host env var when the CLI flag is empty so the
+        #   +nv<version> local-version segment is still applied even if
+        #   someone invokes build.py with `--upstream-container-version=`.
+        # * PYPI_RELEASE — when "true", build_wheel.py omits the
+        #   +nv<version>.cu<version> local-version suffix so the wheel
+        #   can be uploaded to PyPI (which rejects local versions).
+        #
+        # CUDA_VERSION is intentionally NOT propagated: the CUDA base
+        # image already sets it as an ENV inside the container, and
+        # the host/CI runner does not. Passing "-e CUDA_VERSION" with
+        # an empty host value would override (and erase) the
+        # container's value.
+        # build_wheel.py reads CUDA_VERSION from the container-local
+        # env (with a /usr/local/cuda/version.json fallback), which is
+        # where it is reliably set.
+        ci_pipeline_id = os.environ.get("CI_PIPELINE_ID")
+        if ci_pipeline_id:
+            runargs += ["-e", f"CI_PIPELINE_ID={ci_pipeline_id}"]
+        if FLAGS.build_id is not None:
+            runargs += ["-e", f"NVIDIA_BUILD_ID={FLAGS.build_id}"]
+        upstream_version = FLAGS.upstream_container_version or os.environ.get(
+            "NVIDIA_UPSTREAM_VERSION"
+        )
+        if upstream_version:
+            runargs += [
+                "-e",
+                f"NVIDIA_UPSTREAM_VERSION={upstream_version}",
+            ]
+        pypi_release = os.environ.get("PYPI_RELEASE")
+        if pypi_release:
+            runargs += ["-e", f"PYPI_RELEASE={pypi_release}"]
+
         if not FLAGS.no_container_interactive:
             runargs += ["-it"]
 
@@ -2849,6 +2940,20 @@ def enable_all():
     log("container version {}".format(FLAGS.container_version))
     log("upstream container version {}".format(FLAGS.upstream_container_version))
 
+    # Explicit visibility for wheel-naming inputs (see TRI-983). If
+    # these are empty here, the wheel filename will lack the expected
+    # build-tag / local-version segments and the log below tells us
+    # which link in the chain dropped the value.
+    log(
+        "wheel-naming inputs: --build-id={!r}, --upstream-container-version={!r}, "
+        "CI_PIPELINE_ID={!r}, env NVIDIA_UPSTREAM_VERSION={!r}, PYPI_RELEASE={!r}".format(
+            FLAGS.build_id,
+            FLAGS.upstream_container_version,
+            os.environ.get("CI_PIPELINE_ID"),
+            os.environ.get("NVIDIA_UPSTREAM_VERSION"),
+            os.environ.get("PYPI_RELEASE"),
+        )
+    )
+
     for ep in FLAGS.endpoint:
         log(f'endpoint "{ep}"')
 
diff --git a/src/python/build_wheel.py b/src/python/build_wheel.py
index 875dd32a70..ef358d0689 100755
--- a/src/python/build_wheel.py
+++ b/src/python/build_wheel.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -32,6 +32,7 @@
 import shutil
 import subprocess
 import sys
+import sysconfig
 from distutils.dir_util import copy_tree
 from tempfile import mkstemp
 
@@ -54,6 +55,100 @@ def cpdir(src, dest):
     copy_tree(src, dest, preserve_symlinks=1)
 
 
+def _detect_cuda_version():
+    """Detect the CUDA toolkit version visible to the build.
+
+    Prefers the CUDA_VERSION env var (set by official NVIDIA base
+    images); falls back to parsing /usr/local/cuda/version.json, which
+    is the canonical location for the installed toolkit. Returns the
+    raw string (e.g. "13.2.1") or None when CUDA is not available.
+
+    CUDA_VERSION is only reliably set inside the build container (the
+    CUDA base image exports it) and must not be propagated from the
+    host — see the matching comment in build.py's docker-run
+    invocation.
+    """
+    v = os.environ.get("CUDA_VERSION")
+    if v:
+        return v
+    try:
+        import json as _json
+
+        with open("/usr/local/cuda/version.json") as f:
+            data = _json.load(f)
+        return data.get("cuda", {}).get("version")
+    except (OSError, ValueError, KeyError):
+        return None
+
+
+def _compose_version(base_version):
+    """Compose the full wheel version string.
+
+    The base version comes from TRITON_VERSION and may already include a
+    PEP 440 pre-release suffix (e.g. "2.69.0.dev0").
+    Append a PEP 440 local-version segment describing the NVIDIA
+    container release and CUDA toolkit the wheel was built against, so
+    consumers can tell an nv26.04 wheel from an nv26.05 wheel (same
+    upstream Triton version) and a cu132 wheel from a cu128 wheel. The
+    local-version segment is informational: per PEP 440, a public pin
+    such as "tritonfrontend==2.69.0.dev0" still matches the suffixed
+    version, because a specifier without a local segment ignores the
+    candidate's local segment.
+
+    When PYPI_RELEASE=true the local-version suffix is omitted entirely:
+    PyPI rejects uploads whose version contains a '+' local segment, so
+    public release builds must use the bare version.
+
+    Sources for the NVIDIA upstream version (first non-empty wins):
+        NVIDIA_UPSTREAM_VERSION      - propagated by build.py via
+                                       `docker run -e` from
+                                       FLAGS.upstream_container_version.
+        NVIDIA_TRITON_SERVER_VERSION - set as ENV in the buildbase image
+                                       at image-build time from the
+                                       TRITON_CONTAINER_VERSION ARG
+                                       (survives even if the docker-run
+                                       `-e` forwarding is not applied).
+        TRITON_CONTAINER_VERSION     - set as ENV in some downstream
+                                       images; same value as above in CI.
+    Source for the CUDA toolkit version:
+        CUDA_VERSION / version.json  - discovered by _detect_cuda_version().
+
+    All sources are optional; if none is present the version is returned
+    unchanged so local non-CI builds stay stable. Each detection
+    outcome is logged to stderr so any future gap is self-announcing
+    in the build log rather than surfacing only as a missing suffix in
+    the wheel filename.
+    """
+    if os.environ.get("PYPI_RELEASE", "").lower() in ("1", "true", "yes"):
+        print(
+            "=== PYPI_RELEASE set: omitting local-version suffix for PyPI compatibility",
+            file=sys.stderr,
+        )
+        return base_version
+    nv = (
+        os.environ.get("NVIDIA_UPSTREAM_VERSION")
+        or os.environ.get("NVIDIA_TRITON_SERVER_VERSION")
+        or os.environ.get("TRITON_CONTAINER_VERSION")
+    )
+    cuda = _detect_cuda_version()
+    print(
+        f"=== Wheel local-version inputs: "
+        f"NVIDIA_UPSTREAM_VERSION={os.environ.get('NVIDIA_UPSTREAM_VERSION')!r} "
+        f"NVIDIA_TRITON_SERVER_VERSION={os.environ.get('NVIDIA_TRITON_SERVER_VERSION')!r} "
+        f"TRITON_CONTAINER_VERSION={os.environ.get('TRITON_CONTAINER_VERSION')!r} "
+        f"-> nv={nv!r}, cuda={cuda!r}",
+        file=sys.stderr,
+    )
+    local = []
+    if nv:
+        local.append(f"nv{nv}")
+    if cuda:
+        # "13.2" / "13.2.0" / "13.2.1" -> "cu132"
+        parts = cuda.split(".")
+        if len(parts) >= 2 and parts[0].isdigit() and parts[1].isdigit():
+            local.append(f"cu{parts[0]}{parts[1]}")
+    if local:
+        return f"{base_version}+{'.'.join(local)}"
+    return base_version
+
+
 def sed(pattern, replace, source, dest=None):
     name = None
     if dest:
@@ -115,21 +210,134 @@ def main():
     shutil.copyfile("setup.py", os.path.join(FLAGS.whl_dir, "setup.py"))
 
     os.chdir(FLAGS.whl_dir)
+
+    # The wheel ships an arch-specific CPython extension
+    # (tritonfrontend/_c/<module>.so). Pass --plat-name so the wheel is
+    # tagged with the current platform (e.g. linux_x86_64 / linux_aarch64)
+    # instead of the misleading "none-any".
+    plat_name = sysconfig.get_platform().replace("-", "_").replace(".", "_")
     print("=== Building wheel")
-    args = ["python3", "setup.py", "bdist_wheel"]
+    args = ["python3", "setup.py", "bdist_wheel", "--plat-name", plat_name]
+
+    # PEP 427 "build tag": an optional segment between version and
+    # python-tag that lets two wheels of the same version coexist
+    # (e.g. re-runs of the same pipeline). Sources, first non-empty
+    # and usable wins:
+    #   CI_PIPELINE_ID  - GitLab pipeline ID; shared by all jobs in one
+    #                     pipeline so tritonserver and tritonfrontend
+    #                     wheels from the same release carry the same
+    #                     tag.
+    #                     In CI, build.py is invoked with
+    #                     `--build-id=${CI_PIPELINE_ID}`.
+    #   NVIDIA_BUILD_ID - set from build.py's --build-id flag; primary
+    #                     vehicle for CI_PIPELINE_ID into the container.
+    #   BUILD_NUMBER    - generic CI systems that use this instead.
+    # PEP 427 requires the build tag to start with a digit. Skip the
+    # slot when the value is empty (the default for local builds
+    # without --build-id) or does not start with a digit.
+    if os.environ.get("PYPI_RELEASE", "").lower() in ("1", "true", "yes"):
+        build_tag = None
+    else:
+        build_tag = (
+            os.environ.get("CI_PIPELINE_ID")
+            or os.environ.get("NVIDIA_BUILD_ID")
+            or os.environ.get("BUILD_NUMBER")
+        )
+    print(
+        f"=== Wheel build-tag inputs: "
+        f"PYPI_RELEASE={os.environ.get('PYPI_RELEASE')!r} "
+        f"CI_PIPELINE_ID={os.environ.get('CI_PIPELINE_ID')!r} "
+        f"NVIDIA_BUILD_ID={os.environ.get('NVIDIA_BUILD_ID')!r} "
+        f"BUILD_NUMBER={os.environ.get('BUILD_NUMBER')!r} "
+        f"-> build-tag={build_tag!r}",
+        file=sys.stderr,
+    )
+    if build_tag and build_tag[:1].isdigit():
+        args += ["--build-number", build_tag]
 
     wenv = os.environ.copy()
-    wenv["VERSION"] = FLAGS.triton_version
+    wenv["VERSION"] = _compose_version(FLAGS.triton_version)
     wenv["TRITON_PYBIND"] = PYBIND_LIB
     p = subprocess.Popen(args, env=wenv)
     p.wait()
     fail_if(p.returncode != 0, "setup.py failed")
 
-    cpdir("dist", FLAGS.dest_dir)
+    # Post-process with auditwheel so the wheel is tagged with a proper
+    # manylinux_2_X_<arch> platform (required by canonical PyPI). When
+    # auditwheel is unavailable in the build image we keep the
+    # linux_<arch> wheel and emit a warning; the Poetry/pip lock-file
+    # problem is already solved by the distinct filename, and the tag can
+    # be fixed up in a follow-up publish step if needed.
+    _repair_wheel_with_auditwheel(FLAGS.whl_dir, FLAGS.dest_dir)
+
     print(f"=== Output wheel file is in: {FLAGS.dest_dir}")
     touch(os.path.join(FLAGS.dest_dir, "stamp.whl"))
 
 
+def _repair_wheel_with_auditwheel(whl_dir, dest_dir):
+    """Upgrade a linux_<arch> wheel to manylinux_2_X_<arch>.
+
+    Ports the pattern established for tritonclient in TRI-286:
+      1. auditwheel repair — auto-discovers the minimum manylinux tag
+         by inspecting glibc symbol requirements of the embedded .so.
+      2. python -m wheel tags fallback — used when auditwheel reports
+         "no ELF" (the wheel has no native extension, e.g. a downstream
+         build disabled bindings). Mirrors the documented fallback.
+      3. No-op with warning — when auditwheel is not installed in the
+         build image, keep the linux_<arch> wheel as-is so the build
+         does not regress.
+    """
+    if shutil.which("auditwheel") is None:
+        print(
+            "=== WARNING: auditwheel not found on PATH; keeping linux_<arch> "
+            "wheel as-is. Install auditwheel in the build image to produce "
+            "PyPI-acceptable manylinux_2_X_<arch> wheels.",
+            file=sys.stderr,
+        )
+        cpdir("dist", dest_dir)
+        return
+
+    dist_dir = os.path.join(whl_dir, "dist")
+    wheels = [
+        os.path.join(dist_dir, w) for w in os.listdir(dist_dir) if w.endswith(".whl")
+    ]
+    fail_if(not wheels, "no wheel produced by setup.py")
+
+    for wheel_path in wheels:
+        print(f"=== Running auditwheel repair on {wheel_path}")
+        r = subprocess.run(
+            ["auditwheel", "repair", wheel_path, "--wheel-dir", dest_dir],
+            capture_output=True,
+            text=True,
+        )
+        # `auditwheel` logs via Python's logging module, which writes to
+        # stderr — the "no ELF" sentinel only appears there, not in
+        # stdout. See TRI-286 root-cause write-up.
+        if r.returncode != 0 and "no ELF" in r.stderr:
+            arch = os.uname().machine
+            manylinux_tag = f"manylinux_2_28_{arch}"
+            print(
+                f"=== Pure-Python wheel detected; falling back to wheel tags "
+                f"({manylinux_tag})"
+            )
+            copied = os.path.join(dest_dir, os.path.basename(wheel_path))
+            shutil.copy(wheel_path, copied)
+            # `wheel tags --remove` replaces the linux_<arch> wheel in
+            # dest_dir with the correctly-tagged manylinux one.
+            r2 = subprocess.run(
+                [
+                    "python3",
+                    "-m",
+                    "wheel",
+                    "tags",
+                    "--platform-tag",
+                    manylinux_tag,
+                    "--remove",
+                    copied,
+                ]
+            )
+            fail_if(r2.returncode != 0, "wheel tags fallback failed")
+        elif r.returncode != 0:
+            sys.stderr.write(r.stderr)
+            fail_if(True, "auditwheel repair failed")
+
+
 if __name__ == "__main__":
     main()
diff --git a/src/python/setup.py b/src/python/setup.py
index 2c7c12a9ee..c7536c9d63 100755
--- a/src/python/setup.py
+++ b/src/python/setup.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -26,34 +26,35 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
-import sys
 
-from setuptools import find_packages, setup
-
-if "--plat-name" in sys.argv:
-    PLATFORM_FLAG = sys.argv[sys.argv.index("--plat-name") + 1]
-else:
-    PLATFORM_FLAG = "any"
+from setuptools import Distribution, find_packages, setup
 
 if "VERSION" not in os.environ:
     raise Exception("envvar VERSION must be specified")
 VERSION = os.environ["VERSION"]
 
-try:
-    from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
-
-    class bdist_wheel(_bdist_wheel):
-        def finalize_options(self):
-            _bdist_wheel.finalize_options(self)
-            self.root_is_pure = False
-
-        def get_tag(self):
-            pyver, abi, plat = "py3", "none", PLATFORM_FLAG
-            return pyver, abi, plat
+# The wheel bundles a CPython-ABI-specific binding
+# (tritonfrontend/_c/<module>.so, filename encodes e.g. "cpython-312-..."),
+# so the wheel is only loadable under the matching interpreter and arch.
+# The binding is copied into package_data at build time rather than
+# declared via setup(ext_modules=...), so setuptools would otherwise
+# treat the distribution as pure-Python and emit "Root-Is-Purelib: true"
+# in the WHEEL metadata — which auditwheel rejects when it finds the
+# .so embedded in the purelib tree.
+#
+# Signaling has_ext_modules()=True via a custom Distribution subclass
+# is the canonical way to tell setuptools the wheel is binary without
+# triggering a fake compilation step. setuptools then:
+#   - sets Root-Is-Purelib to false (required for auditwheel repair),
+#   - auto-derives the correct cp<ver>-cp<ver>-linux_<arch> tag from
+#     the current interpreter and sysconfig.get_platform().
+# See TRI-983.
+class BinaryDistribution(Distribution):
+    def has_ext_modules(self):
+        return True
 
-except ImportError:
-    bdist_wheel = None
 
 this_directory = os.path.abspath(os.path.dirname(__file__))
@@ -105,7 +106,7 @@ def get_tag(self):
         "": platform_package_data,
     },
     zip_safe=False,
-    cmdclass={"bdist_wheel": bdist_wheel},
+    distclass=BinaryDistribution,
     data_files=data_files,
     install_requires=["tritonserver", "pydantic==2.10.6"],
     extras_require={"GPU": gpu_extras, "test": test_extras, "all": all_extras},
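For reference, the pieces above compose like this. The snippet below is an illustrative sanity check, not part of the patch: it assumes the third-party `packaging` library is available, and the version, build tag, and filename are hypothetical examples (CI_PIPELINE_ID=12345678, NVIDIA_UPSTREAM_VERSION=26.04, CUDA 13.2.x).

# Illustrative only; hypothetical values; requires `pip install packaging`.
from packaging.specifiers import SpecifierSet
from packaging.utils import parse_wheel_filename
from packaging.version import Version

# What _compose_version() would emit for TRITON_VERSION=2.69.0.dev0,
# NVIDIA_UPSTREAM_VERSION=26.04, CUDA_VERSION=13.2.1:
v = Version("2.69.0.dev0+nv26.04.cu132")

# PEP 440: a specifier without a local segment ignores the candidate's
# local segment, so existing public pins keep matching the nv wheels.
assert v in SpecifierSet("==2.69.0.dev0")

# PEP 440 normalization gotcha: the numeric local segment "04" drops its
# leading zero, so "+nv26.04" round-trips as "+nv26.4".
assert v.local == "nv26.4.cu132"

# PEP 427 filename with the build tag taken from CI_PIPELINE_ID=12345678:
name, ver, build, _tags = parse_wheel_filename(
    "tritonfrontend-2.69.0.dev0+nv26.04.cu132-12345678-cp312-cp312-manylinux_2_28_x86_64.whl"
)
assert name == "tritonfrontend" and build == (12345678, "")
assert str(ver) == "2.69.0.dev0+nv26.4.cu132"  # normalized form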