diff --git a/.gitignore b/.gitignore index 31f5d9f5f4..471832c119 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,4 @@ qa/L0_openai/openai tensorrtllm_models tensorrtllm_mistral_models/ custom_tokenizer +CMakeUserPresets.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 68f68e1a69..8b555ba9bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,10 +30,20 @@ project(tritonserver LANGUAGES C CXX) include(CMakeDependentOption) +# Expose local cmake/ modules (FindDCGM, TritonCheckCxxAbi, etc.) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") + +# Validate that the C++ ABI (_GLIBCXX_USE_CXX11_ABI) matches the ABI used +# to compile Conan-provided binary packages. Must run after the toolchain +# file (conan_toolchain.cmake) has been processed so CONAN_COMPILER_LIBCXX +# is populated. +include(TritonCheckCxxAbi) +triton_check_cxx_abi() + # Use C++17 standard as Triton's minimum required. set(TRITON_MIN_CXX_STANDARD 17 CACHE STRING "The minimum C++ standard which features are requested to build this target.") -set(TRITON_VERSION "0.0.0" CACHE STRING "The version of the Triton shared library" ) +set(TRITON_VERSION "0.0.0" CACHE STRING "The version of the Triton shared library") option(TRITON_ENABLE_LOGGING "Include logging support in server" ON) option(TRITON_ENABLE_STATS "Include statistics collections in server" ON) @@ -41,9 +51,9 @@ option(TRITON_ENABLE_TRACING "Include tracing support in server" OFF) option(TRITON_ENABLE_NVTX "Include NVTX support in server" OFF) option(TRITON_ENABLE_GPU "Enable GPU support in server" ON) option(TRITON_ENABLE_MALI_GPU "Enable Arm Mali GPU support in server" OFF) -option(TRITON_IGPU_BUILD "Enable options for iGPU compilation in sever" OFF) +option(TRITON_IGPU_BUILD "Enable options for iGPU compilation in server" OFF) set(TRITON_MIN_COMPUTE_CAPABILITY "7.5" CACHE STRING - "The minimum CUDA compute capability supported by Triton" ) + "The minimum CUDA compute capability supported by Triton") 
set(TRITON_EXTRA_LIB_PATHS "" CACHE PATH "Extra library paths for Triton Server build") # Ensemble @@ -71,18 +81,72 @@ option(TRITON_ENABLE_TENSORRT "Include TensorRT backend in server" OFF) # ASAN option(TRITON_ENABLE_ASAN "Build with address sanitizer" OFF) -# Repo tags -set(TRITON_REPO_ORGANIZATION "https://github.com/triton-inference-server" CACHE STRING "Git repository to pull from") -set(TRITON_THIRD_PARTY_REPO_TAG "main" CACHE STRING - "Tag for triton-inference-server/third_party repo") -set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo") -set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo") -set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo") - -# Third-party location -set(TRITON_THIRD_PARTY_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/third-party" CACHE STRING "Location of third-party build") -set(TRITON_THIRD_PARTY_SRC_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/third-party-src" CACHE STRING "Location of third-party source") +# Skip FetchContent/ExternalProject for third-party deps when Conan provides them +option(TRITON_SKIP_THIRD_PARTY_FETCH + "Skip FetchContent/ExternalProject for third-party deps (provided by Conan)" ON) + +# --------------------------------------------------------------------------- +# Backend implementations — each defaults OFF; toggle ON to include in build. +# Source dirs default to sibling checkout paths; override via -D if needed. 
+# ---------------------------------------------------------------------------
+option(TRITON_ENABLE_PYTHON_BACKEND "Build Python backend" OFF)
+option(TRITON_ENABLE_ONNXRUNTIME_BACKEND "Build ONNX Runtime backend" OFF)
+option(TRITON_ENABLE_PYTORCH_BACKEND "Build PyTorch backend" OFF)
+option(TRITON_ENABLE_TENSORRT_BACKEND "Build TensorRT backend" OFF)
+option(TRITON_ENABLE_OPENVINO_BACKEND "Build OpenVINO backend" OFF)
+option(TRITON_ENABLE_DALI_BACKEND "Build DALI backend" OFF)
+option(TRITON_ENABLE_FIL_BACKEND "Build FIL backend" OFF)
+option(TRITON_ENABLE_VLLM_BACKEND "Build vLLM backend" OFF)
+option(TRITON_ENABLE_TENSORRTLLM_BACKEND "Build TensorRT-LLM backend" OFF)
+option(TRITON_ENABLE_IDENTITY_BACKEND "Build Identity (test) backend" OFF)
+option(TRITON_ENABLE_REPEAT_BACKEND "Build Repeat (test) backend" OFF)
+option(TRITON_ENABLE_SQUARE_BACKEND "Build Square (test) backend" OFF)
+
+# Source dirs — prefer sibling checkouts; FetchContent is the fallback.
+# Override per-backend with -DTRITON_<NAME>_BACKEND_SOURCE_DIR=<path>.
+set(TRITON_PYTHON_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../python_backend"
+    CACHE PATH "Local checkout of python_backend repo")
+set(TRITON_ONNXRUNTIME_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../onnxruntime_backend"
+    CACHE PATH "Local checkout of onnxruntime_backend repo")
+set(TRITON_PYTORCH_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../pytorch_backend"
+    CACHE PATH "Local checkout of pytorch_backend repo")
+set(TRITON_TENSORRT_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../tensorrt_backend"
+    CACHE PATH "Local checkout of tensorrt_backend repo")
+set(TRITON_OPENVINO_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../openvino_backend"
+    CACHE PATH "Local checkout of openvino_backend repo")
+set(TRITON_DALI_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../dali_backend"
+    CACHE PATH "Local checkout of dali_backend repo")
+set(TRITON_FIL_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../fil_backend"
+    CACHE PATH "Local checkout of fil_backend repo")
+set(TRITON_VLLM_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../vllm_backend"
+    CACHE PATH "Local checkout of vllm_backend repo")
+set(TRITON_TENSORRTLLM_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../tensorrtllm_backend"
+    CACHE PATH "Local checkout of tensorrtllm_backend repo")
+set(TRITON_IDENTITY_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../identity_backend"
+    CACHE PATH "Local checkout of identity_backend repo")
+set(TRITON_REPEAT_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../repeat_backend"
+    CACHE PATH "Local checkout of repeat_backend repo")
+set(TRITON_SQUARE_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../square_backend"
+    CACHE PATH "Local checkout of square_backend repo")
+
+# Git tags used when a backend is fetched (no local checkout found).
+# Override per-backend with -DTRITON_<NAME>_BACKEND_REPO_TAG=<tag>.
+set(TRITON_PYTHON_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for python_backend FetchContent") +set(TRITON_ONNXRUNTIME_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for onnxruntime_backend FetchContent") +set(TRITON_PYTORCH_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for pytorch_backend FetchContent") +set(TRITON_TENSORRT_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for tensorrt_backend FetchContent") +set(TRITON_OPENVINO_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for openvino_backend FetchContent") +set(TRITON_DALI_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for dali_backend FetchContent") +set(TRITON_FIL_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for fil_backend FetchContent") +set(TRITON_VLLM_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for vllm_backend FetchContent") +set(TRITON_TENSORRTLLM_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for tensorrtllm_backend FetchContent") +set(TRITON_IDENTITY_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for identity_backend FetchContent") +set(TRITON_REPEAT_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for repeat_backend FetchContent") +set(TRITON_SQUARE_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for square_backend FetchContent") +# +# Validation +# if(TRITON_ENABLE_METRICS AND NOT TRITON_ENABLE_STATS) message(FATAL_ERROR "TRITON_ENABLE_METRICS=ON requires TRITON_ENABLE_STATS=ON") endif() @@ -91,15 +155,15 @@ if(TRITON_ENABLE_TRACING AND NOT TRITON_ENABLE_STATS) message(FATAL_ERROR "TRITON_ENABLE_TRACING=ON requires TRITON_ENABLE_STATS=ON") endif() -if (TRITON_ENABLE_METRICS_CPU AND NOT TRITON_ENABLE_METRICS) +if(TRITON_ENABLE_METRICS_CPU AND NOT TRITON_ENABLE_METRICS) message(FATAL_ERROR "TRITON_ENABLE_METRICS_CPU=ON requires TRITON_ENABLE_METRICS=ON") endif() -if (TRITON_ENABLE_METRICS_GPU AND NOT TRITON_ENABLE_METRICS) +if(TRITON_ENABLE_METRICS_GPU AND NOT TRITON_ENABLE_METRICS) message(FATAL_ERROR "TRITON_ENABLE_METRICS_GPU=ON requires TRITON_ENABLE_METRICS=ON") endif() -if (TRITON_ENABLE_METRICS_GPU 
AND NOT TRITON_ENABLE_GPU) +if(TRITON_ENABLE_METRICS_GPU AND NOT TRITON_ENABLE_GPU) message(FATAL_ERROR "TRITON_ENABLE_METRICS_GPU=ON requires TRITON_ENABLE_GPU=ON") endif() @@ -108,162 +172,431 @@ if(TRITON_ENABLE_ASAN AND TRITON_ENABLE_GPU) endif() # -# Dependencies +# Internal Triton repos — prefer local checkouts, fall back to FetchContent. +# Override source dirs via -DTRITON_*_SOURCE_DIR=. +# Override fetch tags via -DTRITON_*_REPO_TAG=. # +set(TRITON_COMMON_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../common" + CACHE PATH "Local checkout of triton-inference-server/common (empty = FetchContent)") +set(TRITON_CORE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../core" + CACHE PATH "Local checkout of triton-inference-server/core (empty = FetchContent)") +set(TRITON_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../backend" + CACHE PATH "Local checkout of triton-inference-server/backend (empty = FetchContent)") + +set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Git tag/branch for common when using FetchContent") +set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Git tag/branch for core when using FetchContent") +set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Git tag/branch for backend when using FetchContent") + include(FetchContent) -FetchContent_Declare( - repo-core - GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/core.git - GIT_TAG ${TRITON_CORE_REPO_TAG} -) -FetchContent_Declare( - repo-third-party - GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/third_party.git - GIT_TAG ${TRITON_THIRD_PARTY_REPO_TAG} -) +macro(triton_add_internal_repo _name _src_dir _repo_url _repo_tag _subdir) + if(EXISTS "${${_src_dir}}/CMakeLists.txt") + message(STATUS "triton/${_name}: using local checkout at ${${_src_dir}}") + add_subdirectory(${${_src_dir}} ${_subdir}) + else() + message(STATUS "triton/${_name}: local checkout not found — fetching ${${_repo_tag}} from ${_repo_url}") + FetchContent_Declare( + ${_name} + GIT_REPOSITORY ${_repo_url} + GIT_TAG ${${_repo_tag}} + GIT_SHALLOW TRUE + ) + 
FetchContent_MakeAvailable(${_name}) + endif() +endmacro() -# Some libs are installed to ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib64 instead -# of ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib on Centos -set(LIB_DIR "lib") -if(LINUX) - file(STRINGS "/etc/os-release" DISTRO_ID_LIKE REGEX "ID_LIKE") - if(${DISTRO_ID_LIKE} MATCHES "rhel|centos") - set (LIB_DIR "lib64") - endif(${DISTRO_ID_LIKE} MATCHES "rhel|centos") -endif(LINUX) set(TRITON_CORE_HEADERS_ONLY OFF) -FetchContent_MakeAvailable(repo-third-party repo-core) - # -# Triton server executable and examples +# External C++ deps — resolved by Conan; conan_toolchain.cmake sets CMAKE_PREFIX_PATH. +# These must be found BEFORE add_subdirectory(core) so that imported targets +# (e.g. cnmem::cnmem) exist when core/src/CMakeLists.txt evaluates them. # +find_package(re2 REQUIRED) +find_package(Protobuf CONFIG REQUIRED) +find_package(RapidJSON CONFIG REQUIRED) +find_package(Boost REQUIRED) -# Need to use ExternalProject for our builds so that we can get the -# correct dependencies between Triton executable and the -# ExternalProject dependencies (found in the third_party repo) -include(ExternalProject) +if(TRITON_ENABLE_GRPC) + find_package(gRPC CONFIG REQUIRED) +endif() -# If CMAKE_TOOLCHAIN_FILE is set, propagate that hint path to the external -# projects. -set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "") -if (CMAKE_TOOLCHAIN_FILE) - set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "-DCMAKE_TOOLCHAIN_FILE:PATH=${CMAKE_TOOLCHAIN_FILE}") +if(TRITON_ENABLE_HTTP OR TRITON_ENABLE_METRICS OR + TRITON_ENABLE_SAGEMAKER OR TRITON_ENABLE_VERTEX_AI) + find_package(Libevent CONFIG REQUIRED) + find_package(libevhtp CONFIG REQUIRED) endif() -# If VCPKG_TARGET_TRIPLET is set, propagate that hint path to the external -# projects. 
-set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "") -if (VCPKG_TARGET_TRIPLET) - set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "-DVCPKG_TARGET_TRIPLET:STRING=${VCPKG_TARGET_TRIPLET}") +if(TRITON_ENABLE_METRICS) + find_package(prometheus-cpp CONFIG REQUIRED) endif() -# If OPENSSL_ROOT_DIR is set, propagate that hint path to the external -# projects with OpenSSL dependency. -set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "") -if (OPENSSL_ROOT_DIR) - set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "-DOPENSSL_ROOT_DIR:PATH=${OPENSSL_ROOT_DIR}") +if(TRITON_ENABLE_TRACING) + find_package(opentelemetry-cpp CONFIG REQUIRED) + find_package(CURL CONFIG REQUIRED) + find_package(nlohmann_json CONFIG REQUIRED) endif() -# Location where protobuf-config.cmake will be installed varies by -# platform -if (WIN32) - set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/cmake") -else() - set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/${LIB_DIR}/cmake/protobuf") +if(TRITON_ENABLE_GCS) + find_package(google_cloud_cpp_storage CONFIG REQUIRED) + find_package(Crc32c CONFIG REQUIRED) endif() -# Triton with Opentelemetry is not supported on Windows -# FIXME: add location for Windows, when support is added -# JIRA DLIS-4786 -if (WIN32) - set(_FINDPACKAGE_OPENTELEMETRY_CONFIG_DIR "") -else() - set(_FINDPACKAGE_OPENTELEMETRY_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/opentelemetry-cpp/${LIB_DIR}/cmake/opentelemetry-cpp") +if(TRITON_ENABLE_S3) + find_package(AWSSDK CONFIG REQUIRED COMPONENTS s3) +endif() + +if(TRITON_ENABLE_AZURE_STORAGE) + find_package(azure-storage-blobs-cpp CONFIG REQUIRED) endif() +if(TRITON_ENABLE_GPU) + # Must be found before add_subdirectory(core): core/src/CMakeLists.txt uses + # if(TARGET cnmem::cnmem) to select the Conan-provided target vs the fallback. 
+ find_package(cnmem CONFIG REQUIRED) + find_package(DCGM CONFIG REQUIRED) +endif() + +# Enable protobuf and gRPC in common before adding it as a subdirectory so +# that model_config.pb.h and proto targets are generated when core/src needs them. +set(TRITON_COMMON_ENABLE_PROTOBUF ON CACHE BOOL "Enable protobuf in common" FORCE) +if(TRITON_ENABLE_GRPC) + set(TRITON_COMMON_ENABLE_GRPC ON CACHE BOOL "Enable gRPC in common" FORCE) +endif() +set(TRITON_COMMON_ENABLE_JSON ON CACHE BOOL "Enable JSON in common" FORCE) + +triton_add_internal_repo( + triton-common TRITON_COMMON_SOURCE_DIR + "https://github.com/triton-inference-server/common.git" + TRITON_COMMON_REPO_TAG triton-common) + +triton_add_internal_repo( + triton-core TRITON_CORE_SOURCE_DIR + "https://github.com/triton-inference-server/core.git" + TRITON_CORE_REPO_TAG triton-core-top) + +triton_add_internal_repo( + triton-backend TRITON_BACKEND_SOURCE_DIR + "https://github.com/triton-inference-server/backend.git" + TRITON_BACKEND_REPO_TAG triton-backend) + +# Compatibility aliases for legacy FetchContent variable names still used by +# tests in server/src/test/ (repo-core_SOURCE_DIR, etc.). 
+set(repo-common_SOURCE_DIR "${TRITON_COMMON_SOURCE_DIR}" CACHE INTERNAL "") +set(repo-core_SOURCE_DIR "${TRITON_CORE_SOURCE_DIR}" CACHE INTERNAL "") +set(repo-backend_SOURCE_DIR "${TRITON_BACKEND_SOURCE_DIR}" CACHE INTERNAL "") + if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) set(TRITON_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/install) else() set(TRITON_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) endif() -set(TRITON_DEPENDS triton-core protobuf googletest re2) -if(${TRITON_ENABLE_GCS}) - set(TRITON_DEPENDS ${TRITON_DEPENDS} google-cloud-cpp) -endif() # TRITON_ENABLE_GCS -if(${TRITON_ENABLE_S3}) - set(TRITON_DEPENDS ${TRITON_DEPENDS} aws-sdk-cpp) -endif() # TRITON_ENABLE_S3 -if(${TRITON_ENABLE_HTTP} OR ${TRITON_ENABLE_METRICS} OR ${TRITON_ENABLE_SAGEMAKER} OR ${TRITON_ENABLE_VERTEX_AI}) - set(TRITON_DEPENDS ${TRITON_DEPENDS} libevent libevhtp) -endif() # TRITON_ENABLE_HTTP || TRITON_ENABLE_METRICS || TRITON_ENABLE_SAGEMAKER || TRITON_ENABLE_VERTEX_AI -if(${TRITON_ENABLE_GRPC}) - set(TRITON_DEPENDS ${TRITON_DEPENDS} grpc) -endif() # TRITON_ENABLE_GRPC -if(NOT WIN32 AND ${TRITON_ENABLE_TRACING}) - set(TRITON_DEPENDS ${TRITON_DEPENDS} opentelemetry-cpp) -endif() # TRITON_ENABLE_TRACING - -ExternalProject_Add(triton-server - PREFIX triton-server - SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src" - BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/triton-server" - CMAKE_CACHE_ARGS - -DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR} - ${_CMAKE_ARGS_OPENSSL_ROOT_DIR} - ${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE} - ${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET} - -DGTEST_ROOT:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/googletest - -DgRPC_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/grpc/lib/cmake/grpc - -Dc-ares_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/c-ares/${LIB_DIR}/cmake/c-ares - -Dre2_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/re2/${LIB_DIR}/cmake/re2 - -Dabsl_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/absl/${LIB_DIR}/cmake/absl - 
-DCURL_DIR:STRING=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/curl/${LIB_DIR}/cmake/CURL - -Dnlohmann_json_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/nlohmann_json/share/cmake/nlohmann_json - -DLibevent_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/libevent/lib/cmake/libevent - -Dlibevhtp_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/libevhtp/lib/cmake/libevhtp - -Dstorage_client_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/${LIB_DIR}/cmake/storage_client - -Dgoogle_cloud_cpp_common_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/${LIB_DIR}/cmake/google_cloud_cpp_common - -DCrc32c_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/crc32c/${LIB_DIR}/cmake/Crc32c - -DAWSSDK_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/cmake/AWSSDK - -Daws-cpp-sdk-core_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/cmake/aws-cpp-sdk-core - -Daws-cpp-sdk-s3_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/cmake/aws-cpp-sdk-s3 - -Daws-c-event-stream_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/aws-c-event-stream/cmake - -Daws-c-common_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/aws-c-common/cmake - -Daws-checksums_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/aws-checksums/cmake - -Dopentelemetry-cpp_DIR:PATH=${_FINDPACKAGE_OPENTELEMETRY_CONFIG_DIR} - -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} - -DTRITON_IGPU_BUILD:BOOL=${TRITON_IGPU_BUILD} - -DTRITON_THIRD_PARTY_REPO_TAG:STRING=${TRITON_THIRD_PARTY_REPO_TAG} - -DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG} - -DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG} - -DTRITON_BACKEND_REPO_TAG:STRING=${TRITON_BACKEND_REPO_TAG} - -DTRITON_EXTRA_LIB_PATHS:PATH=${TRITON_EXTRA_LIB_PATHS} - -DTRITON_ENABLE_ASAN:BOOL=${TRITON_ENABLE_ASAN} - -DTRITON_ENABLE_NVTX:BOOL=${TRITON_ENABLE_NVTX} - -DTRITON_ENABLE_TRACING:BOOL=${TRITON_ENABLE_TRACING} - 
-DTRITON_ENABLE_LOGGING:BOOL=${TRITON_ENABLE_LOGGING} - -DTRITON_ENABLE_STATS:BOOL=${TRITON_ENABLE_STATS} - -DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU} - -DTRITON_ENABLE_MALI_GPU:BOOL=${TRITON_ENABLE_MALI_GPU} - -DTRITON_ENABLE_HTTP:BOOL=${TRITON_ENABLE_HTTP} - -DTRITON_ENABLE_SAGEMAKER:BOOL=${TRITON_ENABLE_SAGEMAKER} - -DTRITON_ENABLE_VERTEX_AI:BOOL=${TRITON_ENABLE_VERTEX_AI} - -DTRITON_ENABLE_GRPC:BOOL=${TRITON_ENABLE_GRPC} - -DTRITON_MIN_COMPUTE_CAPABILITY:STRING=${TRITON_MIN_COMPUTE_CAPABILITY} - -DTRITON_ENABLE_METRICS:BOOL=${TRITON_ENABLE_METRICS} - -DTRITON_ENABLE_METRICS_GPU:BOOL=${TRITON_ENABLE_METRICS_GPU} - -DTRITON_ENABLE_METRICS_CPU:BOOL=${TRITON_ENABLE_METRICS_CPU} - -DTRITON_ENABLE_GCS:BOOL=${TRITON_ENABLE_GCS} - -DTRITON_ENABLE_AZURE_STORAGE:BOOL=${TRITON_ENABLE_AZURE_STORAGE} - -DTRITON_ENABLE_S3:BOOL=${TRITON_ENABLE_S3} - -DTRITON_ENABLE_TENSORRT:BOOL=${TRITON_ENABLE_TENSORRT} - -DTRITON_ENABLE_ENSEMBLE:BOOL=${TRITON_ENABLE_ENSEMBLE} - -DTRITON_MIN_CXX_STANDARD:STRING=${TRITON_MIN_CXX_STANDARD} - -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE} - -DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX} - -DTRITON_VERSION:STRING=${TRITON_VERSION} - DEPENDS ${TRITON_DEPENDS} +# +# Triton server executable +# +add_subdirectory(src) + +# --------------------------------------------------------------------------- +# Backend implementations +# +# Each backend is included via add_subdirectory. When enabled but no local +# checkout is found, FetchContent downloads the repo automatically (requires +# network access at configure time). 
+#
+# Build a single backend:
+#   cmake --build --preset <preset> --target <name>-backend
+#
+# Build all enabled backends:
+#   cmake --build --preset <preset> --target backends
+#
+# Install all enabled backends (produces deployable backend packages):
+#   cmake --install build/<preset> --prefix /opt/tritonserver
+#
+# For standalone backend builds (without the full server), backends can link
+# against the triton::tritonserver_stub interface target which provides the
+# TRITONSERVER_* API headers without requiring libtritonserver.so at build time.
+# ---------------------------------------------------------------------------
+
+# Controls whether missing backend checkouts are fetched automatically.
+option(TRITON_FETCH_MISSING_BACKENDS
+    "Automatically fetch backends via FetchContent when local checkout is absent" ON)
+
+# All backends pass these flags into their sub-build so they skip their own
+# FetchContent fetches for common/core (already in scope from this build).
+set(_backend_passthrough
+    -DTRITON_SKIP_THIRD_PARTY_FETCH=ON
+    -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU}
+)
+
+# _triton_backend_labels tracks which add_subdirectory labels were registered
+# so we can build the 'backends' aggregate target afterwards.
+set(_triton_backend_labels "")
+
+macro(triton_add_backend _opt _dir _repo_name _repo_tag_var _label)
+  if(${_opt})
+    if(EXISTS "${${_dir}}/CMakeLists.txt")
+      message(STATUS "triton/${_repo_name}: using local checkout at ${${_dir}}")
+      add_subdirectory("${${_dir}}" "${_label}")
+      list(APPEND _triton_backend_labels "${_label}")
+    elseif(EXISTS "${${_dir}}")
+      # Directory present but no CMakeLists.txt — Python-only backend.
+      # No CMake compilation step; deployment is via pip install / wheel.
+ message(STATUS "triton/${_repo_name}: Python-only backend at ${${_dir}} — no CMake build step") + elseif(TRITON_FETCH_MISSING_BACKENDS) + message(STATUS "triton/${_repo_name}: not found locally — fetching " + "https://github.com/triton-inference-server/${_repo_name}.git " + "@${${_repo_tag_var}}") + FetchContent_Declare( + ${_repo_name} + GIT_REPOSITORY "https://github.com/triton-inference-server/${_repo_name}.git" + GIT_TAG "${${_repo_tag_var}}" + GIT_SHALLOW TRUE + SOURCE_DIR "${${_dir}}" + ) + FetchContent_GetProperties(${_repo_name}) + if(NOT ${_repo_name}_POPULATED) + FetchContent_Populate(${_repo_name}) + endif() + if(EXISTS "${${_dir}}/CMakeLists.txt") + add_subdirectory("${${_dir}}" "${_label}") + list(APPEND _triton_backend_labels "${_label}") + else() + message(STATUS "triton/${_repo_name}: fetched — Python-only backend, no CMake build step") + endif() + else() + message(WARNING + "${_opt}=ON but '${${_dir}}' not found and " + "TRITON_FETCH_MISSING_BACKENDS=OFF — skipping ${_repo_name}") + endif() + endif() +endmacro() + +triton_add_backend(TRITON_ENABLE_PYTHON_BACKEND TRITON_PYTHON_BACKEND_SOURCE_DIR + python_backend TRITON_PYTHON_BACKEND_REPO_TAG python_backend) +triton_add_backend(TRITON_ENABLE_ONNXRUNTIME_BACKEND TRITON_ONNXRUNTIME_BACKEND_SOURCE_DIR + onnxruntime_backend TRITON_ONNXRUNTIME_BACKEND_REPO_TAG onnxruntime_backend) +triton_add_backend(TRITON_ENABLE_PYTORCH_BACKEND TRITON_PYTORCH_BACKEND_SOURCE_DIR + pytorch_backend TRITON_PYTORCH_BACKEND_REPO_TAG pytorch_backend) +triton_add_backend(TRITON_ENABLE_TENSORRT_BACKEND TRITON_TENSORRT_BACKEND_SOURCE_DIR + tensorrt_backend TRITON_TENSORRT_BACKEND_REPO_TAG tensorrt_backend) +triton_add_backend(TRITON_ENABLE_OPENVINO_BACKEND TRITON_OPENVINO_BACKEND_SOURCE_DIR + openvino_backend TRITON_OPENVINO_BACKEND_REPO_TAG openvino_backend) +triton_add_backend(TRITON_ENABLE_DALI_BACKEND TRITON_DALI_BACKEND_SOURCE_DIR + dali_backend TRITON_DALI_BACKEND_REPO_TAG dali_backend) 
+triton_add_backend(TRITON_ENABLE_FIL_BACKEND TRITON_FIL_BACKEND_SOURCE_DIR
+    fil_backend TRITON_FIL_BACKEND_REPO_TAG fil_backend)
+triton_add_backend(TRITON_ENABLE_VLLM_BACKEND TRITON_VLLM_BACKEND_SOURCE_DIR
+    vllm_backend TRITON_VLLM_BACKEND_REPO_TAG vllm_backend)
+triton_add_backend(TRITON_ENABLE_TENSORRTLLM_BACKEND TRITON_TENSORRTLLM_BACKEND_SOURCE_DIR
+    tensorrtllm_backend TRITON_TENSORRTLLM_BACKEND_REPO_TAG tensorrtllm_backend)
+triton_add_backend(TRITON_ENABLE_IDENTITY_BACKEND TRITON_IDENTITY_BACKEND_SOURCE_DIR
+    identity_backend TRITON_IDENTITY_BACKEND_REPO_TAG identity_backend)
+triton_add_backend(TRITON_ENABLE_REPEAT_BACKEND TRITON_REPEAT_BACKEND_SOURCE_DIR
+    repeat_backend TRITON_REPEAT_BACKEND_REPO_TAG repeat_backend)
+triton_add_backend(TRITON_ENABLE_SQUARE_BACKEND TRITON_SQUARE_BACKEND_SOURCE_DIR
+    square_backend TRITON_SQUARE_BACKEND_REPO_TAG square_backend)
+
+# 'backends' aggregate target — builds all enabled backends via cmake --target backends.
+# Individual targets: cmake --build --preset <preset> --target <name>-backend
+# (each backend's CMakeLists.txt is expected to define install() rules that place
+# the backend .so under ${CMAKE_INSTALL_PREFIX}/backends/<name>/)
+add_custom_target(backends
+  COMMENT "Building all enabled backend implementations"
+)
+foreach(_bl ${_triton_backend_labels})
+  # Create a per-backend target alias: python-backend, onnxruntime-backend, etc.
+  # The label (e.g. "python_backend") maps to the add_subdirectory binary subdir;
+  # cmake --build with that subdir name builds all targets from that backend.
+  string(REPLACE "_" "-" _backend_target_name "${_bl}")
+  add_custom_target("${_backend_target_name}"
+    COMMAND ${CMAKE_COMMAND} --build "${CMAKE_BINARY_DIR}/${_bl}"
+            --config $<CONFIG>
+    COMMENT "Building ${_bl}"
+    VERBATIM
+  )
+  add_dependencies(backends "${_backend_target_name}")
+endforeach()
+
+# ---------------------------------------------------------------------------
+# Triton plugins: repository agents and cache providers
+#
+# These are server-side extension plugins that are shipped alongside the server
+# but live in separate repositories. Each can be built standalone from its own
+# checkout, or included here via the same local-checkout / FetchContent pattern.
+#
+# Build a single plugin:
+#   cmake --build --preset <preset> --target <plugin>
+#
+# Build all enabled plugins:
+#   cmake --build --preset <preset> --target plugins
+# ---------------------------------------------------------------------------
+
+option(TRITON_ENABLE_CHECKSUM_REPO_AGENT "Build checksum repository agent" OFF)
+option(TRITON_ENABLE_LOCAL_CACHE "Build local response cache" OFF)
+option(TRITON_ENABLE_REDIS_CACHE "Build Redis response cache" OFF)
+
+set(TRITON_CHECKSUM_REPO_AGENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../checksum_repository_agent"
+    CACHE PATH "Local checkout of checksum_repository_agent repo")
+set(TRITON_LOCAL_CACHE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../local_cache"
+    CACHE PATH "Local checkout of local_cache repo")
+set(TRITON_REDIS_CACHE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../redis_cache"
+    CACHE PATH "Local checkout of redis_cache repo")
+
+set(TRITON_CHECKSUM_REPO_AGENT_REPO_TAG "main" CACHE STRING "Git ref for checksum_repository_agent FetchContent")
+set(TRITON_LOCAL_CACHE_REPO_TAG "main" CACHE STRING "Git ref for local_cache FetchContent")
+set(TRITON_REDIS_CACHE_REPO_TAG "main" CACHE STRING "Git ref for redis_cache FetchContent")
+
+set(_triton_plugin_labels "")
+
+macro(triton_add_plugin _opt _dir _repo_name _repo_tag_var _label)
+  if(${_opt})
+    if(EXISTS "${${_dir}}/CMakeLists.txt")
+      message(STATUS "triton/${_repo_name}: using local checkout at ${${_dir}}")
+      add_subdirectory("${${_dir}}" "${_label}")
+    elseif(TRITON_FETCH_MISSING_BACKENDS)
+      message(STATUS "triton/${_repo_name}: not found locally — fetching "
+                     "https://github.com/triton-inference-server/${_repo_name}.git "
+                     "@${${_repo_tag_var}}")
+      FetchContent_Declare(
+        ${_repo_name}
+        GIT_REPOSITORY "https://github.com/triton-inference-server/${_repo_name}.git"
+        GIT_TAG "${${_repo_tag_var}}"
+        GIT_SHALLOW TRUE
+        SOURCE_DIR "${${_dir}}"
+      )
+      FetchContent_MakeAvailable(${_repo_name})
+    else()
+      message(WARNING
+        "${_opt}=ON but '${${_dir}}' not found and "
"TRITON_FETCH_MISSING_BACKENDS=OFF — skipping ${_repo_name}") + endif() + list(APPEND _triton_plugin_labels "${_label}") + endif() +endmacro() + +triton_add_plugin(TRITON_ENABLE_CHECKSUM_REPO_AGENT TRITON_CHECKSUM_REPO_AGENT_SOURCE_DIR + checksum_repository_agent TRITON_CHECKSUM_REPO_AGENT_REPO_TAG checksum_repository_agent) +triton_add_plugin(TRITON_ENABLE_LOCAL_CACHE TRITON_LOCAL_CACHE_SOURCE_DIR + local_cache TRITON_LOCAL_CACHE_REPO_TAG local_cache) +triton_add_plugin(TRITON_ENABLE_REDIS_CACHE TRITON_REDIS_CACHE_SOURCE_DIR + redis_cache TRITON_REDIS_CACHE_REPO_TAG redis_cache) + +# 'plugins' aggregate target — builds all enabled repo agents and cache providers. +add_custom_target(plugins + COMMENT "Building all enabled Triton plugin extensions" +) +foreach(_pl ${_triton_plugin_labels}) + string(REPLACE "_" "-" _plugin_target_name "${_pl}") + add_custom_target("${_plugin_target_name}" + COMMAND ${CMAKE_COMMAND} --build "${CMAKE_BINARY_DIR}/${_pl}" + --config $ + COMMENT "Building ${_pl}" + VERBATIM + ) + add_dependencies(plugins "${_plugin_target_name}") +endforeach() + +# --------------------------------------------------------------------------- +# Client & perf_analyzer — optional; buildable without the server library. 
+# When TRITON_SERVER_LIBRARY is empty/not found, downstream CMakeLists.txt
+# must link against the stub target (provided via core/include/):
+#   target_link_libraries(<target> PRIVATE triton::tritonserver_stub)
+# ---------------------------------------------------------------------------
+option(TRITON_ENABLE_CLIENT "Build Triton client libraries" OFF)
+option(TRITON_ENABLE_PERF_ANALYZER "Build perf_analyzer tool" OFF)
+
+set(TRITON_CLIENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../client"
+    CACHE PATH "Local checkout of client repo")
+set(TRITON_PERF_ANALYZER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../perf_analyzer"
+    CACHE PATH "Local checkout of perf_analyzer repo")
+
+set(TRITON_CLIENT_REPO_TAG "main" CACHE STRING "Git ref for client FetchContent fallback")
+set(TRITON_PERF_ANALYZER_REPO_TAG "main" CACHE STRING "Git ref for perf_analyzer FetchContent fallback")
+
+# Provide a header-only stub target so client/perf_analyzer can build
+# without libtritonserver.so present (they communicate via gRPC/HTTP only).
+if(TRITON_ENABLE_CLIENT OR TRITON_ENABLE_PERF_ANALYZER) + if(NOT TARGET triton::tritonserver_stub) + add_library(tritonserver_stub INTERFACE) + add_library(triton::tritonserver_stub ALIAS tritonserver_stub) + target_include_directories(tritonserver_stub INTERFACE + ${TRITON_CORE_SOURCE_DIR}/include) + endif() +endif() + +if(TRITON_ENABLE_CLIENT) + triton_add_internal_repo( + triton-client TRITON_CLIENT_SOURCE_DIR + "https://github.com/triton-inference-server/client.git" + TRITON_CLIENT_REPO_TAG client) +endif() + +if(TRITON_ENABLE_PERF_ANALYZER) + if(EXISTS "${TRITON_PERF_ANALYZER_SOURCE_DIR}/CMakeLists.txt") + message(STATUS "triton/perf_analyzer: using local checkout at ${TRITON_PERF_ANALYZER_SOURCE_DIR}") + add_subdirectory(${TRITON_PERF_ANALYZER_SOURCE_DIR} perf_analyzer) + else() + message(STATUS "triton/perf_analyzer: local checkout not found — fetching ${TRITON_PERF_ANALYZER_REPO_TAG}") + FetchContent_Declare( + triton-perf-analyzer + GIT_REPOSITORY "https://github.com/triton-inference-server/perf_analyzer.git" + GIT_TAG ${TRITON_PERF_ANALYZER_REPO_TAG} + GIT_SHALLOW TRUE + ) + FetchContent_MakeAvailable(triton-perf-analyzer) + endif() +endif() + +# --------------------------------------------------------------------------- +# Python wheel targets (Req 29) +# Built AFTER the main CMake compilation as an explicit additional step: +# cmake --build --preset --target wheels +# +# Individual wheel targets: +# tritonserver-wheel — tritonserver Python SDK wheel (from core/python/) +# client-wheel — tritonclient wheels (from client/) +# wheels — convenience target that builds all wheels +# --------------------------------------------------------------------------- +find_package(Python3 QUIET COMPONENTS Interpreter) + +if(Python3_FOUND) + # Server Python SDK wheel (core/python/tritonserver package) + set(_server_py_dir "${CMAKE_CURRENT_SOURCE_DIR}/../core/python") + if(EXISTS "${_server_py_dir}/setup.py") + add_custom_target(tritonserver-wheel + COMMAND 
${Python3_EXECUTABLE} -m pip wheel + --no-deps + --wheel-dir "${CMAKE_BINARY_DIR}/wheels" + "${_server_py_dir}" + COMMENT "Building tritonserver Python wheel" + VERBATIM + ) + endif() + + # Client wheels (tritonclient[grpc] + tritonclient[http]) + set(_client_py_dir "${CMAKE_CURRENT_SOURCE_DIR}/../client") + if(EXISTS "${_client_py_dir}/setup.py" OR EXISTS "${_client_py_dir}/pyproject.toml") + add_custom_target(client-wheel + COMMAND ${Python3_EXECUTABLE} -m pip wheel + --no-deps + --wheel-dir "${CMAKE_BINARY_DIR}/wheels" + "${_client_py_dir}" + COMMENT "Building tritonclient Python wheels" + VERBATIM + ) + endif() + + # Convenience umbrella target + add_custom_target(wheels + COMMENT "Building all Python wheels into ${CMAKE_BINARY_DIR}/wheels/" + ) + if(TARGET tritonserver-wheel) + add_dependencies(wheels tritonserver-wheel) + endif() + if(TARGET client-wheel) + add_dependencies(wheels client-wheel) + endif() +else() + message(STATUS "Python3 not found — 'wheels' target will not be available") +endif() diff --git a/CMakePresets.json b/CMakePresets.json new file mode 100644 index 0000000000..21b4cd4199 --- /dev/null +++ b/CMakePresets.json @@ -0,0 +1,4 @@ +{ + "version": 6, + "include": ["cmake/CMakePresets.json"] +} diff --git a/build.py b/build.py.legacy similarity index 100% rename from build.py rename to build.py.legacy diff --git a/cmake/CMakePresets.json b/cmake/CMakePresets.json new file mode 100644 index 0000000000..4b0019ddda --- /dev/null +++ b/cmake/CMakePresets.json @@ -0,0 +1,88 @@ +{ + "version": 6, + "cmakeMinimumRequired": { "major": 3, "minor": 25, "patch": 0 }, + "vendor": { + "note": "AUTHORITATIVE PRESET FILE. When building from server/ all downstream repos (core, common, backend) are included via add_subdirectory and inherit these variables. Downstream repos have their own CMakePresets.json for standalone builds only." 
+ }, + "configurePresets": [ + { + "name": "conan-base", + "hidden": true, + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { "CMAKE_EXPORT_COMPILE_COMMANDS": "ON" } + }, + { + "name": "release", + "inherits": "conan-base", + "displayName": "Release (GPU + HTTP + gRPC + Metrics)", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON" + } + }, + { + "name": "debug", + "inherits": "conan-base", + "displayName": "Debug (GPU + HTTP + gRPC + Metrics)", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Debug", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON" + } + }, + { + "name": "cpu-only", + "inherits": "conan-base", + "displayName": "Release CPU-only", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "OFF", + "TRITON_ENABLE_METRICS_GPU": "OFF", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON" + } + }, + { + "name": "all-features", + "inherits": "conan-base", + "displayName": "Release - all features (no cloud storage)", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + "TRITON_ENABLE_METRICS_GPU": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_TRACING": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_ENABLE_GCS": "OFF", + "TRITON_ENABLE_S3": "OFF", + "TRITON_ENABLE_AZURE_STORAGE": "OFF", + "TRITON_ENABLE_IDENTITY_BACKEND": "ON", + "TRITON_ENABLE_REPEAT_BACKEND": "ON", + "TRITON_ENABLE_SQUARE_BACKEND": "ON", + "TRITON_ENABLE_PYTHON_BACKEND": "OFF", + "TRITON_ENABLE_ONNXRUNTIME_BACKEND": "OFF", + "TRITON_ENABLE_PYTORCH_BACKEND": "OFF", 
+ "TRITON_ENABLE_TENSORRT_BACKEND": "OFF", + "TRITON_ENABLE_VLLM_BACKEND": "OFF", + "TRITON_ENABLE_TENSORRTLLM_BACKEND": "OFF" + } + } + ], + "buildPresets": [ + { "name": "release", "configurePreset": "release" }, + { "name": "debug", "configurePreset": "debug" }, + { "name": "cpu-only", "configurePreset": "cpu-only" }, + { "name": "all-features", "configurePreset": "all-features" } + ] +} diff --git a/cmake/TritonCheckCxxAbi.cmake b/cmake/TritonCheckCxxAbi.cmake new file mode 100644 index 0000000000..17acb32e1c --- /dev/null +++ b/cmake/TritonCheckCxxAbi.cmake @@ -0,0 +1,110 @@ +# TritonCheckCxxAbi.cmake +# +# Detects the _GLIBCXX_USE_CXX11_ABI value that the current build will use and +# validates that the Conan-provided binary packages were compiled with the same +# ABI setting. A mismatch causes silent link failures or runtime crashes when +# mixing old ABI (=0) and new ABI (=1) translation units. +# +# Usage (call once, early in the root CMakeLists.txt after find_package calls): +# include(cmake/TritonCheckCxxAbi.cmake) +# triton_check_cxx_abi() +# +# The macro sets TRITON_GLIBCXX_USE_CXX11_ABI (cache variable) to 0 or 1. + +macro(triton_check_cxx_abi) + if(NOT CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + return() + endif() + + # --------------------------------------------------------------- + # 1. Probe the ABI the compiler will use for this build. + # --------------------------------------------------------------- + include(CheckCXXSourceCompiles) + set(_abi_probe_src " +#include <string> +#if _GLIBCXX_USE_CXX11_ABI +int use_new_abi = 1; +#else +int use_new_abi = 0; +#endif +int main() { return use_new_abi; } +") + # Ask the compiler directly via try_compile output rather than + # running the binary, so cross-compilation works too. 
+ file(WRITE "${CMAKE_BINARY_DIR}/_abi_probe.cpp" "${_abi_probe_src}") + try_compile( + _abi_compile_ok + "${CMAKE_BINARY_DIR}/_abi_probe_build" + SOURCES "${CMAKE_BINARY_DIR}/_abi_probe.cpp" + CXX_STANDARD 17 + OUTPUT_VARIABLE _abi_compile_output + ) + + if(NOT _abi_compile_ok) + message(WARNING "TritonCheckCxxAbi: could not compile ABI probe. Skipping ABI check.") + return() + endif() + + # Extract the _GLIBCXX_USE_CXX11_ABI value by compiling a file that + # emits a preprocessor-expanded value we can grep. + file(WRITE "${CMAKE_BINARY_DIR}/_abi_value.cpp" + "#include <string>\nint abi_value = _GLIBCXX_USE_CXX11_ABI;\n") + execute_process( + COMMAND "${CMAKE_CXX_COMPILER}" -E -dM + "${CMAKE_BINARY_DIR}/_abi_value.cpp" + OUTPUT_VARIABLE _abi_macros + ERROR_QUIET + ) + + if(_abi_macros MATCHES "#define _GLIBCXX_USE_CXX11_ABI ([01])") + set(_detected_abi "${CMAKE_MATCH_1}") + else() + # Default: GCC >= 5 defaults to 1 (new ABI). + set(_detected_abi 1) + endif() + + set(TRITON_GLIBCXX_USE_CXX11_ABI "${_detected_abi}" + CACHE STRING "Detected _GLIBCXX_USE_CXX11_ABI value (0=old, 1=new)" FORCE) + message(STATUS "TritonCheckCxxAbi: _GLIBCXX_USE_CXX11_ABI = ${TRITON_GLIBCXX_USE_CXX11_ABI}") + + # --------------------------------------------------------------- + # 2. Validate Conan-provided binary packages that embed ABI info. + # --------------------------------------------------------------- + # Conan 2 records the compiler.libcxx setting in the package ID. + # If the Conan toolchain was generated we can cross-check via the + # conanbuildinfo vars; otherwise we skip silently. + if(DEFINED CONAN_COMPILER_LIBCXX) + if("${CONAN_COMPILER_LIBCXX}" STREQUAL "libstdc++" AND + "${TRITON_GLIBCXX_USE_CXX11_ABI}" STREQUAL "1") + message(FATAL_ERROR + "ABI MISMATCH: Build is using new C++11 ABI " + "(_GLIBCXX_USE_CXX11_ABI=1) but Conan packages were built " + "with libstdc++ (old ABI). 
Re-run 'conan install' with " + "compiler.libcxx=libstdc++11 in the host profile.") + endif() + if("${CONAN_COMPILER_LIBCXX}" STREQUAL "libstdc++11" AND + "${TRITON_GLIBCXX_USE_CXX11_ABI}" STREQUAL "0") + message(FATAL_ERROR + "ABI MISMATCH: Build is using old ABI " + "(_GLIBCXX_USE_CXX11_ABI=0) but Conan packages were built " + "with libstdc++11 (new ABI). Re-run 'conan install' with " + "compiler.libcxx=libstdc++ in the host profile, or remove " + "-D_GLIBCXX_USE_CXX11_ABI=0 from your compile flags.") + endif() + endif() + + # --------------------------------------------------------------- + # 3. Warn if a user has manually set a conflicting compile flag. + # --------------------------------------------------------------- + if(DEFINED CMAKE_CXX_FLAGS AND + CMAKE_CXX_FLAGS MATCHES "_GLIBCXX_USE_CXX11_ABI=([01])") + set(_flag_abi "${CMAKE_MATCH_1}") + if(NOT "${_flag_abi}" STREQUAL "${_detected_abi}") + message(WARNING + "TritonCheckCxxAbi: CMAKE_CXX_FLAGS sets " + "_GLIBCXX_USE_CXX11_ABI=${_flag_abi} but the compiler " + "default is ${_detected_abi}. This will likely cause " + "linker errors with Conan-provided binary packages.") + endif() + endif() +endmacro() diff --git a/cmake/presets/CMakePresets.TritonClient.Release.CPU.ubuntu.aarch64.json b/cmake/presets/CMakePresets.TritonClient.Release.CPU.ubuntu.aarch64.json new file mode 100644 index 0000000000..c512080276 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonClient.Release.CPU.ubuntu.aarch64.json @@ -0,0 +1,64 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonClient", + "platform": "Ubuntu aarch64, CPU-only (Graviton, Neoverse, Apple Silicon dev)", + "notes": [ + "CPU-only client build for aarch64 — no CUDA required.", + "Targets AWS Graviton, Ampere Altra, and NVIDIA Grace (CPU die).", + "Run cmake from the client/ source directory." 
+ ], + "conan_install": [ + "conan install client/", + " --profile:host=server/conan/profiles/linux-gcc13-release-aarch64", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=False'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " --build=missing", + " --output-folder=client/build/triton-client-release-cpu-ubuntu-aarch64/conan", + "", + "cmake --preset triton-client-release-cpu-ubuntu-aarch64", + "cmake --build --preset triton-client-release-cpu-ubuntu-aarch64", + "cmake --build --preset triton-client-release-cpu-ubuntu-aarch64 --target wheels" + ] + } + }, + "configurePresets": [ + { + "name": "triton-client-release-cpu-ubuntu-aarch64", + "displayName": "TritonClient — Release, CPU-only, Ubuntu, aarch64", + "description": "CPU-only client library build for Ubuntu Linux on aarch64.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "OFF", + "TRITON_ENABLE_CC_HTTP": "ON", + "TRITON_ENABLE_CC_GRPC": "ON", + "TRITON_ENABLE_PYTHON_HTTP": "ON", + "TRITON_ENABLE_PYTHON_GRPC": "ON", + "TRITON_ENABLE_PERF_ANALYZER": "ON", + "TRITON_ENABLE_EXAMPLES": "ON", + "TRITON_ENABLE_TESTS": "ON" + } + } + ], + "buildPresets": [ + { + "name": "triton-client-release-cpu-ubuntu-aarch64", + "configurePreset": "triton-client-release-cpu-ubuntu-aarch64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-client-release-cpu-ubuntu-aarch64", + "configurePreset": "triton-client-release-cpu-ubuntu-aarch64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonClient.Release.CPU.ubuntu.x86_64.json b/cmake/presets/CMakePresets.TritonClient.Release.CPU.ubuntu.x86_64.json new file mode 100644 index 0000000000..f32a410183 --- /dev/null +++ 
b/cmake/presets/CMakePresets.TritonClient.Release.CPU.ubuntu.x86_64.json @@ -0,0 +1,64 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonClient", + "platform": "Ubuntu x86_64, CPU-only (no NVIDIA GPU required)", + "notes": [ + "CPU-only client build — no CUDA toolkit required.", + "CUDA shared memory operations are disabled.", + "Run cmake from the client/ source directory." + ], + "conan_install": [ + "conan install client/", + " --profile:host=server/conan/profiles/linux-gcc13-release", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=False'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " --build=missing", + " --output-folder=client/build/triton-client-release-cpu-ubuntu-x86_64/conan", + "", + "cmake --preset triton-client-release-cpu-ubuntu-x86_64", + "cmake --build --preset triton-client-release-cpu-ubuntu-x86_64", + "cmake --build --preset triton-client-release-cpu-ubuntu-x86_64 --target wheels" + ] + } + }, + "configurePresets": [ + { + "name": "triton-client-release-cpu-ubuntu-x86_64", + "displayName": "TritonClient — Release, CPU-only, Ubuntu, x86_64", + "description": "CPU-only client library build for Ubuntu Linux on x86_64. 
No CUDA required.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "OFF", + "TRITON_ENABLE_CC_HTTP": "ON", + "TRITON_ENABLE_CC_GRPC": "ON", + "TRITON_ENABLE_PYTHON_HTTP": "ON", + "TRITON_ENABLE_PYTHON_GRPC": "ON", + "TRITON_ENABLE_PERF_ANALYZER": "ON", + "TRITON_ENABLE_EXAMPLES": "ON", + "TRITON_ENABLE_TESTS": "ON" + } + } + ], + "buildPresets": [ + { + "name": "triton-client-release-cpu-ubuntu-x86_64", + "configurePreset": "triton-client-release-cpu-ubuntu-x86_64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-client-release-cpu-ubuntu-x86_64", + "configurePreset": "triton-client-release-cpu-ubuntu-x86_64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonClient.Release.CUDA.manylinux.aarch64.json b/cmake/presets/CMakePresets.TritonClient.Release.CUDA.manylinux.aarch64.json new file mode 100644 index 0000000000..1e6546e50e --- /dev/null +++ b/cmake/presets/CMakePresets.TritonClient.Release.CUDA.manylinux.aarch64.json @@ -0,0 +1,65 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonClient", + "platform": "manylinux (RHEL/CentOS-compatible) aarch64 with NVIDIA GPU (CUDA)", + "notes": [ + "manylinux client build for aarch64 — broad Linux distribution compatibility.", + "Targets Jetson Orin (SM 8.7) and Grace Hopper (SM 9.0).", + "Run cmake from the client/ source directory." 
+ ], + "conan_install": [ + "conan install client/", + " --profile:host=server/conan/profiles/linux-gcc13-release-manylinux-aarch64", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " --build=missing", + " --output-folder=client/build/triton-client-release-cuda-manylinux-aarch64/conan", + "", + "cmake --preset triton-client-release-cuda-manylinux-aarch64", + "cmake --build --preset triton-client-release-cuda-manylinux-aarch64", + "cmake --build --preset triton-client-release-cuda-manylinux-aarch64 --target wheels" + ] + } + }, + "configurePresets": [ + { + "name": "triton-client-release-cuda-manylinux-aarch64", + "displayName": "TritonClient — Release, CUDA, manylinux, aarch64", + "description": "GPU-enabled client library build for RHEL-compatible Linux on aarch64.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_CC_HTTP": "ON", + "TRITON_ENABLE_CC_GRPC": "ON", + "TRITON_ENABLE_PYTHON_HTTP": "ON", + "TRITON_ENABLE_PYTHON_GRPC": "ON", + "TRITON_ENABLE_PERF_ANALYZER": "ON", + "TRITON_ENABLE_EXAMPLES": "OFF", + "TRITON_ENABLE_TESTS": "OFF", + "CMAKE_CUDA_RUNTIME_LIBRARY": "Shared" + } + } + ], + "buildPresets": [ + { + "name": "triton-client-release-cuda-manylinux-aarch64", + "configurePreset": "triton-client-release-cuda-manylinux-aarch64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-client-release-cuda-manylinux-aarch64", + "configurePreset": "triton-client-release-cuda-manylinux-aarch64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonClient.Release.CUDA.manylinux.x86_64.json 
b/cmake/presets/CMakePresets.TritonClient.Release.CUDA.manylinux.x86_64.json new file mode 100644 index 0000000000..d01c17a29f --- /dev/null +++ b/cmake/presets/CMakePresets.TritonClient.Release.CUDA.manylinux.x86_64.json @@ -0,0 +1,65 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonClient", + "platform": "manylinux (RHEL/CentOS-compatible) x86_64 with NVIDIA GPU (CUDA)", + "notes": [ + "manylinux client build — broad Linux distribution compatibility (GLIBC 2.17+).", + "Primary distribution target: the tritonclient Python wheels published to PyPI.", + "Run cmake from the client/ source directory." + ], + "conan_install": [ + "conan install client/", + " --profile:host=server/conan/profiles/linux-gcc13-release-manylinux", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " --build=missing", + " --output-folder=client/build/triton-client-release-cuda-manylinux-x86_64/conan", + "", + "cmake --preset triton-client-release-cuda-manylinux-x86_64", + "cmake --build --preset triton-client-release-cuda-manylinux-x86_64", + "cmake --build --preset triton-client-release-cuda-manylinux-x86_64 --target wheels" + ] + } + }, + "configurePresets": [ + { + "name": "triton-client-release-cuda-manylinux-x86_64", + "displayName": "TritonClient — Release, CUDA, manylinux, x86_64", + "description": "GPU-enabled client library build for RHEL/CentOS-compatible Linux on x86_64.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_CC_HTTP": "ON", + "TRITON_ENABLE_CC_GRPC": "ON", + "TRITON_ENABLE_PYTHON_HTTP": "ON", + "TRITON_ENABLE_PYTHON_GRPC": "ON", + "TRITON_ENABLE_PERF_ANALYZER": "ON", + 
"TRITON_ENABLE_EXAMPLES": "OFF", + "TRITON_ENABLE_TESTS": "OFF", + "CMAKE_CUDA_RUNTIME_LIBRARY": "Shared" + } + } + ], + "buildPresets": [ + { + "name": "triton-client-release-cuda-manylinux-x86_64", + "configurePreset": "triton-client-release-cuda-manylinux-x86_64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-client-release-cuda-manylinux-x86_64", + "configurePreset": "triton-client-release-cuda-manylinux-x86_64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonClient.Release.CUDA.ubuntu.aarch64.json b/cmake/presets/CMakePresets.TritonClient.Release.CUDA.ubuntu.aarch64.json new file mode 100644 index 0000000000..a80342c5ae --- /dev/null +++ b/cmake/presets/CMakePresets.TritonClient.Release.CUDA.ubuntu.aarch64.json @@ -0,0 +1,64 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonClient", + "platform": "Ubuntu aarch64 with NVIDIA GPU (CUDA) — Jetson Orin, Grace Hopper", + "notes": [ + "GPU-enabled client build for aarch64.", + "Targets Jetson Orin (SM 8.7) and Grace Hopper (SM 9.0).", + "Run cmake from the client/ source directory." 
+ ], + "conan_install": [ + "conan install client/", + " --profile:host=server/conan/profiles/linux-gcc13-release-aarch64", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " --build=missing", + " --output-folder=client/build/triton-client-release-cuda-ubuntu-aarch64/conan", + "", + "cmake --preset triton-client-release-cuda-ubuntu-aarch64", + "cmake --build --preset triton-client-release-cuda-ubuntu-aarch64" + ] + } + }, + "configurePresets": [ + { + "name": "triton-client-release-cuda-ubuntu-aarch64", + "displayName": "TritonClient — Release, CUDA, Ubuntu, aarch64", + "description": "GPU-enabled client library build for Ubuntu Linux on aarch64 (Jetson Orin, Grace Hopper).", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_CC_HTTP": "ON", + "TRITON_ENABLE_CC_GRPC": "ON", + "TRITON_ENABLE_PYTHON_HTTP": "ON", + "TRITON_ENABLE_PYTHON_GRPC": "ON", + "TRITON_ENABLE_PERF_ANALYZER": "ON", + "TRITON_ENABLE_EXAMPLES": "ON", + "TRITON_ENABLE_TESTS": "ON", + "CMAKE_CUDA_RUNTIME_LIBRARY": "Shared" + } + } + ], + "buildPresets": [ + { + "name": "triton-client-release-cuda-ubuntu-aarch64", + "configurePreset": "triton-client-release-cuda-ubuntu-aarch64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-client-release-cuda-ubuntu-aarch64", + "configurePreset": "triton-client-release-cuda-ubuntu-aarch64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonClient.Release.CUDA.ubuntu.x86_64.json b/cmake/presets/CMakePresets.TritonClient.Release.CUDA.ubuntu.x86_64.json new file mode 100644 index 0000000000..c27515e095 --- /dev/null +++ 
b/cmake/presets/CMakePresets.TritonClient.Release.CUDA.ubuntu.x86_64.json @@ -0,0 +1,65 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonClient", + "platform": "Ubuntu x86_64 with NVIDIA GPU (CUDA)", + "notes": [ + "GPU-enabled client build — CUDA shared memory support included.", + "Builds C++ and Python client libraries plus perf_analyzer.", + "Run cmake from the client/ source directory." + ], + "conan_install": [ + "conan install client/", + " --profile:host=server/conan/profiles/linux-gcc13-release", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " --build=missing", + " --output-folder=client/build/triton-client-release-cuda-ubuntu-x86_64/conan", + "", + "cmake --preset triton-client-release-cuda-ubuntu-x86_64", + "cmake --build --preset triton-client-release-cuda-ubuntu-x86_64", + "cmake --build --preset triton-client-release-cuda-ubuntu-x86_64 --target wheels" + ] + } + }, + "configurePresets": [ + { + "name": "triton-client-release-cuda-ubuntu-x86_64", + "displayName": "TritonClient — Release, CUDA, Ubuntu, x86_64", + "description": "GPU-enabled client library build for Ubuntu Linux on x86_64.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_CC_HTTP": "ON", + "TRITON_ENABLE_CC_GRPC": "ON", + "TRITON_ENABLE_PYTHON_HTTP": "ON", + "TRITON_ENABLE_PYTHON_GRPC": "ON", + "TRITON_ENABLE_PERF_ANALYZER": "ON", + "TRITON_ENABLE_EXAMPLES": "ON", + "TRITON_ENABLE_TESTS": "ON", + "CMAKE_CUDA_RUNTIME_LIBRARY": "Shared" + } + } + ], + "buildPresets": [ + { + "name": "triton-client-release-cuda-ubuntu-x86_64", + "configurePreset": "triton-client-release-cuda-ubuntu-x86_64", + "jobs": 8 
+ } + ], + "testPresets": [ + { + "name": "triton-client-release-cuda-ubuntu-x86_64", + "configurePreset": "triton-client-release-cuda-ubuntu-x86_64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonInferenceServer.Release.CPU.ubuntu.aarch64.json b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CPU.ubuntu.aarch64.json new file mode 100644 index 0000000000..06e0cc1072 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CPU.ubuntu.aarch64.json @@ -0,0 +1,66 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonInferenceServer", + "platform": "Ubuntu aarch64, CPU-only (Graviton, Neoverse, Apple Silicon dev)", + "notes": [ + "CPU-only server build for aarch64 — no CUDA required.", + "Targets AWS Graviton, Ampere Altra, NVIDIA Grace (CPU die), and similar ARM64 platforms.", + "SageMaker endpoint enabled: AWS Graviton instances are a common deployment target." + ], + "conan_install": [ + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release-aarch64", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=False'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-inference-server-release-cpu-ubuntu-aarch64/conan", + "", + "cmake --preset triton-inference-server-release-cpu-ubuntu-aarch64", + "cmake --build --preset triton-inference-server-release-cpu-ubuntu-aarch64", + "cmake --build --preset triton-inference-server-release-cpu-ubuntu-aarch64 --target wheels" + ] + } + }, + "configurePresets": [ + { + "name": "triton-inference-server-release-cpu-ubuntu-aarch64", + "displayName": "TritonInferenceServer — Release, CPU-only, Ubuntu, aarch64", + "description": "CPU-only release build for Ubuntu Linux on aarch64 (Graviton, Neoverse, Grace CPU).", + "generator": "Ninja", + "binaryDir": 
"${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "OFF", + "TRITON_ENABLE_METRICS_GPU": "OFF", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_ENABLE_SAGEMAKER": "ON", + "TRITON_ENABLE_VERTEX_AI": "OFF" + } + } + ], + "buildPresets": [ + { + "name": "triton-inference-server-release-cpu-ubuntu-aarch64", + "configurePreset": "triton-inference-server-release-cpu-ubuntu-aarch64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-inference-server-release-cpu-ubuntu-aarch64", + "configurePreset": "triton-inference-server-release-cpu-ubuntu-aarch64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonInferenceServer.Release.CPU.ubuntu.x86_64.json b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CPU.ubuntu.x86_64.json new file mode 100644 index 0000000000..e0e11cabed --- /dev/null +++ b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CPU.ubuntu.x86_64.json @@ -0,0 +1,66 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonInferenceServer", + "platform": "Ubuntu x86_64, CPU-only (no NVIDIA GPU required)", + "notes": [ + "CPU-only server build — no CUDA toolkit required.", + "Suitable for backends: identity, repeat, ensemble, square, Python, ONNX Runtime (CPU), OpenVINO.", + "SageMaker and Vertex AI endpoints enabled." 
+ ], + "conan_install": [ + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=False'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-inference-server-release-cpu-ubuntu-x86_64/conan", + "", + "cmake --preset triton-inference-server-release-cpu-ubuntu-x86_64", + "cmake --build --preset triton-inference-server-release-cpu-ubuntu-x86_64", + "cmake --build --preset triton-inference-server-release-cpu-ubuntu-x86_64 --target wheels" + ] + } + }, + "configurePresets": [ + { + "name": "triton-inference-server-release-cpu-ubuntu-x86_64", + "displayName": "TritonInferenceServer — Release, CPU-only, Ubuntu, x86_64", + "description": "CPU-only release build for Ubuntu Linux on x86_64. No CUDA required.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "OFF", + "TRITON_ENABLE_METRICS_GPU": "OFF", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_ENABLE_SAGEMAKER": "ON", + "TRITON_ENABLE_VERTEX_AI": "ON" + } + } + ], + "buildPresets": [ + { + "name": "triton-inference-server-release-cpu-ubuntu-x86_64", + "configurePreset": "triton-inference-server-release-cpu-ubuntu-x86_64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-inference-server-release-cpu-ubuntu-x86_64", + "configurePreset": "triton-inference-server-release-cpu-ubuntu-x86_64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git 
a/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.manylinux.aarch64.json b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.manylinux.aarch64.json new file mode 100644 index 0000000000..fd72da0ce2 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.manylinux.aarch64.json @@ -0,0 +1,68 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonInferenceServer", + "platform": "manylinux (RHEL/CentOS-compatible) aarch64 with NVIDIA GPU (CUDA)", + "notes": [ + "manylinux builds target GLIBC 2.17+ compatibility.", + "aarch64 CUDA targets: Jetson Orin (SM 8.7) and Grace Hopper (SM 9.0).", + "SageMaker and Vertex AI endpoints are disabled." + ], + "conan_install": [ + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release-manylinux-aarch64", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-inference-server-release-cuda-manylinux-aarch64/conan", + "", + "cmake --preset triton-inference-server-release-cuda-manylinux-aarch64", + "cmake --build --preset triton-inference-server-release-cuda-manylinux-aarch64" + ] + } + }, + "configurePresets": [ + { + "name": "triton-inference-server-release-cuda-manylinux-aarch64", + "displayName": "TritonInferenceServer — Release, CUDA, manylinux, aarch64", + "description": "GPU-enabled release build for RHEL-compatible Linux on aarch64 (Orin, Grace Hopper).", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + 
"TRITON_ENABLE_METRICS_GPU": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_ENABLE_SAGEMAKER": "OFF", + "TRITON_ENABLE_VERTEX_AI": "OFF", + "TRITON_MIN_COMPUTE_CAPABILITY": "8.7", + "CMAKE_CUDA_ARCHITECTURES": "87;90", + "CMAKE_CUDA_RUNTIME_LIBRARY": "Shared" + } + } + ], + "buildPresets": [ + { + "name": "triton-inference-server-release-cuda-manylinux-aarch64", + "configurePreset": "triton-inference-server-release-cuda-manylinux-aarch64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-inference-server-release-cuda-manylinux-aarch64", + "configurePreset": "triton-inference-server-release-cuda-manylinux-aarch64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.manylinux.x86_64.json b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.manylinux.x86_64.json new file mode 100644 index 0000000000..b2aa9690e8 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.manylinux.x86_64.json @@ -0,0 +1,68 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonInferenceServer", + "platform": "manylinux (RHEL/CentOS-compatible) x86_64 with NVIDIA GPU (CUDA)", + "notes": [ + "manylinux builds target GLIBC 2.17+ compatibility for broad Linux distribution support.", + "SageMaker and Vertex AI endpoints are disabled (not used in RHEL-based container deployments).", + "Use the linux-gcc13-release-manylinux Conan profile which links against the manylinux sysroot." 
+ ], + "conan_install": [ + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release-manylinux", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-inference-server-release-cuda-manylinux-x86_64/conan", + "", + "cmake --preset triton-inference-server-release-cuda-manylinux-x86_64", + "cmake --build --preset triton-inference-server-release-cuda-manylinux-x86_64" + ] + } + }, + "configurePresets": [ + { + "name": "triton-inference-server-release-cuda-manylinux-x86_64", + "displayName": "TritonInferenceServer — Release, CUDA, manylinux, x86_64", + "description": "GPU-enabled release build for RHEL/CentOS-compatible (manylinux2014) Linux on x86_64.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + "TRITON_ENABLE_METRICS_GPU": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_ENABLE_SAGEMAKER": "OFF", + "TRITON_ENABLE_VERTEX_AI": "OFF", + "TRITON_MIN_COMPUTE_CAPABILITY": "6.0", + "CMAKE_CUDA_ARCHITECTURES": "60;70;75;80;86;89;90", + "CMAKE_CUDA_RUNTIME_LIBRARY": "Shared" + } + } + ], + "buildPresets": [ + { + "name": "triton-inference-server-release-cuda-manylinux-x86_64", + "configurePreset": "triton-inference-server-release-cuda-manylinux-x86_64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-inference-server-release-cuda-manylinux-x86_64", + "configurePreset": "triton-inference-server-release-cuda-manylinux-x86_64", + "filter": { "exclude": { "label": 
"requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.ubuntu.aarch64.json b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.ubuntu.aarch64.json new file mode 100644 index 0000000000..19305f5cf9 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.ubuntu.aarch64.json @@ -0,0 +1,68 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonInferenceServer", + "platform": "Ubuntu aarch64 with NVIDIA GPU (CUDA) — Jetson Orin, Grace Hopper", + "notes": [ + "GPU-enabled server build for Ubuntu on aarch64.", + "Targets Jetson Orin (SM 8.7) and Grace Hopper (SM 9.0).", + "SageMaker and Vertex AI endpoints enabled." + ], + "conan_install": [ + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release-aarch64", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-inference-server-release-cuda-ubuntu-aarch64/conan", + "", + "cmake --preset triton-inference-server-release-cuda-ubuntu-aarch64", + "cmake --build --preset triton-inference-server-release-cuda-ubuntu-aarch64" + ] + } + }, + "configurePresets": [ + { + "name": "triton-inference-server-release-cuda-ubuntu-aarch64", + "displayName": "TritonInferenceServer — Release, CUDA, Ubuntu, aarch64", + "description": "GPU-enabled release build for Ubuntu Linux on aarch64 (Jetson Orin = SM 8.7, Grace Hopper = SM 9.0).", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + 
"TRITON_ENABLE_METRICS_GPU": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_ENABLE_SAGEMAKER": "ON", + "TRITON_ENABLE_VERTEX_AI": "ON", + "TRITON_MIN_COMPUTE_CAPABILITY": "8.7", + "CMAKE_CUDA_ARCHITECTURES": "87;90", + "CMAKE_CUDA_RUNTIME_LIBRARY": "Shared" + } + } + ], + "buildPresets": [ + { + "name": "triton-inference-server-release-cuda-ubuntu-aarch64", + "configurePreset": "triton-inference-server-release-cuda-ubuntu-aarch64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-inference-server-release-cuda-ubuntu-aarch64", + "configurePreset": "triton-inference-server-release-cuda-ubuntu-aarch64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.ubuntu.x86_64.json b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.ubuntu.x86_64.json new file mode 100644 index 0000000000..a58995bc82 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.ubuntu.x86_64.json @@ -0,0 +1,69 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonInferenceServer", + "platform": "Ubuntu x86_64 with NVIDIA GPU (CUDA)", + "notes": [ + "GPU-enabled server build for Ubuntu on x86_64.", + "Requires CUDA toolkit, cuDNN, and an NVIDIA driver.", + "SageMaker and Vertex AI endpoints enabled for cloud deployment." 
+ ], + "conan_install": [ + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-inference-server-release-cuda-ubuntu-x86_64/conan", + "", + "cmake --preset triton-inference-server-release-cuda-ubuntu-x86_64", + "cmake --build --preset triton-inference-server-release-cuda-ubuntu-x86_64", + "cmake --build --preset triton-inference-server-release-cuda-ubuntu-x86_64 --target wheels" + ] + } + }, + "configurePresets": [ + { + "name": "triton-inference-server-release-cuda-ubuntu-x86_64", + "displayName": "TritonInferenceServer — Release, CUDA, Ubuntu, x86_64", + "description": "GPU-enabled release build for Ubuntu Linux on x86_64. Requires CUDA toolkit and NVIDIA driver.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + "TRITON_ENABLE_METRICS_GPU": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_ENABLE_SAGEMAKER": "ON", + "TRITON_ENABLE_VERTEX_AI": "ON", + "TRITON_MIN_COMPUTE_CAPABILITY": "7.5", + "CMAKE_CUDA_ARCHITECTURES": "75;80;86;89;90", + "CMAKE_CUDA_RUNTIME_LIBRARY": "Shared" + } + } + ], + "buildPresets": [ + { + "name": "triton-inference-server-release-cuda-ubuntu-x86_64", + "configurePreset": "triton-inference-server-release-cuda-ubuntu-x86_64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-inference-server-release-cuda-ubuntu-x86_64", + "configurePreset": 
"triton-inference-server-release-cuda-ubuntu-x86_64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonPerfAnalyzer.Release.manylinux.aarch64.json b/cmake/presets/CMakePresets.TritonPerfAnalyzer.Release.manylinux.aarch64.json new file mode 100644 index 0000000000..2f20f9caa4 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonPerfAnalyzer.Release.manylinux.aarch64.json @@ -0,0 +1,62 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonPerfAnalyzer", + "platform": "manylinux (RHEL/CentOS-compatible) aarch64", + "notes": [ + "PerfAnalyzer manylinux build for aarch64 — broad compatibility (GLIBC 2.17+).", + "Targets Jetson Orin, Grace Hopper, and AWS Graviton platforms.", + "GPU mode enabled for benchmarking GPU models on aarch64.", + "Run cmake from the perf_analyzer/ source directory." + ], + "conan_install": [ + "conan install perf_analyzer/", + " --profile:host=server/conan/profiles/linux-gcc13-release-manylinux-aarch64", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " --build=missing", + " --output-folder=perf_analyzer/build/triton-perf-analyzer-release-manylinux-aarch64/conan", + "", + "cmake --preset triton-perf-analyzer-release-manylinux-aarch64", + "cmake --build --preset triton-perf-analyzer-release-manylinux-aarch64" + ] + } + }, + "configurePresets": [ + { + "name": "triton-perf-analyzer-release-manylinux-aarch64", + "displayName": "TritonPerfAnalyzer — Release, manylinux, aarch64", + "description": "PerfAnalyzer release build for manylinux-compatible Linux on aarch64.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + 
"TRITON_ENABLE_CC_HTTP": "ON", + "TRITON_ENABLE_CC_GRPC": "ON", + "TRITON_ENABLE_PERF_ANALYZER_OPENAI": "ON", + "TRITON_ENABLE_PERF_ANALYZER_TFS": "ON", + "TRITON_ENABLE_PERF_ANALYZER_TS": "ON" + } + } + ], + "buildPresets": [ + { + "name": "triton-perf-analyzer-release-manylinux-aarch64", + "configurePreset": "triton-perf-analyzer-release-manylinux-aarch64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-perf-analyzer-release-manylinux-aarch64", + "configurePreset": "triton-perf-analyzer-release-manylinux-aarch64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonPerfAnalyzer.Release.manylinux.x86_64.json b/cmake/presets/CMakePresets.TritonPerfAnalyzer.Release.manylinux.x86_64.json new file mode 100644 index 0000000000..bde85fdca5 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonPerfAnalyzer.Release.manylinux.x86_64.json @@ -0,0 +1,62 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonPerfAnalyzer", + "platform": "manylinux (RHEL/CentOS-compatible) x86_64", + "notes": [ + "PerfAnalyzer is distributed as a standalone binary — manylinux ensures maximum", + "compatibility across Linux distributions (GLIBC 2.17+).", + "GPU mode (CUDA shared memory, GPU metrics) is enabled for benchmarking GPU models.", + "Run cmake from the perf_analyzer/ source directory." 
+ ], + "conan_install": [ + "conan install perf_analyzer/", + " --profile:host=server/conan/profiles/linux-gcc13-release-manylinux", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " --build=missing", + " --output-folder=perf_analyzer/build/triton-perf-analyzer-release-manylinux-x86_64/conan", + "", + "cmake --preset triton-perf-analyzer-release-manylinux-x86_64", + "cmake --build --preset triton-perf-analyzer-release-manylinux-x86_64" + ] + } + }, + "configurePresets": [ + { + "name": "triton-perf-analyzer-release-manylinux-x86_64", + "displayName": "TritonPerfAnalyzer — Release, manylinux, x86_64", + "description": "PerfAnalyzer release build for manylinux-compatible Linux on x86_64.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_CC_HTTP": "ON", + "TRITON_ENABLE_CC_GRPC": "ON", + "TRITON_ENABLE_PERF_ANALYZER_OPENAI": "ON", + "TRITON_ENABLE_PERF_ANALYZER_TFS": "ON", + "TRITON_ENABLE_PERF_ANALYZER_TS": "ON" + } + } + ], + "buildPresets": [ + { + "name": "triton-perf-analyzer-release-manylinux-x86_64", + "configurePreset": "triton-perf-analyzer-release-manylinux-x86_64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-perf-analyzer-release-manylinux-x86_64", + "configurePreset": "triton-perf-analyzer-release-manylinux-x86_64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonTRTLLMBackend.Release.CUDA.ubuntu.aarch64.json b/cmake/presets/CMakePresets.TritonTRTLLMBackend.Release.CUDA.ubuntu.aarch64.json new file mode 100644 index 0000000000..d9672ed852 --- /dev/null +++ 
b/cmake/presets/CMakePresets.TritonTRTLLMBackend.Release.CUDA.ubuntu.aarch64.json @@ -0,0 +1,73 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonTRTLLMBackend", + "platform": "Ubuntu aarch64 with NVIDIA GPU (CUDA) — Grace Hopper (GH200)", + "notes": [ + "TensorRT-LLM backend build for Ubuntu on aarch64.", + "Primary target: NVIDIA Grace Hopper Superchip (GH200, SM 9.0).", + "The TensorRT-LLM library must be installed as a Python wheel before the build:", + " pip install tensorrt-llm --extra-index-url https://pypi.nvidia.com", + "Requires CUDA 12.x and TensorRT 10.x for aarch64.", + "Run cmake from the server/ source directory." + ], + "conan_install": [ + "pip install tensorrt-llm --extra-index-url https://pypi.nvidia.com", + "", + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release-aarch64", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-trtllm-backend-release-cuda-ubuntu-aarch64/conan", + "", + "cmake --preset triton-trtllm-backend-release-cuda-ubuntu-aarch64", + "cmake --build --preset triton-trtllm-backend-release-cuda-ubuntu-aarch64", + "cmake --build --preset triton-trtllm-backend-release-cuda-ubuntu-aarch64 --target tensorrtllm-backend" + ] + } + }, + "configurePresets": [ + { + "name": "triton-trtllm-backend-release-cuda-ubuntu-aarch64", + "displayName": "TritonTRTLLMBackend — Release, CUDA, Ubuntu, aarch64", + "description": "Triton server + TensorRT-LLM backend for Ubuntu Linux on aarch64 (Grace Hopper GH200 = SM 9.0). 
Requires tensorrt-llm wheel installed before cmake configure.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + "TRITON_ENABLE_METRICS_GPU": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_MIN_COMPUTE_CAPABILITY": "9.0", + "CMAKE_CUDA_ARCHITECTURES": "90", + "TRITON_ENABLE_TENSORRTLLM_BACKEND": "ON", + "TRITON_FETCH_MISSING_BACKENDS": "ON" + } + } + ], + "buildPresets": [ + { + "name": "triton-trtllm-backend-release-cuda-ubuntu-aarch64", + "configurePreset": "triton-trtllm-backend-release-cuda-ubuntu-aarch64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-trtllm-backend-release-cuda-ubuntu-aarch64", + "configurePreset": "triton-trtllm-backend-release-cuda-ubuntu-aarch64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonTRTLLMBackend.Release.CUDA.ubuntu.x86_64.json b/cmake/presets/CMakePresets.TritonTRTLLMBackend.Release.CUDA.ubuntu.x86_64.json new file mode 100644 index 0000000000..f63fd405e6 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonTRTLLMBackend.Release.CUDA.ubuntu.x86_64.json @@ -0,0 +1,73 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonTRTLLMBackend", + "platform": "Ubuntu x86_64 with NVIDIA GPU (CUDA) — Ampere and newer", + "notes": [ + "TensorRT-LLM backend build for Ubuntu on x86_64.", + "The TensorRT-LLM library (tensorrt_llm) must be installed as a Python wheel", + "before the Triton server build (it provides C++ headers and shared libs):", + " pip install tensorrt-llm --extra-index-url https://pypi.nvidia.com", + "Requires CUDA 12.x, TensorRT 10.x, and an Ampere or 
newer GPU (SM 8.0+).", + "Run cmake from the server/ source directory." + ], + "conan_install": [ + "pip install tensorrt-llm --extra-index-url https://pypi.nvidia.com", + "", + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-trtllm-backend-release-cuda-ubuntu-x86_64/conan", + "", + "cmake --preset triton-trtllm-backend-release-cuda-ubuntu-x86_64", + "cmake --build --preset triton-trtllm-backend-release-cuda-ubuntu-x86_64", + "cmake --build --preset triton-trtllm-backend-release-cuda-ubuntu-x86_64 --target tensorrtllm-backend" + ] + } + }, + "configurePresets": [ + { + "name": "triton-trtllm-backend-release-cuda-ubuntu-x86_64", + "displayName": "TritonTRTLLMBackend — Release, CUDA, Ubuntu, x86_64", + "description": "Triton server + TensorRT-LLM backend for Ubuntu Linux on x86_64. 
Requires tensorrt-llm wheel installed before cmake configure.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + "TRITON_ENABLE_METRICS_GPU": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_MIN_COMPUTE_CAPABILITY": "8.0", + "CMAKE_CUDA_ARCHITECTURES": "80;86;89;90", + "TRITON_ENABLE_TENSORRTLLM_BACKEND": "ON", + "TRITON_FETCH_MISSING_BACKENDS": "ON" + } + } + ], + "buildPresets": [ + { + "name": "triton-trtllm-backend-release-cuda-ubuntu-x86_64", + "configurePreset": "triton-trtllm-backend-release-cuda-ubuntu-x86_64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-trtllm-backend-release-cuda-ubuntu-x86_64", + "configurePreset": "triton-trtllm-backend-release-cuda-ubuntu-x86_64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonVLLMBackend.Release.CUDA.ubuntu.x86_64.json b/cmake/presets/CMakePresets.TritonVLLMBackend.Release.CUDA.ubuntu.x86_64.json new file mode 100644 index 0000000000..c5e90ece7a --- /dev/null +++ b/cmake/presets/CMakePresets.TritonVLLMBackend.Release.CUDA.ubuntu.x86_64.json @@ -0,0 +1,72 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonVLLMBackend", + "platform": "Ubuntu x86_64 with NVIDIA GPU (CUDA)", + "notes": [ + "vLLM backend build for Ubuntu on x86_64.", + "The vLLM backend is Python-only — CMake configures the server and enables", + "the backend stub so the vLLM Python package is discovered at runtime.", + "Install vLLM and its Triton shim before deployment:", + " pip install vllm", + " pip install tritonclient[all]", + "Requires CUDA toolkit and an NVIDIA driver 
(Ampere or newer recommended)." + ], + "conan_install": [ + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-vllm-backend-release-cuda-ubuntu-x86_64/conan", + "", + "cmake --preset triton-vllm-backend-release-cuda-ubuntu-x86_64", + "cmake --build --preset triton-vllm-backend-release-cuda-ubuntu-x86_64", + "cmake --build --preset triton-vllm-backend-release-cuda-ubuntu-x86_64 --target vllm-backend" + ] + } + }, + "configurePresets": [ + { + "name": "triton-vllm-backend-release-cuda-ubuntu-x86_64", + "displayName": "TritonVLLMBackend — Release, CUDA, Ubuntu, x86_64", + "description": "Triton server + vLLM backend for Ubuntu Linux on x86_64. vLLM backend is Python-only; this preset wires the backend into the server build.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + "TRITON_ENABLE_METRICS_GPU": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_MIN_COMPUTE_CAPABILITY": "8.0", + "CMAKE_CUDA_ARCHITECTURES": "80;86;89;90", + "TRITON_ENABLE_VLLM_BACKEND": "ON", + "TRITON_FETCH_MISSING_BACKENDS": "ON" + } + } + ], + "buildPresets": [ + { + "name": "triton-vllm-backend-release-cuda-ubuntu-x86_64", + "configurePreset": "triton-vllm-backend-release-cuda-ubuntu-x86_64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-vllm-backend-release-cuda-ubuntu-x86_64", + "configurePreset": 
"triton-vllm-backend-release-cuda-ubuntu-x86_64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.json b/cmake/presets/CMakePresets.json new file mode 100644 index 0000000000..bc25f0c15f --- /dev/null +++ b/cmake/presets/CMakePresets.json @@ -0,0 +1,46 @@ +{ + "version": 6, + "vendor": { + "triton": { + "description": "Platform-specific preset catalog for all Triton components. Include this file from CMakeUserPresets.json to activate all platform presets.", + "components": [ + "TritonInferenceServer — server binary + backends (run cmake from server/)", + "TritonClient — C++/Python client libraries (run cmake from client/)", + "TritonPerfAnalyzer — standalone benchmark tool, manylinux only (run cmake from perf_analyzer/)", + "TritonVLLMBackend — server + vLLM Python backend (run cmake from server/)", + "TritonTRTLLMBackend — server + TensorRT-LLM backend (run cmake from server/; install tensorrt-llm wheel first)" + ], + "usage": [ + "After running 'conan install', add to CMakeUserPresets.json:", + " {", + " \"version\": 6,", + " \"include\": [", + " \"build//conan/generators/CMakePresets.json\",", + " \"/cmake/presets/CMakePresets.json\"", + " ]", + " }", + "", + "Then: cmake --preset triton-inference-server-release-cuda-ubuntu-x86_64" + ] + } + }, + "include": [ + "CMakePresets.TritonInferenceServer.Release.CUDA.ubuntu.x86_64.json", + "CMakePresets.TritonInferenceServer.Release.CUDA.ubuntu.aarch64.json", + "CMakePresets.TritonInferenceServer.Release.CUDA.manylinux.x86_64.json", + "CMakePresets.TritonInferenceServer.Release.CUDA.manylinux.aarch64.json", + "CMakePresets.TritonInferenceServer.Release.CPU.ubuntu.x86_64.json", + "CMakePresets.TritonInferenceServer.Release.CPU.ubuntu.aarch64.json", + "CMakePresets.TritonClient.Release.CUDA.ubuntu.x86_64.json", + "CMakePresets.TritonClient.Release.CUDA.ubuntu.aarch64.json", + "CMakePresets.TritonClient.Release.CUDA.manylinux.x86_64.json", + 
"CMakePresets.TritonClient.Release.CUDA.manylinux.aarch64.json", + "CMakePresets.TritonClient.Release.CPU.ubuntu.x86_64.json", + "CMakePresets.TritonClient.Release.CPU.ubuntu.aarch64.json", + "CMakePresets.TritonPerfAnalyzer.Release.manylinux.x86_64.json", + "CMakePresets.TritonPerfAnalyzer.Release.manylinux.aarch64.json", + "CMakePresets.TritonVLLMBackend.Release.CUDA.ubuntu.x86_64.json", + "CMakePresets.TritonTRTLLMBackend.Release.CUDA.ubuntu.x86_64.json", + "CMakePresets.TritonTRTLLMBackend.Release.CUDA.ubuntu.aarch64.json" + ] +} diff --git a/conan/profiles/linux-gcc13-debug b/conan/profiles/linux-gcc13-debug new file mode 100644 index 0000000000..51e994b678 --- /dev/null +++ b/conan/profiles/linux-gcc13-debug @@ -0,0 +1,13 @@ +[settings] +os=Linux +arch=x86_64 +compiler=gcc +compiler.version=13 +compiler.libcxx=libstdc++11 +compiler.cppstd=17 +build_type=Debug + +[buildenv] +CC=/usr/bin/gcc-13 +CXX=/usr/bin/g++-13 +PATH=+/home/user/.venv/bin diff --git a/conan/profiles/linux-gcc13-release b/conan/profiles/linux-gcc13-release new file mode 100644 index 0000000000..4ad3b65fc8 --- /dev/null +++ b/conan/profiles/linux-gcc13-release @@ -0,0 +1,15 @@ +[settings] +os=Linux +arch=x86_64 +compiler=gcc +compiler.version=13 +compiler.libcxx=libstdc++11 +compiler.cppstd=17 +build_type=Release + +[conf] +tools.cmake:cmake_program=/home/user/.venv/bin/cmake + +[buildenv] +CC=/usr/bin/gcc-13 +CXX=/usr/bin/g++-13 diff --git a/conan/profiles/linux-gcc13-release-aarch64 b/conan/profiles/linux-gcc13-release-aarch64 new file mode 100644 index 0000000000..235f252017 --- /dev/null +++ b/conan/profiles/linux-gcc13-release-aarch64 @@ -0,0 +1,12 @@ +[settings] +os=Linux +arch=armv8 +compiler=gcc +compiler.version=13 +compiler.libcxx=libstdc++11 +compiler.cppstd=17 +build_type=Release + +[buildenv] +CC=/usr/bin/aarch64-linux-gnu-gcc-13 +CXX=/usr/bin/aarch64-linux-gnu-g++-13 diff --git a/conan/profiles/linux-gcc13-release-manylinux b/conan/profiles/linux-gcc13-release-manylinux new file 
mode 100644 index 0000000000..9dae4a6296 --- /dev/null +++ b/conan/profiles/linux-gcc13-release-manylinux @@ -0,0 +1,19 @@ +[settings] +os=Linux +arch=x86_64 +compiler=gcc +compiler.version=13 +compiler.libcxx=libstdc++11 +compiler.cppstd=17 +build_type=Release + +[buildenv] +CC=/usr/bin/gcc-13 +CXX=/usr/bin/g++-13 +PATH=+/home/user/.venv/bin + +[conf] +# Target manylinux2014 (glibc 2.17+) — use the manylinux sysroot when available. +# Set CONAN_SYSROOT in the environment to point to the manylinux sysroot if +# cross-compiling; for native builds this profile is equivalent to linux-gcc13-release. +tools.build:sysroot= diff --git a/conan/profiles/linux-gcc13-release-manylinux-aarch64 b/conan/profiles/linux-gcc13-release-manylinux-aarch64 new file mode 100644 index 0000000000..235f252017 --- /dev/null +++ b/conan/profiles/linux-gcc13-release-manylinux-aarch64 @@ -0,0 +1,12 @@ +[settings] +os=Linux +arch=armv8 +compiler=gcc +compiler.version=13 +compiler.libcxx=libstdc++11 +compiler.cppstd=17 +build_type=Release + +[buildenv] +CC=/usr/bin/aarch64-linux-gnu-gcc-13 +CXX=/usr/bin/aarch64-linux-gnu-g++-13 diff --git a/conan/recipes/cnmem/conanfile.py b/conan/recipes/cnmem/conanfile.py new file mode 100644 index 0000000000..7779a78210 --- /dev/null +++ b/conan/recipes/cnmem/conanfile.py @@ -0,0 +1,49 @@ +import os +from conan import ConanFile +from conan.tools.cmake import CMake, CMakeToolchain, cmake_layout +from conan.tools.files import copy, get + + +class CnmemConan(ConanFile): + name = "cnmem" + version = "1.0.0" + description = "NVIDIA cnmem CUDA memory manager (Triton-patched, static library)" + license = "BSD-3-Clause" + url = "https://github.com/mc-nv/cnmem" + topics = ("cuda", "memory", "nvidia", "triton") + settings = "os", "compiler", "build_type", "arch" + options = {"shared": [True, False]} + default_options = {"shared": False} + + # Pinned to HEAD of mc-nv/cnmem (includes all Triton patches + Conan recipe). 
+ # Update this SHA when a new release is cut on mc-nv/cnmem. + _commit = "81d127414eb67f2ba3ecf82ad074d667e5eed558" + + def source(self): + get(self, + f"https://github.com/mc-nv/cnmem/archive/{self._commit}.tar.gz", + strip_root=True) + + def layout(self): + cmake_layout(self, src_folder=".") + + def generate(self): + tc = CMakeToolchain(self) + tc.variables["BUILD_SHARED_LIBS"] = self.options.shared + tc.generate() + + def build(self): + cmake = CMake(self) + cmake.configure() + cmake.build() + + def package(self): + CMake(self).install() + copy(self, "*.h", + os.path.join(self.source_folder, "include"), + os.path.join(self.package_folder, "include")) + + def package_info(self): + self.cpp_info.set_property("cmake_file_name", "cnmem") + self.cpp_info.set_property("cmake_target_name", "cnmem::cnmem") + self.cpp_info.libs = ["cnmem"] diff --git a/conan/recipes/dcgm/conanfile.py b/conan/recipes/dcgm/conanfile.py new file mode 100644 index 0000000000..782965b730 --- /dev/null +++ b/conan/recipes/dcgm/conanfile.py @@ -0,0 +1,36 @@ +import os +from conan import ConanFile + + +class DcgmConan(ConanFile): + name = "dcgm" + version = "4.5.3" + description = "NVIDIA DataCenter GPU Manager (system package wrapper)" + settings = "os", "arch" + + def package_info(self): + self.cpp_info.set_property("cmake_file_name", "DCGM") + self.cpp_info.set_property("cmake_target_name", "DCGM::dcgm") + # Search candidate install locations in priority order: + # 1. /usr/local/dcgm — NVIDIA NGC container image layout + # 2. 
/usr — apt package (datacenter-gpu-manager) layout + _include_candidates = [ + "/usr/local/dcgm/include", + "/usr/include", + ] + _lib_candidates = [ + "/usr/local/dcgm/lib", + "/usr/local/dcgm/lib64", + "/usr/lib/x86_64-linux-gnu", + "/usr/lib/aarch64-linux-gnu", + ] + include_dirs = [d for d in _include_candidates + if os.path.isfile(os.path.join(d, "dcgm_agent.h"))] + lib_dirs = [d for d in _lib_candidates + if any(f.startswith("libdcgm") for f in (os.listdir(d) if os.path.isdir(d) else []))] + self.cpp_info.includedirs = include_dirs + self.cpp_info.libdirs = lib_dirs + # Use system_libs so CMakeDeps does not try to locate the library + # inside the (empty) Conan package folder; the linker will search + # the libdirs paths set via INTERFACE_LINK_DIRECTORIES at build time. + self.cpp_info.system_libs = ["dcgm"] diff --git a/conan/recipes/libevhtp/conanfile.py b/conan/recipes/libevhtp/conanfile.py new file mode 100644 index 0000000000..d09edacb11 --- /dev/null +++ b/conan/recipes/libevhtp/conanfile.py @@ -0,0 +1,76 @@ +from conan import ConanFile +from conan.tools.cmake import CMake, CMakeToolchain, CMakeDeps, cmake_layout +import os +from conan.tools.files import get, replace_in_file + + +class LibevhtpConan(ConanFile): + name = "libevhtp" + version = "1.2.18" + description = "NVIDIA Triton-patched libevhtp flexible HTTP server library" + license = "BSD-3-Clause" + url = "https://github.com/mc-nv/libevhtp" + topics = ("http", "libevent", "triton") + settings = "os", "compiler", "build_type", "arch" + options = { + "shared": [True, False], + "enable_tracing": [True, False], + } + default_options = { + "shared": False, + "enable_tracing": False, + } + + # Pinned to HEAD of mc-nv/libevhtp (includes all Triton patches + Conan recipe). + # Update this SHA when a new release is cut on mc-nv/libevhtp. 
+ _commit = "59956d391d0f77cf144b2af5ec888de760088e22" + + def requirements(self): + self.requires("libevent/2.1.12") + + def source(self): + get(self, + f"https://github.com/mc-nv/libevhtp/archive/{self._commit}.tar.gz", + strip_root=True) + + def layout(self): + cmake_layout(self, src_folder=".") + + def generate(self): + tc = CMakeToolchain(self) + tc.variables["EVHTP_DISABLE_REGEX"] = True + tc.variables["EVHTP_DISABLE_SSL"] = True + tc.variables["EVHTP_TRITON_ENABLE_TRACING"] = self.options.enable_tracing + tc.variables["BUILD_SHARED_LIBS"] = self.options.shared + # Triton's tritonfrontend Python module links libevhtp into a .so; + # ensure all libevhtp object files are compiled with -fPIC. + tc.variables["CMAKE_POSITION_INDEPENDENT_CODE"] = True + tc.generate() + CMakeDeps(self).generate() + + def build(self): + # Patch libevhtp CMakeLists.txt to use Conan CMakeDeps imported targets + # instead of old-style Find module variables (LIBEVENT_LIBRARIES, etc.). + # CMakeDeps creates Libevent::core / Libevent::extra targets but does NOT + # set the LIBEVENT_LIBRARIES / LIBEVENT_INCLUDE_DIRS variables that the + # upstream CMakeLists.txt expects. 
+ replace_in_file( + self, + os.path.join(self.source_folder, "CMakeLists.txt"), + "find_package(Libevent REQUIRED)\n" + "list(APPEND LIBEVHTP_EXTERNAL_LIBS ${LIBEVENT_LIBRARIES})\n" + "list(APPEND LIBEVHTP_EXTERNAL_INCLUDES ${LIBEVENT_INCLUDE_DIRS})", + "find_package(Libevent CONFIG REQUIRED)\n" + "list(APPEND LIBEVHTP_EXTERNAL_LIBS libevent::core libevent::extra libevent::pthreads)", + ) + cmake = CMake(self) + cmake.configure() + cmake.build() + + def package(self): + CMake(self).install() + + def package_info(self): + self.cpp_info.set_property("cmake_file_name", "libevhtp") + self.cpp_info.set_property("cmake_target_name", "libevhtp::evhtp") + self.cpp_info.libs = ["evhtp"] diff --git a/conanfile.py b/conanfile.py new file mode 100644 index 0000000000..3a06fdaa26 --- /dev/null +++ b/conanfile.py @@ -0,0 +1,101 @@ +from conan import ConanFile +from conan.tools.cmake import CMakeToolchain, CMakeDeps +from conan.errors import ConanInvalidConfiguration + + +class TritonServerConan(ConanFile): + name = "tritonserver" + version = "2.68.0" + settings = "os", "compiler", "build_type", "arch" + options = { + "enable_grpc": [True, False], + "enable_http": [True, False], + "enable_metrics": [True, False], + "enable_tracing": [True, False], + "enable_gcs": [True, False], + "enable_s3": [True, False], + "enable_azure_storage": [True, False], + "enable_gpu": [True, False], + } + default_options = { + "enable_grpc": True, + "enable_http": True, + "enable_metrics": True, + "enable_tracing": False, + "enable_gcs": False, + "enable_s3": False, + "enable_azure_storage": False, + "enable_gpu": True, + } + + def validate(self): + if self.settings.os != "Linux": + raise ConanInvalidConfiguration("tritonserver only supports Linux") + + def requirements(self): + self.requires("protobuf/3.21.12") + self.requires("re2/20230301") + self.requires("rapidjson/cci.20230929") + self.requires("gtest/1.14.0") + self.requires("libcurl/8.18.0") + self.requires("nlohmann_json/3.11.3") + if 
self.options.enable_grpc: + self.requires("grpc/1.54.3") + if self.options.enable_http: + self.requires("libevent/2.1.12") + self.requires("libevhtp/1.2.18") + if self.options.enable_metrics: + self.requires("prometheus-cpp/1.2.4") + if self.options.enable_tracing: + self.requires("opentelemetry-cpp/1.9.1") + if self.options.enable_gcs: + self.requires("google-cloud-cpp/2.28.0") + self.requires("crc32c/1.1.2") + if self.options.enable_s3: + self.requires("aws-sdk-cpp/1.11.60") + if self.options.enable_azure_storage: + self.requires("azure-sdk-for-cpp/1.12.0") + if self.options.enable_gpu: + self.requires("cnmem/1.0.0") + self.requires("dcgm/4.5.3") + + def configure(self): + self.options["libcurl"].shared = False + self.options["libcurl"].with_ssl = "openssl" + if self.options.enable_grpc: + self.options["grpc"].shared = False + self.options["grpc"].cpp_plugin = True + if self.options.enable_http: + self.options["libevent"].shared = False + self.options["libevent"].with_openssl = False + if self.options.enable_metrics: + self.options["prometheus-cpp"].shared = False + self.options["prometheus-cpp"].with_pull = False + self.options["prometheus-cpp"].with_push = False + if self.options.enable_s3: + self.options["aws-sdk-cpp"].shared = False + self.options["aws-sdk-cpp"].build_only = "s3" + + def layout(self): + # Place generators flat under the output folder so our CMakePresets.json + # toolchainFile path (build/&lt;preset&gt;/conan/generators/conan_toolchain.cmake) + # resolves correctly without build_type nesting. + self.folders.generators = "generators" + + def generate(self): + tc = CMakeToolchain(self) + ws = self.recipe_folder + "/.." 
+ tc.variables["TRITON_COMMON_SOURCE_DIR"] = ws + "/common" + tc.variables["TRITON_CORE_SOURCE_DIR"] = ws + "/core" + tc.variables["TRITON_BACKEND_SOURCE_DIR"] = ws + "/backend" + tc.variables["TRITON_ENABLE_GRPC"] = self.options.enable_grpc + tc.variables["TRITON_ENABLE_HTTP"] = self.options.enable_http + tc.variables["TRITON_ENABLE_METRICS"] = self.options.enable_metrics + tc.variables["TRITON_ENABLE_TRACING"] = self.options.enable_tracing + tc.variables["TRITON_ENABLE_GCS"] = self.options.enable_gcs + tc.variables["TRITON_ENABLE_S3"] = self.options.enable_s3 + tc.variables["TRITON_ENABLE_AZURE_STORAGE"] = self.options.enable_azure_storage + tc.variables["TRITON_ENABLE_GPU"] = self.options.enable_gpu + tc.variables["TRITON_SKIP_THIRD_PARTY_FETCH"] = True + tc.generate() + CMakeDeps(self).generate() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000..42f524142a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,20 @@ +# Python runtime dependencies for Triton Inference Server +# +# TensorRT-LLM is installed as a pre-built wheel — NOT built from source. +# The wheel provides both the Python bindings and the required C++ shared +# libraries (libtensorrt_llm.so, libnvinfer_plugin_tensorrt_llm.so, etc.). +# +--extra-index-url https://pypi.nvidia.com + +# TensorRT-LLM runtime (wheel replaces the TensorRT-LLM/ source clone) +tensorrt-llm==1.2.0 + +# OpenAI-compatible HTTP frontend +fastapi==0.121.2 +httpx==0.27.2 +openai>=1.107.3 +starlette>=0.49.1 +partial-json-parser + +# Utilities +scipy>=1.11.0 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9568f4280d..cce1143efe 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -24,7 +24,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-cmake_minimum_required (VERSION 3.31.8) +cmake_minimum_required(VERSION 3.31.8) project(tritonserverexe LANGUAGES C CXX) @@ -33,38 +33,20 @@ include(GNUInstallDirs) # # Dependencies # -# We must include the transitive closure of all repos so that we can -# override the tag. The backend repo is needed for the tests. +# Internal Triton repos are brought in via add_subdirectory in the parent +# CMakeLists.txt (server/CMakeLists.txt). Their targets are already in scope. # -include(FetchContent) - -FetchContent_Declare( - repo-common - GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/common.git - GIT_TAG ${TRITON_COMMON_REPO_TAG} -) -FetchContent_Declare( - repo-core - GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/core.git - GIT_TAG ${TRITON_CORE_REPO_TAG} -) -FetchContent_Declare( - repo-backend - GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/backend.git - GIT_TAG ${TRITON_BACKEND_REPO_TAG} -) if(TRITON_ENABLE_GRPC) set(TRITON_COMMON_ENABLE_PROTOBUF ON) set(TRITON_COMMON_ENABLE_GRPC ON) endif() # TRITON_ENABLE_GRPC -FetchContent_MakeAvailable(repo-common repo-core repo-backend) - # CUDA # if(${TRITON_ENABLE_GPU}) find_package(CUDAToolkit REQUIRED) + set(CMAKE_CUDA_RUNTIME_LIBRARY Shared) message(STATUS "Using CUDA ${CUDA_VERSION}") endif() # TRITON_ENABLE_GPU @@ -209,14 +191,7 @@ if(${TRITON_ENABLE_GPU}) ) endif() # TRITON_ENABLE_GPU -if(${TRITON_ENABLE_HTTP} OR ${TRITON_ENABLE_METRICS} OR - ${TRITON_ENABLE_SAGEMAKER} OR ${TRITON_ENABLE_VERTEX_AI}) - target_include_directories( - main - PRIVATE - ${LIBEVENT_INCLUDE_DIRS} - ) -endif() + # Libevent include dirs come via the Conan-generated target's INTERFACE_INCLUDE_DIRECTORIES if(${TRITON_ENABLE_HTTP}) @@ -401,7 +376,9 @@ if(${TRITON_ENABLE_HTTP} triton-common-logging # from repo-common triton-core-serverapi # from repo-core triton-core-serverstub # from repo-core - ${LIBEVENT_LIBRARIES} + libevent::core + libevent::extra + libevent::pthreads libevhtp::evhtp re2::re2 ) @@ -654,6 +631,7 @@ if (NOT WIN32) triton-common-error # from 
repo-common triton-core-serverapi # from repo-core triton-core-serverstub # from repo-core + rapidjson ) if(${TRITON_ENABLE_GPU}) @@ -718,6 +696,7 @@ if (NOT WIN32) triton-common-error # from repo-common triton-core-serverapi # from repo-core triton-core-serverstub # from repo-core + rapidjson ) if(${TRITON_ENABLE_GPU}) @@ -789,6 +768,7 @@ if (NOT WIN32) triton-common-error # from repo-common triton-core-serverapi # from repo-core triton-core-serverstub # from repo-core + rapidjson CUDA::cudart ) diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index f523c4dbdc..f6bf7af2d8 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -57,7 +57,6 @@ if(${TRITON_ENABLE_HTTP} OR ${TRITON_ENABLE_METRICS} OR PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/.. ${GTEST_INCLUDE_DIRS} - ${LIBEVENT_INCLUDE_DIRS} ) target_link_libraries( @@ -67,7 +66,8 @@ if(${TRITON_ENABLE_HTTP} OR ${TRITON_ENABLE_METRICS} OR triton-core-serverstub # from repo-core GTest::gtest GTest::gtest_main - ${LIBEVENT_LIBRARIES} + libevent::core + libevent::extra -lz ) diff --git a/src/test/distributed_addsub/CMakeLists.txt b/src/test/distributed_addsub/CMakeLists.txt index 597e70308c..3465225e17 100644 --- a/src/test/distributed_addsub/CMakeLists.txt +++ b/src/test/distributed_addsub/CMakeLists.txt @@ -64,7 +64,7 @@ set_target_properties( POSITION_INDEPENDENT_CODE ON OUTPUT_NAME triton_distributed_addsub LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_distributed_addsub.ldscript - LINK_FLAGS "-Wl,--version-script libtriton_distributed_addsub.ldscript" + LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_BINARY_DIR}/libtriton_distributed_addsub.ldscript" ) # diff --git a/src/test/dyna_sequence/CMakeLists.txt b/src/test/dyna_sequence/CMakeLists.txt index 2bed7c2d36..9eb93387fd 100644 --- a/src/test/dyna_sequence/CMakeLists.txt +++ b/src/test/dyna_sequence/CMakeLists.txt @@ -64,7 +64,7 @@ set_target_properties( POSITION_INDEPENDENT_CODE ON OUTPUT_NAME triton_dyna_sequence LINK_DEPENDS 
${CMAKE_CURRENT_BINARY_DIR}/libtriton_dyna_sequence.ldscript - LINK_FLAGS "-Wl,--version-script libtriton_dyna_sequence.ldscript" + LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_BINARY_DIR}/libtriton_dyna_sequence.ldscript" ) # diff --git a/src/test/implicit_state/CMakeLists.txt b/src/test/implicit_state/CMakeLists.txt index 4b6024a9c7..058ca1c80c 100644 --- a/src/test/implicit_state/CMakeLists.txt +++ b/src/test/implicit_state/CMakeLists.txt @@ -64,7 +64,7 @@ set_target_properties( POSITION_INDEPENDENT_CODE ON OUTPUT_NAME triton_implicit_state LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_implicit_state.ldscript - LINK_FLAGS "-Wl,--version-script libtriton_implicit_state.ldscript" + LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_BINARY_DIR}/libtriton_implicit_state.ldscript" ) # diff --git a/src/test/iterative_sequence/CMakeLists.txt b/src/test/iterative_sequence/CMakeLists.txt index 4a44925524..3a49fd94ff 100644 --- a/src/test/iterative_sequence/CMakeLists.txt +++ b/src/test/iterative_sequence/CMakeLists.txt @@ -64,7 +64,7 @@ set_target_properties( POSITION_INDEPENDENT_CODE ON OUTPUT_NAME triton_iterative_sequence LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_iterative_sequence.ldscript - LINK_FLAGS "-Wl,--version-script libtriton_iterative_sequence.ldscript" + LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_BINARY_DIR}/libtriton_iterative_sequence.ldscript" ) # diff --git a/src/test/query_backend/CMakeLists.txt b/src/test/query_backend/CMakeLists.txt index 30c33a6b50..feaa93bed2 100644 --- a/src/test/query_backend/CMakeLists.txt +++ b/src/test/query_backend/CMakeLists.txt @@ -64,7 +64,7 @@ set_target_properties( POSITION_INDEPENDENT_CODE ON OUTPUT_NAME triton_query LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_query.ldscript - LINK_FLAGS "-Wl,--version-script libtriton_query.ldscript" + LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_BINARY_DIR}/libtriton_query.ldscript" ) # diff --git 
a/src/test/repoagent/relocation_repoagent/CMakeLists.txt b/src/test/repoagent/relocation_repoagent/CMakeLists.txt index d0b97b5d24..881e5c609d 100644 --- a/src/test/repoagent/relocation_repoagent/CMakeLists.txt +++ b/src/test/repoagent/relocation_repoagent/CMakeLists.txt @@ -61,7 +61,7 @@ set_target_properties( POSITION_INDEPENDENT_CODE ON OUTPUT_NAME tritonrepoagent_relocation LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtritonrepoagent_relocation.ldscript - LINK_FLAGS "-Wl,--version-script libtritonrepoagent_relocation.ldscript" + LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_BINARY_DIR}/libtritonrepoagent_relocation.ldscript" ) # diff --git a/src/test/sequence/CMakeLists.txt b/src/test/sequence/CMakeLists.txt index ab105f20eb..2dbfb5a0b9 100644 --- a/src/test/sequence/CMakeLists.txt +++ b/src/test/sequence/CMakeLists.txt @@ -64,7 +64,7 @@ set_target_properties( POSITION_INDEPENDENT_CODE ON OUTPUT_NAME triton_sequence LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_sequence.ldscript - LINK_FLAGS "-Wl,--version-script libtriton_sequence.ldscript" + LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_BINARY_DIR}/libtriton_sequence.ldscript" ) #