diff --git a/.gitignore b/.gitignore index 31f5d9f5f4..471832c119 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,4 @@ qa/L0_openai/openai tensorrtllm_models tensorrtllm_mistral_models/ custom_tokenizer +CMakeUserPresets.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 68f68e1a69..8b555ba9bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,10 +30,20 @@ project(tritonserver LANGUAGES C CXX) include(CMakeDependentOption) +# Expose local cmake/ modules (FindDCGM, TritonCheckCxxAbi, etc.) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") + +# Validate that the C++ ABI (_GLIBCXX_USE_CXX11_ABI) matches the ABI used +# to compile Conan-provided binary packages. Must run after the toolchain +# file (conan_toolchain.cmake) has been processed so CONAN_COMPILER_LIBCXX +# is populated. +include(TritonCheckCxxAbi) +triton_check_cxx_abi() + # Use C++17 standard as Triton's minimum required. set(TRITON_MIN_CXX_STANDARD 17 CACHE STRING "The minimum C++ standard which features are requested to build this target.") -set(TRITON_VERSION "0.0.0" CACHE STRING "The version of the Triton shared library" ) +set(TRITON_VERSION "0.0.0" CACHE STRING "The version of the Triton shared library") option(TRITON_ENABLE_LOGGING "Include logging support in server" ON) option(TRITON_ENABLE_STATS "Include statistics collections in server" ON) @@ -41,9 +51,9 @@ option(TRITON_ENABLE_TRACING "Include tracing support in server" OFF) option(TRITON_ENABLE_NVTX "Include NVTX support in server" OFF) option(TRITON_ENABLE_GPU "Enable GPU support in server" ON) option(TRITON_ENABLE_MALI_GPU "Enable Arm Mali GPU support in server" OFF) -option(TRITON_IGPU_BUILD "Enable options for iGPU compilation in sever" OFF) +option(TRITON_IGPU_BUILD "Enable options for iGPU compilation in server" OFF) set(TRITON_MIN_COMPUTE_CAPABILITY "7.5" CACHE STRING - "The minimum CUDA compute capability supported by Triton" ) + "The minimum CUDA compute capability supported by Triton") 
set(TRITON_EXTRA_LIB_PATHS "" CACHE PATH "Extra library paths for Triton Server build") # Ensemble @@ -71,18 +81,72 @@ option(TRITON_ENABLE_TENSORRT "Include TensorRT backend in server" OFF) # ASAN option(TRITON_ENABLE_ASAN "Build with address sanitizer" OFF) -# Repo tags -set(TRITON_REPO_ORGANIZATION "https://github.com/triton-inference-server" CACHE STRING "Git repository to pull from") -set(TRITON_THIRD_PARTY_REPO_TAG "main" CACHE STRING - "Tag for triton-inference-server/third_party repo") -set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo") -set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo") -set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo") - -# Third-party location -set(TRITON_THIRD_PARTY_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/third-party" CACHE STRING "Location of third-party build") -set(TRITON_THIRD_PARTY_SRC_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/third-party-src" CACHE STRING "Location of third-party source") +# Skip FetchContent/ExternalProject for third-party deps when Conan provides them +option(TRITON_SKIP_THIRD_PARTY_FETCH + "Skip FetchContent/ExternalProject for third-party deps (provided by Conan)" ON) + +# --------------------------------------------------------------------------- +# Backend implementations — each defaults OFF; toggle ON to include in build. +# Source dirs default to sibling checkout paths; override via -D if needed. 
+# ---------------------------------------------------------------------------
+option(TRITON_ENABLE_PYTHON_BACKEND "Build Python backend" OFF)
+option(TRITON_ENABLE_ONNXRUNTIME_BACKEND "Build ONNX Runtime backend" OFF)
+option(TRITON_ENABLE_PYTORCH_BACKEND "Build PyTorch backend" OFF)
+option(TRITON_ENABLE_TENSORRT_BACKEND "Build TensorRT backend" OFF)
+option(TRITON_ENABLE_OPENVINO_BACKEND "Build OpenVINO backend" OFF)
+option(TRITON_ENABLE_DALI_BACKEND "Build DALI backend" OFF)
+option(TRITON_ENABLE_FIL_BACKEND "Build FIL backend" OFF)
+option(TRITON_ENABLE_VLLM_BACKEND "Build vLLM backend" OFF)
+option(TRITON_ENABLE_TENSORRTLLM_BACKEND "Build TensorRT-LLM backend" OFF)
+option(TRITON_ENABLE_IDENTITY_BACKEND "Build Identity (test) backend" OFF)
+option(TRITON_ENABLE_REPEAT_BACKEND "Build Repeat (test) backend" OFF)
+option(TRITON_ENABLE_SQUARE_BACKEND "Build Square (test) backend" OFF)
+
+# Source dirs — prefer sibling checkouts; FetchContent is the fallback.
+# Override per-backend with -DTRITON_<NAME>_BACKEND_SOURCE_DIR=<path>.
+set(TRITON_PYTHON_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../python_backend"
+    CACHE PATH "Local checkout of python_backend repo")
+set(TRITON_ONNXRUNTIME_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../onnxruntime_backend"
+    CACHE PATH "Local checkout of onnxruntime_backend repo")
+set(TRITON_PYTORCH_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../pytorch_backend"
+    CACHE PATH "Local checkout of pytorch_backend repo")
+set(TRITON_TENSORRT_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../tensorrt_backend"
+    CACHE PATH "Local checkout of tensorrt_backend repo")
+set(TRITON_OPENVINO_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../openvino_backend"
+    CACHE PATH "Local checkout of openvino_backend repo")
+set(TRITON_DALI_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../dali_backend"
+    CACHE PATH "Local checkout of dali_backend repo")
+set(TRITON_FIL_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../fil_backend"
+    CACHE PATH "Local checkout of fil_backend repo")
+set(TRITON_VLLM_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../vllm_backend"
+    CACHE PATH "Local checkout of vllm_backend repo")
+set(TRITON_TENSORRTLLM_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../tensorrtllm_backend"
+    CACHE PATH "Local checkout of tensorrtllm_backend repo")
+set(TRITON_IDENTITY_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../identity_backend"
+    CACHE PATH "Local checkout of identity_backend repo")
+set(TRITON_REPEAT_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../repeat_backend"
+    CACHE PATH "Local checkout of repeat_backend repo")
+set(TRITON_SQUARE_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../square_backend"
+    CACHE PATH "Local checkout of square_backend repo")
+
+# Git tags used when a backend is fetched (no local checkout found).
+# Override per-backend with -DTRITON_<NAME>_BACKEND_REPO_TAG=<tag>.
+set(TRITON_PYTHON_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for python_backend FetchContent") +set(TRITON_ONNXRUNTIME_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for onnxruntime_backend FetchContent") +set(TRITON_PYTORCH_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for pytorch_backend FetchContent") +set(TRITON_TENSORRT_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for tensorrt_backend FetchContent") +set(TRITON_OPENVINO_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for openvino_backend FetchContent") +set(TRITON_DALI_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for dali_backend FetchContent") +set(TRITON_FIL_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for fil_backend FetchContent") +set(TRITON_VLLM_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for vllm_backend FetchContent") +set(TRITON_TENSORRTLLM_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for tensorrtllm_backend FetchContent") +set(TRITON_IDENTITY_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for identity_backend FetchContent") +set(TRITON_REPEAT_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for repeat_backend FetchContent") +set(TRITON_SQUARE_BACKEND_REPO_TAG "main" CACHE STRING "Git ref for square_backend FetchContent") +# +# Validation +# if(TRITON_ENABLE_METRICS AND NOT TRITON_ENABLE_STATS) message(FATAL_ERROR "TRITON_ENABLE_METRICS=ON requires TRITON_ENABLE_STATS=ON") endif() @@ -91,15 +155,15 @@ if(TRITON_ENABLE_TRACING AND NOT TRITON_ENABLE_STATS) message(FATAL_ERROR "TRITON_ENABLE_TRACING=ON requires TRITON_ENABLE_STATS=ON") endif() -if (TRITON_ENABLE_METRICS_CPU AND NOT TRITON_ENABLE_METRICS) +if(TRITON_ENABLE_METRICS_CPU AND NOT TRITON_ENABLE_METRICS) message(FATAL_ERROR "TRITON_ENABLE_METRICS_CPU=ON requires TRITON_ENABLE_METRICS=ON") endif() -if (TRITON_ENABLE_METRICS_GPU AND NOT TRITON_ENABLE_METRICS) +if(TRITON_ENABLE_METRICS_GPU AND NOT TRITON_ENABLE_METRICS) message(FATAL_ERROR "TRITON_ENABLE_METRICS_GPU=ON requires TRITON_ENABLE_METRICS=ON") endif() -if (TRITON_ENABLE_METRICS_GPU 
AND NOT TRITON_ENABLE_GPU) +if(TRITON_ENABLE_METRICS_GPU AND NOT TRITON_ENABLE_GPU) message(FATAL_ERROR "TRITON_ENABLE_METRICS_GPU=ON requires TRITON_ENABLE_GPU=ON") endif() @@ -108,162 +172,431 @@ if(TRITON_ENABLE_ASAN AND TRITON_ENABLE_GPU) endif() # -# Dependencies +# Internal Triton repos — prefer local checkouts, fall back to FetchContent. +# Override source dirs via -DTRITON_*_SOURCE_DIR=. +# Override fetch tags via -DTRITON_*_REPO_TAG=. # +set(TRITON_COMMON_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../common" + CACHE PATH "Local checkout of triton-inference-server/common (empty = FetchContent)") +set(TRITON_CORE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../core" + CACHE PATH "Local checkout of triton-inference-server/core (empty = FetchContent)") +set(TRITON_BACKEND_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../backend" + CACHE PATH "Local checkout of triton-inference-server/backend (empty = FetchContent)") + +set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Git tag/branch for common when using FetchContent") +set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Git tag/branch for core when using FetchContent") +set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Git tag/branch for backend when using FetchContent") + include(FetchContent) -FetchContent_Declare( - repo-core - GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/core.git - GIT_TAG ${TRITON_CORE_REPO_TAG} -) -FetchContent_Declare( - repo-third-party - GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/third_party.git - GIT_TAG ${TRITON_THIRD_PARTY_REPO_TAG} -) +macro(triton_add_internal_repo _name _src_dir _repo_url _repo_tag _subdir) + if(EXISTS "${${_src_dir}}/CMakeLists.txt") + message(STATUS "triton/${_name}: using local checkout at ${${_src_dir}}") + add_subdirectory(${${_src_dir}} ${_subdir}) + else() + message(STATUS "triton/${_name}: local checkout not found — fetching ${${_repo_tag}} from ${_repo_url}") + FetchContent_Declare( + ${_name} + GIT_REPOSITORY ${_repo_url} + GIT_TAG ${${_repo_tag}} + GIT_SHALLOW TRUE + ) + 
FetchContent_MakeAvailable(${_name}) + endif() +endmacro() -# Some libs are installed to ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib64 instead -# of ${TRITON_THIRD_PARTY_INSTALL_PREFIX}/{LIB}/lib on Centos -set(LIB_DIR "lib") -if(LINUX) - file(STRINGS "/etc/os-release" DISTRO_ID_LIKE REGEX "ID_LIKE") - if(${DISTRO_ID_LIKE} MATCHES "rhel|centos") - set (LIB_DIR "lib64") - endif(${DISTRO_ID_LIKE} MATCHES "rhel|centos") -endif(LINUX) set(TRITON_CORE_HEADERS_ONLY OFF) -FetchContent_MakeAvailable(repo-third-party repo-core) - # -# Triton server executable and examples +# External C++ deps — resolved by Conan; conan_toolchain.cmake sets CMAKE_PREFIX_PATH. +# These must be found BEFORE add_subdirectory(core) so that imported targets +# (e.g. cnmem::cnmem) exist when core/src/CMakeLists.txt evaluates them. # +find_package(re2 REQUIRED) +find_package(Protobuf CONFIG REQUIRED) +find_package(RapidJSON CONFIG REQUIRED) +find_package(Boost REQUIRED) -# Need to use ExternalProject for our builds so that we can get the -# correct dependencies between Triton executable and the -# ExternalProject dependencies (found in the third_party repo) -include(ExternalProject) +if(TRITON_ENABLE_GRPC) + find_package(gRPC CONFIG REQUIRED) +endif() -# If CMAKE_TOOLCHAIN_FILE is set, propagate that hint path to the external -# projects. -set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "") -if (CMAKE_TOOLCHAIN_FILE) - set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "-DCMAKE_TOOLCHAIN_FILE:PATH=${CMAKE_TOOLCHAIN_FILE}") +if(TRITON_ENABLE_HTTP OR TRITON_ENABLE_METRICS OR + TRITON_ENABLE_SAGEMAKER OR TRITON_ENABLE_VERTEX_AI) + find_package(Libevent CONFIG REQUIRED) + find_package(libevhtp CONFIG REQUIRED) endif() -# If VCPKG_TARGET_TRIPLET is set, propagate that hint path to the external -# projects. 
-set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "") -if (VCPKG_TARGET_TRIPLET) - set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "-DVCPKG_TARGET_TRIPLET:STRING=${VCPKG_TARGET_TRIPLET}") +if(TRITON_ENABLE_METRICS) + find_package(prometheus-cpp CONFIG REQUIRED) endif() -# If OPENSSL_ROOT_DIR is set, propagate that hint path to the external -# projects with OpenSSL dependency. -set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "") -if (OPENSSL_ROOT_DIR) - set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "-DOPENSSL_ROOT_DIR:PATH=${OPENSSL_ROOT_DIR}") +if(TRITON_ENABLE_TRACING) + find_package(opentelemetry-cpp CONFIG REQUIRED) + find_package(CURL CONFIG REQUIRED) + find_package(nlohmann_json CONFIG REQUIRED) endif() -# Location where protobuf-config.cmake will be installed varies by -# platform -if (WIN32) - set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/cmake") -else() - set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/${LIB_DIR}/cmake/protobuf") +if(TRITON_ENABLE_GCS) + find_package(google_cloud_cpp_storage CONFIG REQUIRED) + find_package(Crc32c CONFIG REQUIRED) endif() -# Triton with Opentelemetry is not supported on Windows -# FIXME: add location for Windows, when support is added -# JIRA DLIS-4786 -if (WIN32) - set(_FINDPACKAGE_OPENTELEMETRY_CONFIG_DIR "") -else() - set(_FINDPACKAGE_OPENTELEMETRY_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/opentelemetry-cpp/${LIB_DIR}/cmake/opentelemetry-cpp") +if(TRITON_ENABLE_S3) + find_package(AWSSDK CONFIG REQUIRED COMPONENTS s3) +endif() + +if(TRITON_ENABLE_AZURE_STORAGE) + find_package(azure-storage-blobs-cpp CONFIG REQUIRED) endif() +if(TRITON_ENABLE_GPU) + # Must be found before add_subdirectory(core): core/src/CMakeLists.txt uses + # if(TARGET cnmem::cnmem) to select the Conan-provided target vs the fallback. 
+ find_package(cnmem CONFIG REQUIRED) + find_package(DCGM CONFIG REQUIRED) +endif() + +# Enable protobuf and gRPC in common before adding it as a subdirectory so +# that model_config.pb.h and proto targets are generated when core/src needs them. +set(TRITON_COMMON_ENABLE_PROTOBUF ON CACHE BOOL "Enable protobuf in common" FORCE) +if(TRITON_ENABLE_GRPC) + set(TRITON_COMMON_ENABLE_GRPC ON CACHE BOOL "Enable gRPC in common" FORCE) +endif() +set(TRITON_COMMON_ENABLE_JSON ON CACHE BOOL "Enable JSON in common" FORCE) + +triton_add_internal_repo( + triton-common TRITON_COMMON_SOURCE_DIR + "https://github.com/triton-inference-server/common.git" + TRITON_COMMON_REPO_TAG triton-common) + +triton_add_internal_repo( + triton-core TRITON_CORE_SOURCE_DIR + "https://github.com/triton-inference-server/core.git" + TRITON_CORE_REPO_TAG triton-core-top) + +triton_add_internal_repo( + triton-backend TRITON_BACKEND_SOURCE_DIR + "https://github.com/triton-inference-server/backend.git" + TRITON_BACKEND_REPO_TAG triton-backend) + +# Compatibility aliases for legacy FetchContent variable names still used by +# tests in server/src/test/ (repo-core_SOURCE_DIR, etc.). 
+set(repo-common_SOURCE_DIR "${TRITON_COMMON_SOURCE_DIR}" CACHE INTERNAL "") +set(repo-core_SOURCE_DIR "${TRITON_CORE_SOURCE_DIR}" CACHE INTERNAL "") +set(repo-backend_SOURCE_DIR "${TRITON_BACKEND_SOURCE_DIR}" CACHE INTERNAL "") + if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) set(TRITON_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/install) else() set(TRITON_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) endif() -set(TRITON_DEPENDS triton-core protobuf googletest re2) -if(${TRITON_ENABLE_GCS}) - set(TRITON_DEPENDS ${TRITON_DEPENDS} google-cloud-cpp) -endif() # TRITON_ENABLE_GCS -if(${TRITON_ENABLE_S3}) - set(TRITON_DEPENDS ${TRITON_DEPENDS} aws-sdk-cpp) -endif() # TRITON_ENABLE_S3 -if(${TRITON_ENABLE_HTTP} OR ${TRITON_ENABLE_METRICS} OR ${TRITON_ENABLE_SAGEMAKER} OR ${TRITON_ENABLE_VERTEX_AI}) - set(TRITON_DEPENDS ${TRITON_DEPENDS} libevent libevhtp) -endif() # TRITON_ENABLE_HTTP || TRITON_ENABLE_METRICS || TRITON_ENABLE_SAGEMAKER || TRITON_ENABLE_VERTEX_AI -if(${TRITON_ENABLE_GRPC}) - set(TRITON_DEPENDS ${TRITON_DEPENDS} grpc) -endif() # TRITON_ENABLE_GRPC -if(NOT WIN32 AND ${TRITON_ENABLE_TRACING}) - set(TRITON_DEPENDS ${TRITON_DEPENDS} opentelemetry-cpp) -endif() # TRITON_ENABLE_TRACING - -ExternalProject_Add(triton-server - PREFIX triton-server - SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src" - BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/triton-server" - CMAKE_CACHE_ARGS - -DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR} - ${_CMAKE_ARGS_OPENSSL_ROOT_DIR} - ${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE} - ${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET} - -DGTEST_ROOT:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/googletest - -DgRPC_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/grpc/lib/cmake/grpc - -Dc-ares_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/c-ares/${LIB_DIR}/cmake/c-ares - -Dre2_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/re2/${LIB_DIR}/cmake/re2 - -Dabsl_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/absl/${LIB_DIR}/cmake/absl - 
-DCURL_DIR:STRING=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/curl/${LIB_DIR}/cmake/CURL - -Dnlohmann_json_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/nlohmann_json/share/cmake/nlohmann_json - -DLibevent_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/libevent/lib/cmake/libevent - -Dlibevhtp_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/libevhtp/lib/cmake/libevhtp - -Dstorage_client_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/${LIB_DIR}/cmake/storage_client - -Dgoogle_cloud_cpp_common_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/${LIB_DIR}/cmake/google_cloud_cpp_common - -DCrc32c_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/crc32c/${LIB_DIR}/cmake/Crc32c - -DAWSSDK_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/cmake/AWSSDK - -Daws-cpp-sdk-core_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/cmake/aws-cpp-sdk-core - -Daws-cpp-sdk-s3_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/cmake/aws-cpp-sdk-s3 - -Daws-c-event-stream_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/aws-c-event-stream/cmake - -Daws-c-common_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/aws-c-common/cmake - -Daws-checksums_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/${LIB_DIR}/aws-checksums/cmake - -Dopentelemetry-cpp_DIR:PATH=${_FINDPACKAGE_OPENTELEMETRY_CONFIG_DIR} - -DTRITON_REPO_ORGANIZATION:STRING=${TRITON_REPO_ORGANIZATION} - -DTRITON_IGPU_BUILD:BOOL=${TRITON_IGPU_BUILD} - -DTRITON_THIRD_PARTY_REPO_TAG:STRING=${TRITON_THIRD_PARTY_REPO_TAG} - -DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG} - -DTRITON_CORE_REPO_TAG:STRING=${TRITON_CORE_REPO_TAG} - -DTRITON_BACKEND_REPO_TAG:STRING=${TRITON_BACKEND_REPO_TAG} - -DTRITON_EXTRA_LIB_PATHS:PATH=${TRITON_EXTRA_LIB_PATHS} - -DTRITON_ENABLE_ASAN:BOOL=${TRITON_ENABLE_ASAN} - -DTRITON_ENABLE_NVTX:BOOL=${TRITON_ENABLE_NVTX} - -DTRITON_ENABLE_TRACING:BOOL=${TRITON_ENABLE_TRACING} - 
-DTRITON_ENABLE_LOGGING:BOOL=${TRITON_ENABLE_LOGGING} - -DTRITON_ENABLE_STATS:BOOL=${TRITON_ENABLE_STATS} - -DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU} - -DTRITON_ENABLE_MALI_GPU:BOOL=${TRITON_ENABLE_MALI_GPU} - -DTRITON_ENABLE_HTTP:BOOL=${TRITON_ENABLE_HTTP} - -DTRITON_ENABLE_SAGEMAKER:BOOL=${TRITON_ENABLE_SAGEMAKER} - -DTRITON_ENABLE_VERTEX_AI:BOOL=${TRITON_ENABLE_VERTEX_AI} - -DTRITON_ENABLE_GRPC:BOOL=${TRITON_ENABLE_GRPC} - -DTRITON_MIN_COMPUTE_CAPABILITY:STRING=${TRITON_MIN_COMPUTE_CAPABILITY} - -DTRITON_ENABLE_METRICS:BOOL=${TRITON_ENABLE_METRICS} - -DTRITON_ENABLE_METRICS_GPU:BOOL=${TRITON_ENABLE_METRICS_GPU} - -DTRITON_ENABLE_METRICS_CPU:BOOL=${TRITON_ENABLE_METRICS_CPU} - -DTRITON_ENABLE_GCS:BOOL=${TRITON_ENABLE_GCS} - -DTRITON_ENABLE_AZURE_STORAGE:BOOL=${TRITON_ENABLE_AZURE_STORAGE} - -DTRITON_ENABLE_S3:BOOL=${TRITON_ENABLE_S3} - -DTRITON_ENABLE_TENSORRT:BOOL=${TRITON_ENABLE_TENSORRT} - -DTRITON_ENABLE_ENSEMBLE:BOOL=${TRITON_ENABLE_ENSEMBLE} - -DTRITON_MIN_CXX_STANDARD:STRING=${TRITON_MIN_CXX_STANDARD} - -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE} - -DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX} - -DTRITON_VERSION:STRING=${TRITON_VERSION} - DEPENDS ${TRITON_DEPENDS} +# +# Triton server executable +# +add_subdirectory(src) + +# --------------------------------------------------------------------------- +# Backend implementations +# +# Each backend is included via add_subdirectory. When enabled but no local +# checkout is found, FetchContent downloads the repo automatically (requires +# network access at configure time). 
+#
+# Build a single backend:
+#   cmake --build --preset <preset> --target <name>-backend
+#
+# Build all enabled backends:
+#   cmake --build --preset <preset> --target backends
+#
+# Install all enabled backends (produces deployable backend packages):
+#   cmake --install build/<preset> --prefix /opt/tritonserver
+#
+# For standalone backend builds (without the full server), backends can link
+# against the triton::tritonserver_stub interface target which provides the
+# TRITONSERVER_* API headers without requiring libtritonserver.so at build time.
+# ---------------------------------------------------------------------------
+
+# Controls whether missing backend checkouts are fetched automatically.
+option(TRITON_FETCH_MISSING_BACKENDS
+    "Automatically fetch backends via FetchContent when local checkout is absent" ON)
+
+# All backends pass these flags into their sub-build so they skip their own
+# FetchContent fetches for common/core (already in scope from this build).
+set(_backend_passthrough
+    -DTRITON_SKIP_THIRD_PARTY_FETCH=ON
+    -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU}
+)
+
+# _triton_backend_labels tracks which add_subdirectory labels were registered
+# so we can build the 'backends' aggregate target afterwards.
+set(_triton_backend_labels "")
+
+macro(triton_add_backend _opt _dir _repo_name _repo_tag_var _label)
+  if(${_opt})
+    if(EXISTS "${${_dir}}/CMakeLists.txt")
+      message(STATUS "triton/${_repo_name}: using local checkout at ${${_dir}}")
+      add_subdirectory("${${_dir}}" "${_label}")
+      list(APPEND _triton_backend_labels "${_label}")
+    elseif(EXISTS "${${_dir}}")
+      # Directory present but no CMakeLists.txt — Python-only backend.
+      # No CMake compilation step; deployment is via pip install / wheel.
+ message(STATUS "triton/${_repo_name}: Python-only backend at ${${_dir}} — no CMake build step") + elseif(TRITON_FETCH_MISSING_BACKENDS) + message(STATUS "triton/${_repo_name}: not found locally — fetching " + "https://github.com/triton-inference-server/${_repo_name}.git " + "@${${_repo_tag_var}}") + FetchContent_Declare( + ${_repo_name} + GIT_REPOSITORY "https://github.com/triton-inference-server/${_repo_name}.git" + GIT_TAG "${${_repo_tag_var}}" + GIT_SHALLOW TRUE + SOURCE_DIR "${${_dir}}" + ) + FetchContent_GetProperties(${_repo_name}) + if(NOT ${_repo_name}_POPULATED) + FetchContent_Populate(${_repo_name}) + endif() + if(EXISTS "${${_dir}}/CMakeLists.txt") + add_subdirectory("${${_dir}}" "${_label}") + list(APPEND _triton_backend_labels "${_label}") + else() + message(STATUS "triton/${_repo_name}: fetched — Python-only backend, no CMake build step") + endif() + else() + message(WARNING + "${_opt}=ON but '${${_dir}}' not found and " + "TRITON_FETCH_MISSING_BACKENDS=OFF — skipping ${_repo_name}") + endif() + endif() +endmacro() + +triton_add_backend(TRITON_ENABLE_PYTHON_BACKEND TRITON_PYTHON_BACKEND_SOURCE_DIR + python_backend TRITON_PYTHON_BACKEND_REPO_TAG python_backend) +triton_add_backend(TRITON_ENABLE_ONNXRUNTIME_BACKEND TRITON_ONNXRUNTIME_BACKEND_SOURCE_DIR + onnxruntime_backend TRITON_ONNXRUNTIME_BACKEND_REPO_TAG onnxruntime_backend) +triton_add_backend(TRITON_ENABLE_PYTORCH_BACKEND TRITON_PYTORCH_BACKEND_SOURCE_DIR + pytorch_backend TRITON_PYTORCH_BACKEND_REPO_TAG pytorch_backend) +triton_add_backend(TRITON_ENABLE_TENSORRT_BACKEND TRITON_TENSORRT_BACKEND_SOURCE_DIR + tensorrt_backend TRITON_TENSORRT_BACKEND_REPO_TAG tensorrt_backend) +triton_add_backend(TRITON_ENABLE_OPENVINO_BACKEND TRITON_OPENVINO_BACKEND_SOURCE_DIR + openvino_backend TRITON_OPENVINO_BACKEND_REPO_TAG openvino_backend) +triton_add_backend(TRITON_ENABLE_DALI_BACKEND TRITON_DALI_BACKEND_SOURCE_DIR + dali_backend TRITON_DALI_BACKEND_REPO_TAG dali_backend) 
+triton_add_backend(TRITON_ENABLE_FIL_BACKEND TRITON_FIL_BACKEND_SOURCE_DIR
+    fil_backend TRITON_FIL_BACKEND_REPO_TAG fil_backend)
+triton_add_backend(TRITON_ENABLE_VLLM_BACKEND TRITON_VLLM_BACKEND_SOURCE_DIR
+    vllm_backend TRITON_VLLM_BACKEND_REPO_TAG vllm_backend)
+triton_add_backend(TRITON_ENABLE_TENSORRTLLM_BACKEND TRITON_TENSORRTLLM_BACKEND_SOURCE_DIR
+    tensorrtllm_backend TRITON_TENSORRTLLM_BACKEND_REPO_TAG tensorrtllm_backend)
+triton_add_backend(TRITON_ENABLE_IDENTITY_BACKEND TRITON_IDENTITY_BACKEND_SOURCE_DIR
+    identity_backend TRITON_IDENTITY_BACKEND_REPO_TAG identity_backend)
+triton_add_backend(TRITON_ENABLE_REPEAT_BACKEND TRITON_REPEAT_BACKEND_SOURCE_DIR
+    repeat_backend TRITON_REPEAT_BACKEND_REPO_TAG repeat_backend)
+triton_add_backend(TRITON_ENABLE_SQUARE_BACKEND TRITON_SQUARE_BACKEND_SOURCE_DIR
+    square_backend TRITON_SQUARE_BACKEND_REPO_TAG square_backend)
+
+# 'backends' aggregate target — builds all enabled backends via cmake --target backends.
+# Individual targets: cmake --build --preset <preset> --target <name>-backend
+# (each backend's CMakeLists.txt is expected to define install() rules that place
+# the backend .so under ${CMAKE_INSTALL_PREFIX}/backends/<name>/)
+add_custom_target(backends
+  COMMENT "Building all enabled backend implementations"
+)
+foreach(_bl ${_triton_backend_labels})
+  # Create a per-backend target alias: python-backend, onnxruntime-backend, etc.
+  # The label (e.g. "python_backend") maps to the add_subdirectory binary subdir;
+  # cmake --build with that subdir name builds all targets from that backend.
+  string(REPLACE "_" "-" _backend_target_name "${_bl}")
+  add_custom_target("${_backend_target_name}"
+    COMMAND ${CMAKE_COMMAND} --build "${CMAKE_BINARY_DIR}/${_bl}"
+            --config $<CONFIG>
+    COMMENT "Building ${_bl}"
+    VERBATIM
+  )
+  add_dependencies(backends "${_backend_target_name}")
+endforeach()
+
+# ---------------------------------------------------------------------------
+# Triton plugins: repository agents and cache providers
+#
+# These are server-side extension plugins that are shipped alongside the server
+# but live in separate repositories. Each can be built standalone from its own
+# checkout, or included here via the same local-checkout / FetchContent pattern.
+#
+# Build a single plugin:
+#   cmake --build --preset <preset> --target <plugin>
+#
+# Build all enabled plugins:
+#   cmake --build --preset <preset> --target plugins
+# ---------------------------------------------------------------------------
+
+option(TRITON_ENABLE_CHECKSUM_REPO_AGENT "Build checksum repository agent" OFF)
+option(TRITON_ENABLE_LOCAL_CACHE "Build local response cache" OFF)
+option(TRITON_ENABLE_REDIS_CACHE "Build Redis response cache" OFF)
+
+set(TRITON_CHECKSUM_REPO_AGENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../checksum_repository_agent"
+    CACHE PATH "Local checkout of checksum_repository_agent repo")
+set(TRITON_LOCAL_CACHE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../local_cache"
+    CACHE PATH "Local checkout of local_cache repo")
+set(TRITON_REDIS_CACHE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../redis_cache"
+    CACHE PATH "Local checkout of redis_cache repo")
+
+set(TRITON_CHECKSUM_REPO_AGENT_REPO_TAG "main" CACHE STRING "Git ref for checksum_repository_agent FetchContent")
+set(TRITON_LOCAL_CACHE_REPO_TAG "main" CACHE STRING "Git ref for local_cache FetchContent")
+set(TRITON_REDIS_CACHE_REPO_TAG "main" CACHE STRING "Git ref for redis_cache FetchContent")
+
+set(_triton_plugin_labels "")
+
+macro(triton_add_plugin _opt _dir _repo_name _repo_tag_var _label)
+  if(${_opt})
+    if(EXISTS "${${_dir}}/CMakeLists.txt")
+      message(STATUS "triton/${_repo_name}: using local checkout at ${${_dir}}")
+      add_subdirectory("${${_dir}}" "${_label}")
+    elseif(TRITON_FETCH_MISSING_BACKENDS)
+      message(STATUS "triton/${_repo_name}: not found locally — fetching "
+                     "https://github.com/triton-inference-server/${_repo_name}.git "
+                     "@${${_repo_tag_var}}")
+      FetchContent_Declare(
+        ${_repo_name}
+        GIT_REPOSITORY "https://github.com/triton-inference-server/${_repo_name}.git"
+        GIT_TAG "${${_repo_tag_var}}"
+        GIT_SHALLOW TRUE
+        SOURCE_DIR "${${_dir}}"
+      )
+      FetchContent_MakeAvailable(${_repo_name})
+    else()
+      message(WARNING
+        "${_opt}=ON but '${${_dir}}' not found and "
"TRITON_FETCH_MISSING_BACKENDS=OFF — skipping ${_repo_name}") + endif() + list(APPEND _triton_plugin_labels "${_label}") + endif() +endmacro() + +triton_add_plugin(TRITON_ENABLE_CHECKSUM_REPO_AGENT TRITON_CHECKSUM_REPO_AGENT_SOURCE_DIR + checksum_repository_agent TRITON_CHECKSUM_REPO_AGENT_REPO_TAG checksum_repository_agent) +triton_add_plugin(TRITON_ENABLE_LOCAL_CACHE TRITON_LOCAL_CACHE_SOURCE_DIR + local_cache TRITON_LOCAL_CACHE_REPO_TAG local_cache) +triton_add_plugin(TRITON_ENABLE_REDIS_CACHE TRITON_REDIS_CACHE_SOURCE_DIR + redis_cache TRITON_REDIS_CACHE_REPO_TAG redis_cache) + +# 'plugins' aggregate target — builds all enabled repo agents and cache providers. +add_custom_target(plugins + COMMENT "Building all enabled Triton plugin extensions" +) +foreach(_pl ${_triton_plugin_labels}) + string(REPLACE "_" "-" _plugin_target_name "${_pl}") + add_custom_target("${_plugin_target_name}" + COMMAND ${CMAKE_COMMAND} --build "${CMAKE_BINARY_DIR}/${_pl}" + --config $ + COMMENT "Building ${_pl}" + VERBATIM + ) + add_dependencies(plugins "${_plugin_target_name}") +endforeach() + +# --------------------------------------------------------------------------- +# Client & perf_analyzer — optional; buildable without the server library. 
+# When TRITON_SERVER_LIBRARY is empty/not found, downstream CMakeLists.txt
+# must link against the stub target (provided via core/include/):
+#   target_link_libraries(<target> PRIVATE triton::tritonserver_stub)
+# ---------------------------------------------------------------------------
+option(TRITON_ENABLE_CLIENT "Build Triton client libraries" OFF)
+option(TRITON_ENABLE_PERF_ANALYZER "Build perf_analyzer tool" OFF)
+
+set(TRITON_CLIENT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../client"
+    CACHE PATH "Local checkout of client repo")
+set(TRITON_PERF_ANALYZER_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../perf_analyzer"
+    CACHE PATH "Local checkout of perf_analyzer repo")
+
+set(TRITON_CLIENT_REPO_TAG "main" CACHE STRING "Git ref for client FetchContent fallback")
+set(TRITON_PERF_ANALYZER_REPO_TAG "main" CACHE STRING "Git ref for perf_analyzer FetchContent fallback")
+
+# Provide a header-only stub target so client/perf_analyzer can build
+# without libtritonserver.so present (they communicate via gRPC/HTTP only).
+if(TRITON_ENABLE_CLIENT OR TRITON_ENABLE_PERF_ANALYZER) + if(NOT TARGET triton::tritonserver_stub) + add_library(tritonserver_stub INTERFACE) + add_library(triton::tritonserver_stub ALIAS tritonserver_stub) + target_include_directories(tritonserver_stub INTERFACE + ${TRITON_CORE_SOURCE_DIR}/include) + endif() +endif() + +if(TRITON_ENABLE_CLIENT) + triton_add_internal_repo( + triton-client TRITON_CLIENT_SOURCE_DIR + "https://github.com/triton-inference-server/client.git" + TRITON_CLIENT_REPO_TAG client) +endif() + +if(TRITON_ENABLE_PERF_ANALYZER) + if(EXISTS "${TRITON_PERF_ANALYZER_SOURCE_DIR}/CMakeLists.txt") + message(STATUS "triton/perf_analyzer: using local checkout at ${TRITON_PERF_ANALYZER_SOURCE_DIR}") + add_subdirectory(${TRITON_PERF_ANALYZER_SOURCE_DIR} perf_analyzer) + else() + message(STATUS "triton/perf_analyzer: local checkout not found — fetching ${TRITON_PERF_ANALYZER_REPO_TAG}") + FetchContent_Declare( + triton-perf-analyzer + GIT_REPOSITORY "https://github.com/triton-inference-server/perf_analyzer.git" + GIT_TAG ${TRITON_PERF_ANALYZER_REPO_TAG} + GIT_SHALLOW TRUE + ) + FetchContent_MakeAvailable(triton-perf-analyzer) + endif() +endif() + +# --------------------------------------------------------------------------- +# Python wheel targets (Req 29) +# Built AFTER the main CMake compilation as an explicit additional step: +# cmake --build --preset --target wheels +# +# Individual wheel targets: +# tritonserver-wheel — tritonserver Python SDK wheel (from core/python/) +# client-wheel — tritonclient wheels (from client/) +# wheels — convenience target that builds all wheels +# --------------------------------------------------------------------------- +find_package(Python3 QUIET COMPONENTS Interpreter) + +if(Python3_FOUND) + # Server Python SDK wheel (core/python/tritonserver package) + set(_server_py_dir "${CMAKE_CURRENT_SOURCE_DIR}/../core/python") + if(EXISTS "${_server_py_dir}/setup.py") + add_custom_target(tritonserver-wheel + COMMAND 
${Python3_EXECUTABLE} -m pip wheel + --no-deps + --wheel-dir "${CMAKE_BINARY_DIR}/wheels" + "${_server_py_dir}" + COMMENT "Building tritonserver Python wheel" + VERBATIM + ) + endif() + + # Client wheels (tritonclient[grpc] + tritonclient[http]) + set(_client_py_dir "${CMAKE_CURRENT_SOURCE_DIR}/../client") + if(EXISTS "${_client_py_dir}/setup.py" OR EXISTS "${_client_py_dir}/pyproject.toml") + add_custom_target(client-wheel + COMMAND ${Python3_EXECUTABLE} -m pip wheel + --no-deps + --wheel-dir "${CMAKE_BINARY_DIR}/wheels" + "${_client_py_dir}" + COMMENT "Building tritonclient Python wheels" + VERBATIM + ) + endif() + + # Convenience umbrella target + add_custom_target(wheels + COMMENT "Building all Python wheels into ${CMAKE_BINARY_DIR}/wheels/" + ) + if(TARGET tritonserver-wheel) + add_dependencies(wheels tritonserver-wheel) + endif() + if(TARGET client-wheel) + add_dependencies(wheels client-wheel) + endif() +else() + message(STATUS "Python3 not found — 'wheels' target will not be available") +endif() diff --git a/CMakePresets.json b/CMakePresets.json new file mode 100644 index 0000000000..21b4cd4199 --- /dev/null +++ b/CMakePresets.json @@ -0,0 +1,4 @@ +{ + "version": 6, + "include": ["cmake/CMakePresets.json"] +} diff --git a/build.py b/build.py.legacy similarity index 100% rename from build.py rename to build.py.legacy diff --git a/cmake/CMakePresets.json b/cmake/CMakePresets.json new file mode 100644 index 0000000000..4b0019ddda --- /dev/null +++ b/cmake/CMakePresets.json @@ -0,0 +1,88 @@ +{ + "version": 6, + "cmakeMinimumRequired": { "major": 3, "minor": 25, "patch": 0 }, + "vendor": { + "note": "AUTHORITATIVE PRESET FILE. When building from server/ all downstream repos (core, common, backend) are included via add_subdirectory and inherit these variables. Downstream repos have their own CMakePresets.json for standalone builds only." 
+ }, + "configurePresets": [ + { + "name": "conan-base", + "hidden": true, + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { "CMAKE_EXPORT_COMPILE_COMMANDS": "ON" } + }, + { + "name": "release", + "inherits": "conan-base", + "displayName": "Release (GPU + HTTP + gRPC + Metrics)", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON" + } + }, + { + "name": "debug", + "inherits": "conan-base", + "displayName": "Debug (GPU + HTTP + gRPC + Metrics)", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Debug", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON" + } + }, + { + "name": "cpu-only", + "inherits": "conan-base", + "displayName": "Release CPU-only", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "OFF", + "TRITON_ENABLE_METRICS_GPU": "OFF", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON" + } + }, + { + "name": "all-features", + "inherits": "conan-base", + "displayName": "Release - all features (no cloud storage)", + "cacheVariables": { + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + "TRITON_ENABLE_METRICS_GPU": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_TRACING": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_ENABLE_GCS": "OFF", + "TRITON_ENABLE_S3": "OFF", + "TRITON_ENABLE_AZURE_STORAGE": "OFF", + "TRITON_ENABLE_IDENTITY_BACKEND": "ON", + "TRITON_ENABLE_REPEAT_BACKEND": "ON", + "TRITON_ENABLE_SQUARE_BACKEND": "ON", + "TRITON_ENABLE_PYTHON_BACKEND": "OFF", + "TRITON_ENABLE_ONNXRUNTIME_BACKEND": "OFF", + "TRITON_ENABLE_PYTORCH_BACKEND": "OFF", 
+ "TRITON_ENABLE_TENSORRT_BACKEND": "OFF", + "TRITON_ENABLE_VLLM_BACKEND": "OFF", + "TRITON_ENABLE_TENSORRTLLM_BACKEND": "OFF" + } + } + ], + "buildPresets": [ + { "name": "release", "configurePreset": "release" }, + { "name": "debug", "configurePreset": "debug" }, + { "name": "cpu-only", "configurePreset": "cpu-only" }, + { "name": "all-features", "configurePreset": "all-features" } + ] +} diff --git a/cmake/TritonCheckCxxAbi.cmake b/cmake/TritonCheckCxxAbi.cmake new file mode 100644 index 0000000000..17acb32e1c --- /dev/null +++ b/cmake/TritonCheckCxxAbi.cmake @@ -0,0 +1,110 @@ +# TritonCheckCxxAbi.cmake +# +# Detects the _GLIBCXX_USE_CXX11_ABI value that the current build will use and +# validates that the Conan-provided binary packages were compiled with the same +# ABI setting. A mismatch causes silent link failures or runtime crashes when +# mixing old ABI (=0) and new ABI (=1) translation units. +# +# Usage (call once, early in the root CMakeLists.txt after find_package calls): +# include(cmake/TritonCheckCxxAbi.cmake) +# triton_check_cxx_abi() +# +# The macro sets TRITON_GLIBCXX_USE_CXX11_ABI (cache variable) to 0 or 1. + +macro(triton_check_cxx_abi) + if(NOT CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + return() + endif() + + # --------------------------------------------------------------- + # 1. Probe the ABI the compiler will use for this build. + # --------------------------------------------------------------- + include(CheckCXXSourceCompiles) + set(_abi_probe_src " +#include <string> +#if _GLIBCXX_USE_CXX11_ABI +int use_new_abi = 1; +#else +int use_new_abi = 0; +#endif +int main() { return use_new_abi; } +") + # Ask the compiler directly via try_compile output rather than + # running the binary, so cross-compilation works too. 
+ file(WRITE "${CMAKE_BINARY_DIR}/_abi_probe.cpp" "${_abi_probe_src}") + try_compile( + _abi_compile_ok + "${CMAKE_BINARY_DIR}/_abi_probe_build" + SOURCES "${CMAKE_BINARY_DIR}/_abi_probe.cpp" + CXX_STANDARD 17 + OUTPUT_VARIABLE _abi_compile_output + ) + + if(NOT _abi_compile_ok) + message(WARNING "TritonCheckCxxAbi: could not compile ABI probe. Skipping ABI check.") + return() + endif() + + # Extract the _GLIBCXX_USE_CXX11_ABI value by compiling a file that + # emits a preprocessor-expanded value we can grep. + file(WRITE "${CMAKE_BINARY_DIR}/_abi_value.cpp" + "#include <string>\nint abi_value = _GLIBCXX_USE_CXX11_ABI;\n") + execute_process( + COMMAND "${CMAKE_CXX_COMPILER}" -E -dM + "${CMAKE_BINARY_DIR}/_abi_value.cpp" + OUTPUT_VARIABLE _abi_macros + ERROR_QUIET + ) + + if(_abi_macros MATCHES "#define _GLIBCXX_USE_CXX11_ABI ([01])") + set(_detected_abi "${CMAKE_MATCH_1}") + else() + # Default: GCC >= 5 defaults to 1 (new ABI). + set(_detected_abi 1) + endif() + + set(TRITON_GLIBCXX_USE_CXX11_ABI "${_detected_abi}" + CACHE STRING "Detected _GLIBCXX_USE_CXX11_ABI value (0=old, 1=new)" FORCE) + message(STATUS "TritonCheckCxxAbi: _GLIBCXX_USE_CXX11_ABI = ${TRITON_GLIBCXX_USE_CXX11_ABI}") + + # --------------------------------------------------------------- + # 2. Validate Conan-provided binary packages that embed ABI info. + # --------------------------------------------------------------- + # Conan 2 records the compiler.libcxx setting in the package ID. + # If the Conan toolchain was generated we can cross-check via the + # conanbuildinfo vars; otherwise we skip silently. + if(DEFINED CONAN_COMPILER_LIBCXX) + if("${CONAN_COMPILER_LIBCXX}" STREQUAL "libstdc++" AND + "${TRITON_GLIBCXX_USE_CXX11_ABI}" STREQUAL "1") + message(FATAL_ERROR + "ABI MISMATCH: Build is using new C++11 ABI " + "(_GLIBCXX_USE_CXX11_ABI=1) but Conan packages were built " + "with libstdc++ (old ABI). 
Re-run 'conan install' with " + "compiler.libcxx=libstdc++11 in the host profile.") + endif() + if("${CONAN_COMPILER_LIBCXX}" STREQUAL "libstdc++11" AND + "${TRITON_GLIBCXX_USE_CXX11_ABI}" STREQUAL "0") + message(FATAL_ERROR + "ABI MISMATCH: Build is using old ABI " + "(_GLIBCXX_USE_CXX11_ABI=0) but Conan packages were built " + "with libstdc++11 (new ABI). Re-run 'conan install' with " + "compiler.libcxx=libstdc++ in the host profile, or remove " + "-D_GLIBCXX_USE_CXX11_ABI=0 from your compile flags.") + endif() + endif() + + # --------------------------------------------------------------- + # 3. Warn if a user has manually set a conflicting compile flag. + # --------------------------------------------------------------- + if(DEFINED CMAKE_CXX_FLAGS AND + CMAKE_CXX_FLAGS MATCHES "_GLIBCXX_USE_CXX11_ABI=([01])") + set(_flag_abi "${CMAKE_MATCH_1}") + if(NOT "${_flag_abi}" STREQUAL "${_detected_abi}") + message(WARNING + "TritonCheckCxxAbi: CMAKE_CXX_FLAGS sets " + "_GLIBCXX_USE_CXX11_ABI=${_flag_abi} but the compiler " + "default is ${_detected_abi}. This will likely cause " + "linker errors with Conan-provided binary packages.") + endif() + endif() +endmacro() diff --git a/cmake/presets/CMakePresets.TritonClient.Release.CPU.ubuntu.aarch64.json b/cmake/presets/CMakePresets.TritonClient.Release.CPU.ubuntu.aarch64.json new file mode 100644 index 0000000000..c512080276 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonClient.Release.CPU.ubuntu.aarch64.json @@ -0,0 +1,64 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonClient", + "platform": "Ubuntu aarch64, CPU-only (Graviton, Neoverse, Apple Silicon dev)", + "notes": [ + "CPU-only client build for aarch64 — no CUDA required.", + "Targets AWS Graviton, Ampere Altra, and NVIDIA Grace (CPU die).", + "Run cmake from the client/ source directory." 
+ ], + "conan_install": [ + "conan install client/", + " --profile:host=server/conan/profiles/linux-gcc13-release-aarch64", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=False'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " --build=missing", + " --output-folder=client/build/triton-client-release-cpu-ubuntu-aarch64/conan", + "", + "cmake --preset triton-client-release-cpu-ubuntu-aarch64", + "cmake --build --preset triton-client-release-cpu-ubuntu-aarch64", + "cmake --build --preset triton-client-release-cpu-ubuntu-aarch64 --target wheels" + ] + } + }, + "configurePresets": [ + { + "name": "triton-client-release-cpu-ubuntu-aarch64", + "displayName": "TritonClient — Release, CPU-only, Ubuntu, aarch64", + "description": "CPU-only client library build for Ubuntu Linux on aarch64.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "OFF", + "TRITON_ENABLE_CC_HTTP": "ON", + "TRITON_ENABLE_CC_GRPC": "ON", + "TRITON_ENABLE_PYTHON_HTTP": "ON", + "TRITON_ENABLE_PYTHON_GRPC": "ON", + "TRITON_ENABLE_PERF_ANALYZER": "ON", + "TRITON_ENABLE_EXAMPLES": "ON", + "TRITON_ENABLE_TESTS": "ON" + } + } + ], + "buildPresets": [ + { + "name": "triton-client-release-cpu-ubuntu-aarch64", + "configurePreset": "triton-client-release-cpu-ubuntu-aarch64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-client-release-cpu-ubuntu-aarch64", + "configurePreset": "triton-client-release-cpu-ubuntu-aarch64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonClient.Release.CPU.ubuntu.x86_64.json b/cmake/presets/CMakePresets.TritonClient.Release.CPU.ubuntu.x86_64.json new file mode 100644 index 0000000000..f32a410183 --- /dev/null +++ 
b/cmake/presets/CMakePresets.TritonClient.Release.CPU.ubuntu.x86_64.json @@ -0,0 +1,64 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonClient", + "platform": "Ubuntu x86_64, CPU-only (no NVIDIA GPU required)", + "notes": [ + "CPU-only client build — no CUDA toolkit required.", + "CUDA shared memory operations are disabled.", + "Run cmake from the client/ source directory." + ], + "conan_install": [ + "conan install client/", + " --profile:host=server/conan/profiles/linux-gcc13-release", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=False'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " --build=missing", + " --output-folder=client/build/triton-client-release-cpu-ubuntu-x86_64/conan", + "", + "cmake --preset triton-client-release-cpu-ubuntu-x86_64", + "cmake --build --preset triton-client-release-cpu-ubuntu-x86_64", + "cmake --build --preset triton-client-release-cpu-ubuntu-x86_64 --target wheels" + ] + } + }, + "configurePresets": [ + { + "name": "triton-client-release-cpu-ubuntu-x86_64", + "displayName": "TritonClient — Release, CPU-only, Ubuntu, x86_64", + "description": "CPU-only client library build for Ubuntu Linux on x86_64. 
No CUDA required.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "OFF", + "TRITON_ENABLE_CC_HTTP": "ON", + "TRITON_ENABLE_CC_GRPC": "ON", + "TRITON_ENABLE_PYTHON_HTTP": "ON", + "TRITON_ENABLE_PYTHON_GRPC": "ON", + "TRITON_ENABLE_PERF_ANALYZER": "ON", + "TRITON_ENABLE_EXAMPLES": "ON", + "TRITON_ENABLE_TESTS": "ON" + } + } + ], + "buildPresets": [ + { + "name": "triton-client-release-cpu-ubuntu-x86_64", + "configurePreset": "triton-client-release-cpu-ubuntu-x86_64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-client-release-cpu-ubuntu-x86_64", + "configurePreset": "triton-client-release-cpu-ubuntu-x86_64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonClient.Release.CUDA.manylinux.aarch64.json b/cmake/presets/CMakePresets.TritonClient.Release.CUDA.manylinux.aarch64.json new file mode 100644 index 0000000000..1e6546e50e --- /dev/null +++ b/cmake/presets/CMakePresets.TritonClient.Release.CUDA.manylinux.aarch64.json @@ -0,0 +1,65 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonClient", + "platform": "manylinux (RHEL/CentOS-compatible) aarch64 with NVIDIA GPU (CUDA)", + "notes": [ + "manylinux client build for aarch64 — broad Linux distribution compatibility.", + "Targets Jetson Orin (SM 8.7) and Grace Hopper (SM 9.0).", + "Run cmake from the client/ source directory." 
+ ], + "conan_install": [ + "conan install client/", + " --profile:host=server/conan/profiles/linux-gcc13-release-manylinux-aarch64", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " --build=missing", + " --output-folder=client/build/triton-client-release-cuda-manylinux-aarch64/conan", + "", + "cmake --preset triton-client-release-cuda-manylinux-aarch64", + "cmake --build --preset triton-client-release-cuda-manylinux-aarch64", + "cmake --build --preset triton-client-release-cuda-manylinux-aarch64 --target wheels" + ] + } + }, + "configurePresets": [ + { + "name": "triton-client-release-cuda-manylinux-aarch64", + "displayName": "TritonClient — Release, CUDA, manylinux, aarch64", + "description": "GPU-enabled client library build for RHEL-compatible Linux on aarch64.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_CC_HTTP": "ON", + "TRITON_ENABLE_CC_GRPC": "ON", + "TRITON_ENABLE_PYTHON_HTTP": "ON", + "TRITON_ENABLE_PYTHON_GRPC": "ON", + "TRITON_ENABLE_PERF_ANALYZER": "ON", + "TRITON_ENABLE_EXAMPLES": "OFF", + "TRITON_ENABLE_TESTS": "OFF", + "CMAKE_CUDA_RUNTIME_LIBRARY": "Shared" + } + } + ], + "buildPresets": [ + { + "name": "triton-client-release-cuda-manylinux-aarch64", + "configurePreset": "triton-client-release-cuda-manylinux-aarch64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-client-release-cuda-manylinux-aarch64", + "configurePreset": "triton-client-release-cuda-manylinux-aarch64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonClient.Release.CUDA.manylinux.x86_64.json 
b/cmake/presets/CMakePresets.TritonClient.Release.CUDA.manylinux.x86_64.json new file mode 100644 index 0000000000..d01c17a29f --- /dev/null +++ b/cmake/presets/CMakePresets.TritonClient.Release.CUDA.manylinux.x86_64.json @@ -0,0 +1,65 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonClient", + "platform": "manylinux (RHEL/CentOS-compatible) x86_64 with NVIDIA GPU (CUDA)", + "notes": [ + "manylinux client build — broad Linux distribution compatibility (GLIBC 2.17+).", + "Primary distribution target: the tritonclient Python wheels published to PyPI.", + "Run cmake from the client/ source directory." + ], + "conan_install": [ + "conan install client/", + " --profile:host=server/conan/profiles/linux-gcc13-release-manylinux", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " --build=missing", + " --output-folder=client/build/triton-client-release-cuda-manylinux-x86_64/conan", + "", + "cmake --preset triton-client-release-cuda-manylinux-x86_64", + "cmake --build --preset triton-client-release-cuda-manylinux-x86_64", + "cmake --build --preset triton-client-release-cuda-manylinux-x86_64 --target wheels" + ] + } + }, + "configurePresets": [ + { + "name": "triton-client-release-cuda-manylinux-x86_64", + "displayName": "TritonClient — Release, CUDA, manylinux, x86_64", + "description": "GPU-enabled client library build for RHEL/CentOS-compatible Linux on x86_64.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_CC_HTTP": "ON", + "TRITON_ENABLE_CC_GRPC": "ON", + "TRITON_ENABLE_PYTHON_HTTP": "ON", + "TRITON_ENABLE_PYTHON_GRPC": "ON", + "TRITON_ENABLE_PERF_ANALYZER": "ON", + 
"TRITON_ENABLE_EXAMPLES": "OFF", + "TRITON_ENABLE_TESTS": "OFF", + "CMAKE_CUDA_RUNTIME_LIBRARY": "Shared" + } + } + ], + "buildPresets": [ + { + "name": "triton-client-release-cuda-manylinux-x86_64", + "configurePreset": "triton-client-release-cuda-manylinux-x86_64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-client-release-cuda-manylinux-x86_64", + "configurePreset": "triton-client-release-cuda-manylinux-x86_64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonClient.Release.CUDA.ubuntu.aarch64.json b/cmake/presets/CMakePresets.TritonClient.Release.CUDA.ubuntu.aarch64.json new file mode 100644 index 0000000000..a80342c5ae --- /dev/null +++ b/cmake/presets/CMakePresets.TritonClient.Release.CUDA.ubuntu.aarch64.json @@ -0,0 +1,64 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonClient", + "platform": "Ubuntu aarch64 with NVIDIA GPU (CUDA) — Jetson Orin, Grace Hopper", + "notes": [ + "GPU-enabled client build for aarch64.", + "Targets Jetson Orin (SM 8.7) and Grace Hopper (SM 9.0).", + "Run cmake from the client/ source directory." 
+ ], + "conan_install": [ + "conan install client/", + " --profile:host=server/conan/profiles/linux-gcc13-release-aarch64", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " --build=missing", + " --output-folder=client/build/triton-client-release-cuda-ubuntu-aarch64/conan", + "", + "cmake --preset triton-client-release-cuda-ubuntu-aarch64", + "cmake --build --preset triton-client-release-cuda-ubuntu-aarch64" + ] + } + }, + "configurePresets": [ + { + "name": "triton-client-release-cuda-ubuntu-aarch64", + "displayName": "TritonClient — Release, CUDA, Ubuntu, aarch64", + "description": "GPU-enabled client library build for Ubuntu Linux on aarch64 (Jetson Orin, Grace Hopper).", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_CC_HTTP": "ON", + "TRITON_ENABLE_CC_GRPC": "ON", + "TRITON_ENABLE_PYTHON_HTTP": "ON", + "TRITON_ENABLE_PYTHON_GRPC": "ON", + "TRITON_ENABLE_PERF_ANALYZER": "ON", + "TRITON_ENABLE_EXAMPLES": "ON", + "TRITON_ENABLE_TESTS": "ON", + "CMAKE_CUDA_RUNTIME_LIBRARY": "Shared" + } + } + ], + "buildPresets": [ + { + "name": "triton-client-release-cuda-ubuntu-aarch64", + "configurePreset": "triton-client-release-cuda-ubuntu-aarch64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-client-release-cuda-ubuntu-aarch64", + "configurePreset": "triton-client-release-cuda-ubuntu-aarch64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonClient.Release.CUDA.ubuntu.x86_64.json b/cmake/presets/CMakePresets.TritonClient.Release.CUDA.ubuntu.x86_64.json new file mode 100644 index 0000000000..c27515e095 --- /dev/null +++ 
b/cmake/presets/CMakePresets.TritonClient.Release.CUDA.ubuntu.x86_64.json @@ -0,0 +1,65 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonClient", + "platform": "Ubuntu x86_64 with NVIDIA GPU (CUDA)", + "notes": [ + "GPU-enabled client build — CUDA shared memory support included.", + "Builds C++ and Python client libraries plus perf_analyzer.", + "Run cmake from the client/ source directory." + ], + "conan_install": [ + "conan install client/", + " --profile:host=server/conan/profiles/linux-gcc13-release", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " --build=missing", + " --output-folder=client/build/triton-client-release-cuda-ubuntu-x86_64/conan", + "", + "cmake --preset triton-client-release-cuda-ubuntu-x86_64", + "cmake --build --preset triton-client-release-cuda-ubuntu-x86_64", + "cmake --build --preset triton-client-release-cuda-ubuntu-x86_64 --target wheels" + ] + } + }, + "configurePresets": [ + { + "name": "triton-client-release-cuda-ubuntu-x86_64", + "displayName": "TritonClient — Release, CUDA, Ubuntu, x86_64", + "description": "GPU-enabled client library build for Ubuntu Linux on x86_64.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_CC_HTTP": "ON", + "TRITON_ENABLE_CC_GRPC": "ON", + "TRITON_ENABLE_PYTHON_HTTP": "ON", + "TRITON_ENABLE_PYTHON_GRPC": "ON", + "TRITON_ENABLE_PERF_ANALYZER": "ON", + "TRITON_ENABLE_EXAMPLES": "ON", + "TRITON_ENABLE_TESTS": "ON", + "CMAKE_CUDA_RUNTIME_LIBRARY": "Shared" + } + } + ], + "buildPresets": [ + { + "name": "triton-client-release-cuda-ubuntu-x86_64", + "configurePreset": "triton-client-release-cuda-ubuntu-x86_64", + "jobs": 8 
+ } + ], + "testPresets": [ + { + "name": "triton-client-release-cuda-ubuntu-x86_64", + "configurePreset": "triton-client-release-cuda-ubuntu-x86_64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonInferenceServer.Release.CPU.ubuntu.aarch64.json b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CPU.ubuntu.aarch64.json new file mode 100644 index 0000000000..06e0cc1072 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CPU.ubuntu.aarch64.json @@ -0,0 +1,66 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonInferenceServer", + "platform": "Ubuntu aarch64, CPU-only (Graviton, Neoverse, Apple Silicon dev)", + "notes": [ + "CPU-only server build for aarch64 — no CUDA required.", + "Targets AWS Graviton, Ampere Altra, NVIDIA Grace (CPU die), and similar ARM64 platforms.", + "SageMaker endpoint enabled: AWS Graviton instances are a common deployment target." + ], + "conan_install": [ + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release-aarch64", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=False'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-inference-server-release-cpu-ubuntu-aarch64/conan", + "", + "cmake --preset triton-inference-server-release-cpu-ubuntu-aarch64", + "cmake --build --preset triton-inference-server-release-cpu-ubuntu-aarch64", + "cmake --build --preset triton-inference-server-release-cpu-ubuntu-aarch64 --target wheels" + ] + } + }, + "configurePresets": [ + { + "name": "triton-inference-server-release-cpu-ubuntu-aarch64", + "displayName": "TritonInferenceServer — Release, CPU-only, Ubuntu, aarch64", + "description": "CPU-only release build for Ubuntu Linux on aarch64 (Graviton, Neoverse, Grace CPU).", + "generator": "Ninja", + "binaryDir": 
"${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "OFF", + "TRITON_ENABLE_METRICS_GPU": "OFF", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_ENABLE_SAGEMAKER": "ON", + "TRITON_ENABLE_VERTEX_AI": "OFF" + } + } + ], + "buildPresets": [ + { + "name": "triton-inference-server-release-cpu-ubuntu-aarch64", + "configurePreset": "triton-inference-server-release-cpu-ubuntu-aarch64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-inference-server-release-cpu-ubuntu-aarch64", + "configurePreset": "triton-inference-server-release-cpu-ubuntu-aarch64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonInferenceServer.Release.CPU.ubuntu.x86_64.json b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CPU.ubuntu.x86_64.json new file mode 100644 index 0000000000..e0e11cabed --- /dev/null +++ b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CPU.ubuntu.x86_64.json @@ -0,0 +1,66 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonInferenceServer", + "platform": "Ubuntu x86_64, CPU-only (no NVIDIA GPU required)", + "notes": [ + "CPU-only server build — no CUDA toolkit required.", + "Suitable for backends: identity, repeat, ensemble, square, Python, ONNX Runtime (CPU), OpenVINO.", + "SageMaker and Vertex AI endpoints enabled." 
+ ], + "conan_install": [ + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=False'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-inference-server-release-cpu-ubuntu-x86_64/conan", + "", + "cmake --preset triton-inference-server-release-cpu-ubuntu-x86_64", + "cmake --build --preset triton-inference-server-release-cpu-ubuntu-x86_64", + "cmake --build --preset triton-inference-server-release-cpu-ubuntu-x86_64 --target wheels" + ] + } + }, + "configurePresets": [ + { + "name": "triton-inference-server-release-cpu-ubuntu-x86_64", + "displayName": "TritonInferenceServer — Release, CPU-only, Ubuntu, x86_64", + "description": "CPU-only release build for Ubuntu Linux on x86_64. No CUDA required.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "OFF", + "TRITON_ENABLE_METRICS_GPU": "OFF", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_ENABLE_SAGEMAKER": "ON", + "TRITON_ENABLE_VERTEX_AI": "ON" + } + } + ], + "buildPresets": [ + { + "name": "triton-inference-server-release-cpu-ubuntu-x86_64", + "configurePreset": "triton-inference-server-release-cpu-ubuntu-x86_64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-inference-server-release-cpu-ubuntu-x86_64", + "configurePreset": "triton-inference-server-release-cpu-ubuntu-x86_64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git 
a/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.manylinux.aarch64.json b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.manylinux.aarch64.json new file mode 100644 index 0000000000..fd72da0ce2 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.manylinux.aarch64.json @@ -0,0 +1,68 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonInferenceServer", + "platform": "manylinux (RHEL/CentOS-compatible) aarch64 with NVIDIA GPU (CUDA)", + "notes": [ + "manylinux builds target GLIBC 2.17+ compatibility.", + "aarch64 CUDA targets: Jetson Orin (SM 8.7) and Grace Hopper (SM 9.0).", + "SageMaker and Vertex AI endpoints are disabled." + ], + "conan_install": [ + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release-manylinux-aarch64", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-inference-server-release-cuda-manylinux-aarch64/conan", + "", + "cmake --preset triton-inference-server-release-cuda-manylinux-aarch64", + "cmake --build --preset triton-inference-server-release-cuda-manylinux-aarch64" + ] + } + }, + "configurePresets": [ + { + "name": "triton-inference-server-release-cuda-manylinux-aarch64", + "displayName": "TritonInferenceServer — Release, CUDA, manylinux, aarch64", + "description": "GPU-enabled release build for RHEL-compatible Linux on aarch64 (Orin, Grace Hopper).", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + 
"TRITON_ENABLE_METRICS_GPU": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_ENABLE_SAGEMAKER": "OFF", + "TRITON_ENABLE_VERTEX_AI": "OFF", + "TRITON_MIN_COMPUTE_CAPABILITY": "8.7", + "CMAKE_CUDA_ARCHITECTURES": "87;90", + "CMAKE_CUDA_RUNTIME_LIBRARY": "Shared" + } + } + ], + "buildPresets": [ + { + "name": "triton-inference-server-release-cuda-manylinux-aarch64", + "configurePreset": "triton-inference-server-release-cuda-manylinux-aarch64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-inference-server-release-cuda-manylinux-aarch64", + "configurePreset": "triton-inference-server-release-cuda-manylinux-aarch64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.manylinux.x86_64.json b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.manylinux.x86_64.json new file mode 100644 index 0000000000..b2aa9690e8 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.manylinux.x86_64.json @@ -0,0 +1,68 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonInferenceServer", + "platform": "manylinux (RHEL/CentOS-compatible) x86_64 with NVIDIA GPU (CUDA)", + "notes": [ + "manylinux builds target GLIBC 2.17+ compatibility for broad Linux distribution support.", + "SageMaker and Vertex AI endpoints are disabled (not used in RHEL-based container deployments).", + "Use the linux-gcc13-release-manylinux Conan profile which links against the manylinux sysroot." 
+ ], + "conan_install": [ + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release-manylinux", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-inference-server-release-cuda-manylinux-x86_64/conan", + "", + "cmake --preset triton-inference-server-release-cuda-manylinux-x86_64", + "cmake --build --preset triton-inference-server-release-cuda-manylinux-x86_64" + ] + } + }, + "configurePresets": [ + { + "name": "triton-inference-server-release-cuda-manylinux-x86_64", + "displayName": "TritonInferenceServer — Release, CUDA, manylinux, x86_64", + "description": "GPU-enabled release build for RHEL/CentOS-compatible (manylinux2014) Linux on x86_64.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + "TRITON_ENABLE_METRICS_GPU": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_ENABLE_SAGEMAKER": "OFF", + "TRITON_ENABLE_VERTEX_AI": "OFF", + "TRITON_MIN_COMPUTE_CAPABILITY": "6.0", + "CMAKE_CUDA_ARCHITECTURES": "60;70;75;80;86;89;90", + "CMAKE_CUDA_RUNTIME_LIBRARY": "Shared" + } + } + ], + "buildPresets": [ + { + "name": "triton-inference-server-release-cuda-manylinux-x86_64", + "configurePreset": "triton-inference-server-release-cuda-manylinux-x86_64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-inference-server-release-cuda-manylinux-x86_64", + "configurePreset": "triton-inference-server-release-cuda-manylinux-x86_64", + "filter": { "exclude": { "label": 
"requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.ubuntu.aarch64.json b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.ubuntu.aarch64.json new file mode 100644 index 0000000000..19305f5cf9 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.ubuntu.aarch64.json @@ -0,0 +1,68 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonInferenceServer", + "platform": "Ubuntu aarch64 with NVIDIA GPU (CUDA) — Jetson Orin, Grace Hopper", + "notes": [ + "GPU-enabled server build for Ubuntu on aarch64.", + "Targets Jetson Orin (SM 8.7) and Grace Hopper (SM 9.0).", + "SageMaker and Vertex AI endpoints enabled." + ], + "conan_install": [ + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release-aarch64", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-inference-server-release-cuda-ubuntu-aarch64/conan", + "", + "cmake --preset triton-inference-server-release-cuda-ubuntu-aarch64", + "cmake --build --preset triton-inference-server-release-cuda-ubuntu-aarch64" + ] + } + }, + "configurePresets": [ + { + "name": "triton-inference-server-release-cuda-ubuntu-aarch64", + "displayName": "TritonInferenceServer — Release, CUDA, Ubuntu, aarch64", + "description": "GPU-enabled release build for Ubuntu Linux on aarch64 (Jetson Orin = SM 8.7, Grace Hopper = SM 9.0).", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + 
"TRITON_ENABLE_METRICS_GPU": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_ENABLE_SAGEMAKER": "ON", + "TRITON_ENABLE_VERTEX_AI": "ON", + "TRITON_MIN_COMPUTE_CAPABILITY": "8.7", + "CMAKE_CUDA_ARCHITECTURES": "87;90", + "CMAKE_CUDA_RUNTIME_LIBRARY": "Shared" + } + } + ], + "buildPresets": [ + { + "name": "triton-inference-server-release-cuda-ubuntu-aarch64", + "configurePreset": "triton-inference-server-release-cuda-ubuntu-aarch64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-inference-server-release-cuda-ubuntu-aarch64", + "configurePreset": "triton-inference-server-release-cuda-ubuntu-aarch64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.ubuntu.x86_64.json b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.ubuntu.x86_64.json new file mode 100644 index 0000000000..a58995bc82 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonInferenceServer.Release.CUDA.ubuntu.x86_64.json @@ -0,0 +1,69 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonInferenceServer", + "platform": "Ubuntu x86_64 with NVIDIA GPU (CUDA)", + "notes": [ + "GPU-enabled server build for Ubuntu on x86_64.", + "Requires CUDA toolkit, cuDNN, and an NVIDIA driver.", + "SageMaker and Vertex AI endpoints enabled for cloud deployment." 
+ ], + "conan_install": [ + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-inference-server-release-cuda-ubuntu-x86_64/conan", + "", + "cmake --preset triton-inference-server-release-cuda-ubuntu-x86_64", + "cmake --build --preset triton-inference-server-release-cuda-ubuntu-x86_64", + "cmake --build --preset triton-inference-server-release-cuda-ubuntu-x86_64 --target wheels" + ] + } + }, + "configurePresets": [ + { + "name": "triton-inference-server-release-cuda-ubuntu-x86_64", + "displayName": "TritonInferenceServer — Release, CUDA, Ubuntu, x86_64", + "description": "GPU-enabled release build for Ubuntu Linux on x86_64. Requires CUDA toolkit and NVIDIA driver.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + "TRITON_ENABLE_METRICS_GPU": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_ENABLE_SAGEMAKER": "ON", + "TRITON_ENABLE_VERTEX_AI": "ON", + "TRITON_MIN_COMPUTE_CAPABILITY": "7.5", + "CMAKE_CUDA_ARCHITECTURES": "75;80;86;89;90", + "CMAKE_CUDA_RUNTIME_LIBRARY": "Shared" + } + } + ], + "buildPresets": [ + { + "name": "triton-inference-server-release-cuda-ubuntu-x86_64", + "configurePreset": "triton-inference-server-release-cuda-ubuntu-x86_64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-inference-server-release-cuda-ubuntu-x86_64", + "configurePreset": 
"triton-inference-server-release-cuda-ubuntu-x86_64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonPerfAnalyzer.Release.manylinux.aarch64.json b/cmake/presets/CMakePresets.TritonPerfAnalyzer.Release.manylinux.aarch64.json new file mode 100644 index 0000000000..2f20f9caa4 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonPerfAnalyzer.Release.manylinux.aarch64.json @@ -0,0 +1,62 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonPerfAnalyzer", + "platform": "manylinux (RHEL/CentOS-compatible) aarch64", + "notes": [ + "PerfAnalyzer manylinux build for aarch64 — broad compatibility (GLIBC 2.17+).", + "Targets Jetson Orin, Grace Hopper, and AWS Graviton platforms.", + "GPU mode enabled for benchmarking GPU models on aarch64.", + "Run cmake from the perf_analyzer/ source directory." + ], + "conan_install": [ + "conan install perf_analyzer/", + " --profile:host=server/conan/profiles/linux-gcc13-release-manylinux-aarch64", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " --build=missing", + " --output-folder=perf_analyzer/build/triton-perf-analyzer-release-manylinux-aarch64/conan", + "", + "cmake --preset triton-perf-analyzer-release-manylinux-aarch64", + "cmake --build --preset triton-perf-analyzer-release-manylinux-aarch64" + ] + } + }, + "configurePresets": [ + { + "name": "triton-perf-analyzer-release-manylinux-aarch64", + "displayName": "TritonPerfAnalyzer — Release, manylinux, aarch64", + "description": "PerfAnalyzer release build for manylinux-compatible Linux on aarch64.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + 
"TRITON_ENABLE_CC_HTTP": "ON", + "TRITON_ENABLE_CC_GRPC": "ON", + "TRITON_ENABLE_PERF_ANALYZER_OPENAI": "ON", + "TRITON_ENABLE_PERF_ANALYZER_TFS": "ON", + "TRITON_ENABLE_PERF_ANALYZER_TS": "ON" + } + } + ], + "buildPresets": [ + { + "name": "triton-perf-analyzer-release-manylinux-aarch64", + "configurePreset": "triton-perf-analyzer-release-manylinux-aarch64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-perf-analyzer-release-manylinux-aarch64", + "configurePreset": "triton-perf-analyzer-release-manylinux-aarch64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonPerfAnalyzer.Release.manylinux.x86_64.json b/cmake/presets/CMakePresets.TritonPerfAnalyzer.Release.manylinux.x86_64.json new file mode 100644 index 0000000000..bde85fdca5 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonPerfAnalyzer.Release.manylinux.x86_64.json @@ -0,0 +1,62 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonPerfAnalyzer", + "platform": "manylinux (RHEL/CentOS-compatible) x86_64", + "notes": [ + "PerfAnalyzer is distributed as a standalone binary — manylinux ensures maximum", + "compatibility across Linux distributions (GLIBC 2.17+).", + "GPU mode (CUDA shared memory, GPU metrics) is enabled for benchmarking GPU models.", + "Run cmake from the perf_analyzer/ source directory." 
+ ], + "conan_install": [ + "conan install perf_analyzer/", + " --profile:host=server/conan/profiles/linux-gcc13-release-manylinux", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " --build=missing", + " --output-folder=perf_analyzer/build/triton-perf-analyzer-release-manylinux-x86_64/conan", + "", + "cmake --preset triton-perf-analyzer-release-manylinux-x86_64", + "cmake --build --preset triton-perf-analyzer-release-manylinux-x86_64" + ] + } + }, + "configurePresets": [ + { + "name": "triton-perf-analyzer-release-manylinux-x86_64", + "displayName": "TritonPerfAnalyzer — Release, manylinux, x86_64", + "description": "PerfAnalyzer release build for manylinux-compatible Linux on x86_64.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_CC_HTTP": "ON", + "TRITON_ENABLE_CC_GRPC": "ON", + "TRITON_ENABLE_PERF_ANALYZER_OPENAI": "ON", + "TRITON_ENABLE_PERF_ANALYZER_TFS": "ON", + "TRITON_ENABLE_PERF_ANALYZER_TS": "ON" + } + } + ], + "buildPresets": [ + { + "name": "triton-perf-analyzer-release-manylinux-x86_64", + "configurePreset": "triton-perf-analyzer-release-manylinux-x86_64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-perf-analyzer-release-manylinux-x86_64", + "configurePreset": "triton-perf-analyzer-release-manylinux-x86_64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonTRTLLMBackend.Release.CUDA.ubuntu.aarch64.json b/cmake/presets/CMakePresets.TritonTRTLLMBackend.Release.CUDA.ubuntu.aarch64.json new file mode 100644 index 0000000000..d9672ed852 --- /dev/null +++ 
b/cmake/presets/CMakePresets.TritonTRTLLMBackend.Release.CUDA.ubuntu.aarch64.json @@ -0,0 +1,73 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonTRTLLMBackend", + "platform": "Ubuntu aarch64 with NVIDIA GPU (CUDA) — Grace Hopper (GH200)", + "notes": [ + "TensorRT-LLM backend build for Ubuntu on aarch64.", + "Primary target: NVIDIA Grace Hopper Superchip (GH200, SM 9.0).", + "The TensorRT-LLM library must be installed as a Python wheel before the build:", + " pip install tensorrt-llm --extra-index-url https://pypi.nvidia.com", + "Requires CUDA 12.x and TensorRT 10.x for aarch64.", + "Run cmake from the server/ source directory." + ], + "conan_install": [ + "pip install tensorrt-llm --extra-index-url https://pypi.nvidia.com", + "", + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release-aarch64", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-trtllm-backend-release-cuda-ubuntu-aarch64/conan", + "", + "cmake --preset triton-trtllm-backend-release-cuda-ubuntu-aarch64", + "cmake --build --preset triton-trtllm-backend-release-cuda-ubuntu-aarch64", + "cmake --build --preset triton-trtllm-backend-release-cuda-ubuntu-aarch64 --target tensorrtllm-backend" + ] + } + }, + "configurePresets": [ + { + "name": "triton-trtllm-backend-release-cuda-ubuntu-aarch64", + "displayName": "TritonTRTLLMBackend — Release, CUDA, Ubuntu, aarch64", + "description": "Triton server + TensorRT-LLM backend for Ubuntu Linux on aarch64 (Grace Hopper GH200 = SM 9.0). 
Requires tensorrt-llm wheel installed before cmake configure.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + "TRITON_ENABLE_METRICS_GPU": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_MIN_COMPUTE_CAPABILITY": "9.0", + "CMAKE_CUDA_ARCHITECTURES": "90", + "TRITON_ENABLE_TENSORRTLLM_BACKEND": "ON", + "TRITON_FETCH_MISSING_BACKENDS": "ON" + } + } + ], + "buildPresets": [ + { + "name": "triton-trtllm-backend-release-cuda-ubuntu-aarch64", + "configurePreset": "triton-trtllm-backend-release-cuda-ubuntu-aarch64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-trtllm-backend-release-cuda-ubuntu-aarch64", + "configurePreset": "triton-trtllm-backend-release-cuda-ubuntu-aarch64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonTRTLLMBackend.Release.CUDA.ubuntu.x86_64.json b/cmake/presets/CMakePresets.TritonTRTLLMBackend.Release.CUDA.ubuntu.x86_64.json new file mode 100644 index 0000000000..f63fd405e6 --- /dev/null +++ b/cmake/presets/CMakePresets.TritonTRTLLMBackend.Release.CUDA.ubuntu.x86_64.json @@ -0,0 +1,73 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonTRTLLMBackend", + "platform": "Ubuntu x86_64 with NVIDIA GPU (CUDA) — Ampere and newer", + "notes": [ + "TensorRT-LLM backend build for Ubuntu on x86_64.", + "The TensorRT-LLM library (tensorrt_llm) must be installed as a Python wheel", + "before the Triton server build (it provides C++ headers and shared libs):", + " pip install tensorrt-llm --extra-index-url https://pypi.nvidia.com", + "Requires CUDA 12.x, TensorRT 10.x, and an Ampere or 
newer GPU (SM 8.0+).", + "Run cmake from the server/ source directory." + ], + "conan_install": [ + "pip install tensorrt-llm --extra-index-url https://pypi.nvidia.com", + "", + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-trtllm-backend-release-cuda-ubuntu-x86_64/conan", + "", + "cmake --preset triton-trtllm-backend-release-cuda-ubuntu-x86_64", + "cmake --build --preset triton-trtllm-backend-release-cuda-ubuntu-x86_64", + "cmake --build --preset triton-trtllm-backend-release-cuda-ubuntu-x86_64 --target tensorrtllm-backend" + ] + } + }, + "configurePresets": [ + { + "name": "triton-trtllm-backend-release-cuda-ubuntu-x86_64", + "displayName": "TritonTRTLLMBackend — Release, CUDA, Ubuntu, x86_64", + "description": "Triton server + TensorRT-LLM backend for Ubuntu Linux on x86_64. 
Requires tensorrt-llm wheel installed before cmake configure.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + "TRITON_ENABLE_METRICS_GPU": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_MIN_COMPUTE_CAPABILITY": "8.0", + "CMAKE_CUDA_ARCHITECTURES": "80;86;89;90", + "TRITON_ENABLE_TENSORRTLLM_BACKEND": "ON", + "TRITON_FETCH_MISSING_BACKENDS": "ON" + } + } + ], + "buildPresets": [ + { + "name": "triton-trtllm-backend-release-cuda-ubuntu-x86_64", + "configurePreset": "triton-trtllm-backend-release-cuda-ubuntu-x86_64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-trtllm-backend-release-cuda-ubuntu-x86_64", + "configurePreset": "triton-trtllm-backend-release-cuda-ubuntu-x86_64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.TritonVLLMBackend.Release.CUDA.ubuntu.x86_64.json b/cmake/presets/CMakePresets.TritonVLLMBackend.Release.CUDA.ubuntu.x86_64.json new file mode 100644 index 0000000000..c5e90ece7a --- /dev/null +++ b/cmake/presets/CMakePresets.TritonVLLMBackend.Release.CUDA.ubuntu.x86_64.json @@ -0,0 +1,72 @@ +{ + "version": 6, + "vendor": { + "triton": { + "component": "TritonVLLMBackend", + "platform": "Ubuntu x86_64 with NVIDIA GPU (CUDA)", + "notes": [ + "vLLM backend build for Ubuntu on x86_64.", + "The vLLM backend is Python-only — CMake configures the server and enables", + "the backend stub so the vLLM Python package is discovered at runtime.", + "Install vLLM and its Triton shim before deployment:", + " pip install vllm", + " pip install tritonclient[all]", + "Requires CUDA toolkit and an NVIDIA driver 
(Ampere or newer recommended)." + ], + "conan_install": [ + "conan install server/", + " --profile:host=server/conan/profiles/linux-gcc13-release", + " --profile:build=server/conan/profiles/linux-gcc13-release", + " -o '&:enable_gpu=True'", + " -o '&:enable_grpc=True'", + " -o '&:enable_http=True'", + " -o '&:enable_metrics=True'", + " --build=missing", + " --output-folder=server/build/triton-vllm-backend-release-cuda-ubuntu-x86_64/conan", + "", + "cmake --preset triton-vllm-backend-release-cuda-ubuntu-x86_64", + "cmake --build --preset triton-vllm-backend-release-cuda-ubuntu-x86_64", + "cmake --build --preset triton-vllm-backend-release-cuda-ubuntu-x86_64 --target vllm-backend" + ] + } + }, + "configurePresets": [ + { + "name": "triton-vllm-backend-release-cuda-ubuntu-x86_64", + "displayName": "TritonVLLMBackend — Release, CUDA, Ubuntu, x86_64", + "description": "Triton server + vLLM backend for Ubuntu Linux on x86_64. vLLM backend is Python-only; this preset wires the backend into the server build.", + "generator": "Ninja", + "binaryDir": "${sourceDir}/build/${presetName}", + "toolchainFile": "${sourceDir}/build/${presetName}/conan/generators/conan_toolchain.cmake", + "cacheVariables": { + "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", + "CMAKE_BUILD_TYPE": "Release", + "TRITON_ENABLE_GPU": "ON", + "TRITON_ENABLE_HTTP": "ON", + "TRITON_ENABLE_GRPC": "ON", + "TRITON_ENABLE_METRICS": "ON", + "TRITON_ENABLE_METRICS_GPU": "ON", + "TRITON_ENABLE_METRICS_CPU": "ON", + "TRITON_ENABLE_ENSEMBLE": "ON", + "TRITON_MIN_COMPUTE_CAPABILITY": "8.0", + "CMAKE_CUDA_ARCHITECTURES": "80;86;89;90", + "TRITON_ENABLE_VLLM_BACKEND": "ON", + "TRITON_FETCH_MISSING_BACKENDS": "ON" + } + } + ], + "buildPresets": [ + { + "name": "triton-vllm-backend-release-cuda-ubuntu-x86_64", + "configurePreset": "triton-vllm-backend-release-cuda-ubuntu-x86_64", + "jobs": 8 + } + ], + "testPresets": [ + { + "name": "triton-vllm-backend-release-cuda-ubuntu-x86_64", + "configurePreset": 
"triton-vllm-backend-release-cuda-ubuntu-x86_64", + "filter": { "exclude": { "label": "requires-gpu" } } + } + ] +} diff --git a/cmake/presets/CMakePresets.json b/cmake/presets/CMakePresets.json new file mode 100644 index 0000000000..bc25f0c15f --- /dev/null +++ b/cmake/presets/CMakePresets.json @@ -0,0 +1,46 @@ +{ + "version": 6, + "vendor": { + "triton": { + "description": "Platform-specific preset catalog for all Triton components. Include this file from CMakeUserPresets.json to activate all platform presets.", + "components": [ + "TritonInferenceServer — server binary + backends (run cmake from server/)", + "TritonClient — C++/Python client libraries (run cmake from client/)", + "TritonPerfAnalyzer — standalone benchmark tool, manylinux only (run cmake from perf_analyzer/)", + "TritonVLLMBackend — server + vLLM Python backend (run cmake from server/)", + "TritonTRTLLMBackend — server + TensorRT-LLM backend (run cmake from server/; install tensorrt-llm wheel first)" + ], + "usage": [ + "After running 'conan install', add to CMakeUserPresets.json:", + " {", + " \"version\": 6,", + " \"include\": [", + " \"build//conan/generators/CMakePresets.json\",", + " \"/cmake/presets/CMakePresets.json\"", + " ]", + " }", + "", + "Then: cmake --preset triton-inference-server-release-cuda-ubuntu-x86_64" + ] + } + }, + "include": [ + "CMakePresets.TritonInferenceServer.Release.CUDA.ubuntu.x86_64.json", + "CMakePresets.TritonInferenceServer.Release.CUDA.ubuntu.aarch64.json", + "CMakePresets.TritonInferenceServer.Release.CUDA.manylinux.x86_64.json", + "CMakePresets.TritonInferenceServer.Release.CUDA.manylinux.aarch64.json", + "CMakePresets.TritonInferenceServer.Release.CPU.ubuntu.x86_64.json", + "CMakePresets.TritonInferenceServer.Release.CPU.ubuntu.aarch64.json", + "CMakePresets.TritonClient.Release.CUDA.ubuntu.x86_64.json", + "CMakePresets.TritonClient.Release.CUDA.ubuntu.aarch64.json", + "CMakePresets.TritonClient.Release.CUDA.manylinux.x86_64.json", + 
"CMakePresets.TritonClient.Release.CUDA.manylinux.aarch64.json", + "CMakePresets.TritonClient.Release.CPU.ubuntu.x86_64.json", + "CMakePresets.TritonClient.Release.CPU.ubuntu.aarch64.json", + "CMakePresets.TritonPerfAnalyzer.Release.manylinux.x86_64.json", + "CMakePresets.TritonPerfAnalyzer.Release.manylinux.aarch64.json", + "CMakePresets.TritonVLLMBackend.Release.CUDA.ubuntu.x86_64.json", + "CMakePresets.TritonTRTLLMBackend.Release.CUDA.ubuntu.x86_64.json", + "CMakePresets.TritonTRTLLMBackend.Release.CUDA.ubuntu.aarch64.json" + ] +} diff --git a/conan/profiles/linux-gcc13-debug b/conan/profiles/linux-gcc13-debug new file mode 100644 index 0000000000..51e994b678 --- /dev/null +++ b/conan/profiles/linux-gcc13-debug @@ -0,0 +1,13 @@ +[settings] +os=Linux +arch=x86_64 +compiler=gcc +compiler.version=13 +compiler.libcxx=libstdc++11 +compiler.cppstd=17 +build_type=Debug + +[buildenv] +CC=/usr/bin/gcc-13 +CXX=/usr/bin/g++-13 +PATH=+/home/user/.venv/bin diff --git a/conan/profiles/linux-gcc13-release b/conan/profiles/linux-gcc13-release new file mode 100644 index 0000000000..4ad3b65fc8 --- /dev/null +++ b/conan/profiles/linux-gcc13-release @@ -0,0 +1,15 @@ +[settings] +os=Linux +arch=x86_64 +compiler=gcc +compiler.version=13 +compiler.libcxx=libstdc++11 +compiler.cppstd=17 +build_type=Release + +[conf] +tools.cmake:cmake_program=/home/user/.venv/bin/cmake + +[buildenv] +CC=/usr/bin/gcc-13 +CXX=/usr/bin/g++-13 diff --git a/conan/profiles/linux-gcc13-release-aarch64 b/conan/profiles/linux-gcc13-release-aarch64 new file mode 100644 index 0000000000..235f252017 --- /dev/null +++ b/conan/profiles/linux-gcc13-release-aarch64 @@ -0,0 +1,12 @@ +[settings] +os=Linux +arch=armv8 +compiler=gcc +compiler.version=13 +compiler.libcxx=libstdc++11 +compiler.cppstd=17 +build_type=Release + +[buildenv] +CC=/usr/bin/aarch64-linux-gnu-gcc-13 +CXX=/usr/bin/aarch64-linux-gnu-g++-13 diff --git a/conan/profiles/linux-gcc13-release-manylinux b/conan/profiles/linux-gcc13-release-manylinux new file 
mode 100644 index 0000000000..9dae4a6296 --- /dev/null +++ b/conan/profiles/linux-gcc13-release-manylinux @@ -0,0 +1,19 @@ +[settings] +os=Linux +arch=x86_64 +compiler=gcc +compiler.version=13 +compiler.libcxx=libstdc++11 +compiler.cppstd=17 +build_type=Release + +[buildenv] +CC=/usr/bin/gcc-13 +CXX=/usr/bin/g++-13 +PATH=+/home/user/.venv/bin + +[conf] +# Target manylinux2014 (glibc 2.17+) — use the manylinux sysroot when available. +# Set CONAN_SYSROOT in the environment to point to the manylinux sysroot if +# cross-compiling; for native builds this profile is equivalent to linux-gcc13-release. +tools.build:sysroot= diff --git a/conan/profiles/linux-gcc13-release-manylinux-aarch64 b/conan/profiles/linux-gcc13-release-manylinux-aarch64 new file mode 100644 index 0000000000..235f252017 --- /dev/null +++ b/conan/profiles/linux-gcc13-release-manylinux-aarch64 @@ -0,0 +1,12 @@ +[settings] +os=Linux +arch=armv8 +compiler=gcc +compiler.version=13 +compiler.libcxx=libstdc++11 +compiler.cppstd=17 +build_type=Release + +[buildenv] +CC=/usr/bin/aarch64-linux-gnu-gcc-13 +CXX=/usr/bin/aarch64-linux-gnu-g++-13 diff --git a/conan/recipes/cnmem/conanfile.py b/conan/recipes/cnmem/conanfile.py new file mode 100644 index 0000000000..7779a78210 --- /dev/null +++ b/conan/recipes/cnmem/conanfile.py @@ -0,0 +1,49 @@ +import os +from conan import ConanFile +from conan.tools.cmake import CMake, CMakeToolchain, cmake_layout +from conan.tools.files import copy, get + + +class CnmemConan(ConanFile): + name = "cnmem" + version = "1.0.0" + description = "NVIDIA cnmem CUDA memory manager (Triton-patched, static library)" + license = "BSD-3-Clause" + url = "https://github.com/mc-nv/cnmem" + topics = ("cuda", "memory", "nvidia", "triton") + settings = "os", "compiler", "build_type", "arch" + options = {"shared": [True, False]} + default_options = {"shared": False} + + # Pinned to HEAD of mc-nv/cnmem (includes all Triton patches + Conan recipe). 
+ # Update this SHA when a new release is cut on mc-nv/cnmem. + _commit = "81d127414eb67f2ba3ecf82ad074d667e5eed558" + + def source(self): + get(self, + f"https://github.com/mc-nv/cnmem/archive/{self._commit}.tar.gz", + strip_root=True) + + def layout(self): + cmake_layout(self, src_folder=".") + + def generate(self): + tc = CMakeToolchain(self) + tc.variables["BUILD_SHARED_LIBS"] = self.options.shared + tc.generate() + + def build(self): + cmake = CMake(self) + cmake.configure() + cmake.build() + + def package(self): + CMake(self).install() + copy(self, "*.h", + os.path.join(self.source_folder, "include"), + os.path.join(self.package_folder, "include")) + + def package_info(self): + self.cpp_info.set_property("cmake_file_name", "cnmem") + self.cpp_info.set_property("cmake_target_name", "cnmem::cnmem") + self.cpp_info.libs = ["cnmem"] diff --git a/conan/recipes/dcgm/conanfile.py b/conan/recipes/dcgm/conanfile.py new file mode 100644 index 0000000000..782965b730 --- /dev/null +++ b/conan/recipes/dcgm/conanfile.py @@ -0,0 +1,36 @@ +import os +from conan import ConanFile + + +class DcgmConan(ConanFile): + name = "dcgm" + version = "4.5.3" + description = "NVIDIA DataCenter GPU Manager (system package wrapper)" + settings = "os", "arch" + + def package_info(self): + self.cpp_info.set_property("cmake_file_name", "DCGM") + self.cpp_info.set_property("cmake_target_name", "DCGM::dcgm") + # Search candidate install locations in priority order: + # 1. /usr/local/dcgm — NVIDIA NGC container image layout + # 2. 
/usr — apt package (datacenter-gpu-manager) layout + _include_candidates = [ + "/usr/local/dcgm/include", + "/usr/include", + ] + _lib_candidates = [ + "/usr/local/dcgm/lib", + "/usr/local/dcgm/lib64", + "/usr/lib/x86_64-linux-gnu", + "/usr/lib/aarch64-linux-gnu", + ] + include_dirs = [d for d in _include_candidates + if os.path.isfile(os.path.join(d, "dcgm_agent.h"))] + lib_dirs = [d for d in _lib_candidates + if any(f.startswith("libdcgm") for f in (os.listdir(d) if os.path.isdir(d) else []))] + self.cpp_info.includedirs = include_dirs + self.cpp_info.libdirs = lib_dirs + # Use system_libs so CMakeDeps does not try to locate the library + # inside the (empty) Conan package folder; the linker will search + # the libdirs paths set via INTERFACE_LINK_DIRECTORIES at build time. + self.cpp_info.system_libs = ["dcgm"] diff --git a/conan/recipes/libevhtp/conanfile.py b/conan/recipes/libevhtp/conanfile.py new file mode 100644 index 0000000000..d09edacb11 --- /dev/null +++ b/conan/recipes/libevhtp/conanfile.py @@ -0,0 +1,76 @@ +from conan import ConanFile +from conan.tools.cmake import CMake, CMakeToolchain, CMakeDeps, cmake_layout +import os +from conan.tools.files import get, replace_in_file + + +class LibevhtpConan(ConanFile): + name = "libevhtp" + version = "1.2.18" + description = "NVIDIA Triton-patched libevhtp flexible HTTP server library" + license = "BSD-3-Clause" + url = "https://github.com/mc-nv/libevhtp" + topics = ("http", "libevent", "triton") + settings = "os", "compiler", "build_type", "arch" + options = { + "shared": [True, False], + "enable_tracing": [True, False], + } + default_options = { + "shared": False, + "enable_tracing": False, + } + + # Pinned to HEAD of mc-nv/libevhtp (includes all Triton patches + Conan recipe). + # Update this SHA when a new release is cut on mc-nv/libevhtp. 
+ _commit = "59956d391d0f77cf144b2af5ec888de760088e22" + + def requirements(self): + self.requires("libevent/2.1.12") + + def source(self): + get(self, + f"https://github.com/mc-nv/libevhtp/archive/{self._commit}.tar.gz", + strip_root=True) + + def layout(self): + cmake_layout(self, src_folder=".") + + def generate(self): + tc = CMakeToolchain(self) + tc.variables["EVHTP_DISABLE_REGEX"] = True + tc.variables["EVHTP_DISABLE_SSL"] = True + tc.variables["EVHTP_TRITON_ENABLE_TRACING"] = self.options.enable_tracing + tc.variables["BUILD_SHARED_LIBS"] = self.options.shared + # Triton's tritonfrontend Python module links libevhtp into a .so; + # ensure all libevhtp object files are compiled with -fPIC. + tc.variables["CMAKE_POSITION_INDEPENDENT_CODE"] = True + tc.generate() + CMakeDeps(self).generate() + + def build(self): + # Patch libevhtp CMakeLists.txt to use Conan CMakeDeps imported targets + # instead of old-style Find module variables (LIBEVENT_LIBRARIES, etc.). + # CMakeDeps creates Libevent::core / Libevent::extra targets but does NOT + # set the LIBEVENT_LIBRARIES / LIBEVENT_INCLUDE_DIRS variables that the + # upstream CMakeLists.txt expects. 
+ replace_in_file( + self, + os.path.join(self.source_folder, "CMakeLists.txt"), + "find_package(Libevent REQUIRED)\n" + "list(APPEND LIBEVHTP_EXTERNAL_LIBS ${LIBEVENT_LIBRARIES})\n" + "list(APPEND LIBEVHTP_EXTERNAL_INCLUDES ${LIBEVENT_INCLUDE_DIRS})", + "find_package(Libevent CONFIG REQUIRED)\n" + "list(APPEND LIBEVHTP_EXTERNAL_LIBS libevent::core libevent::extra libevent::pthreads)", + ) + cmake = CMake(self) + cmake.configure() + cmake.build() + + def package(self): + CMake(self).install() + + def package_info(self): + self.cpp_info.set_property("cmake_file_name", "libevhtp") + self.cpp_info.set_property("cmake_target_name", "libevhtp::evhtp") + self.cpp_info.libs = ["evhtp"] diff --git a/conanfile.py b/conanfile.py new file mode 100644 index 0000000000..3a06fdaa26 --- /dev/null +++ b/conanfile.py @@ -0,0 +1,101 @@ +from conan import ConanFile +from conan.tools.cmake import CMakeToolchain, CMakeDeps +from conan.errors import ConanInvalidConfiguration + + +class TritonServerConan(ConanFile): + name = "tritonserver" + version = "2.68.0" + settings = "os", "compiler", "build_type", "arch" + options = { + "enable_grpc": [True, False], + "enable_http": [True, False], + "enable_metrics": [True, False], + "enable_tracing": [True, False], + "enable_gcs": [True, False], + "enable_s3": [True, False], + "enable_azure_storage": [True, False], + "enable_gpu": [True, False], + } + default_options = { + "enable_grpc": True, + "enable_http": True, + "enable_metrics": True, + "enable_tracing": False, + "enable_gcs": False, + "enable_s3": False, + "enable_azure_storage": False, + "enable_gpu": True, + } + + def validate(self): + if self.settings.os != "Linux": + raise ConanInvalidConfiguration("tritonserver only supports Linux") + + def requirements(self): + self.requires("protobuf/3.21.12") + self.requires("re2/20230301") + self.requires("rapidjson/cci.20230929") + self.requires("gtest/1.14.0") + self.requires("libcurl/8.18.0") + self.requires("nlohmann_json/3.11.3") + if 
self.options.enable_grpc: + self.requires("grpc/1.54.3") + if self.options.enable_http: + self.requires("libevent/2.1.12") + self.requires("libevhtp/1.2.18") + if self.options.enable_metrics: + self.requires("prometheus-cpp/1.2.4") + if self.options.enable_tracing: + self.requires("opentelemetry-cpp/1.9.1") + if self.options.enable_gcs: + self.requires("google-cloud-cpp/2.28.0") + self.requires("crc32c/1.1.2") + if self.options.enable_s3: + self.requires("aws-sdk-cpp/1.11.60") + if self.options.enable_azure_storage: + self.requires("azure-sdk-for-cpp/1.12.0") + if self.options.enable_gpu: + self.requires("cnmem/1.0.0") + self.requires("dcgm/4.5.3") + + def configure(self): + self.options["libcurl"].shared = False + self.options["libcurl"].with_ssl = "openssl" + if self.options.enable_grpc: + self.options["grpc"].shared = False + self.options["grpc"].cpp_plugin = True + if self.options.enable_http: + self.options["libevent"].shared = False + self.options["libevent"].with_openssl = False + if self.options.enable_metrics: + self.options["prometheus-cpp"].shared = False + self.options["prometheus-cpp"].with_pull = False + self.options["prometheus-cpp"].with_push = False + if self.options.enable_s3: + self.options["aws-sdk-cpp"].shared = False + self.options["aws-sdk-cpp"].build_only = "s3" + + def layout(self): + # Place generators flat under the output folder so our CMakePresets.json + # toolchainFile path (build/&lt;preset&gt;/conan/generators/conan_toolchain.cmake) + # resolves correctly without build_type nesting. + self.folders.generators = "generators" + + def generate(self): + tc = CMakeToolchain(self) + ws = self.recipe_folder + "/.." 
+ tc.variables["TRITON_COMMON_SOURCE_DIR"] = ws + "/common" + tc.variables["TRITON_CORE_SOURCE_DIR"] = ws + "/core" + tc.variables["TRITON_BACKEND_SOURCE_DIR"] = ws + "/backend" + tc.variables["TRITON_ENABLE_GRPC"] = self.options.enable_grpc + tc.variables["TRITON_ENABLE_HTTP"] = self.options.enable_http + tc.variables["TRITON_ENABLE_METRICS"] = self.options.enable_metrics + tc.variables["TRITON_ENABLE_TRACING"] = self.options.enable_tracing + tc.variables["TRITON_ENABLE_GCS"] = self.options.enable_gcs + tc.variables["TRITON_ENABLE_S3"] = self.options.enable_s3 + tc.variables["TRITON_ENABLE_AZURE_STORAGE"] = self.options.enable_azure_storage + tc.variables["TRITON_ENABLE_GPU"] = self.options.enable_gpu + tc.variables["TRITON_SKIP_THIRD_PARTY_FETCH"] = True + tc.generate() + CMakeDeps(self).generate() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000..42f524142a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,20 @@ +# Python runtime dependencies for Triton Inference Server +# +# TensorRT-LLM is installed as a pre-built wheel — NOT built from source. +# The wheel provides both the Python bindings and the required C++ shared +# libraries (libtensorrt_llm.so, libnvinfer_plugin_tensorrt_llm.so, etc.). +# +--extra-index-url https://pypi.nvidia.com + +# TensorRT-LLM runtime (wheel replaces the TensorRT-LLM/ source clone) +tensorrt-llm==1.2.0 + +# OpenAI-compatible HTTP frontend +fastapi==0.121.2 +httpx==0.27.2 +openai>=1.107.3 +starlette>=0.49.1 +partial-json-parser + +# Utilities +scipy>=1.11.0 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9568f4280d..cce1143efe 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -24,7 +24,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-cmake_minimum_required (VERSION 3.31.8) +cmake_minimum_required(VERSION 3.31.8) project(tritonserverexe LANGUAGES C CXX) @@ -33,38 +33,20 @@ include(GNUInstallDirs) # # Dependencies # -# We must include the transitive closure of all repos so that we can -# override the tag. The backend repo is needed for the tests. +# Internal Triton repos are brought in via add_subdirectory in the parent +# CMakeLists.txt (server/CMakeLists.txt). Their targets are already in scope. # -include(FetchContent) - -FetchContent_Declare( - repo-common - GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/common.git - GIT_TAG ${TRITON_COMMON_REPO_TAG} -) -FetchContent_Declare( - repo-core - GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/core.git - GIT_TAG ${TRITON_CORE_REPO_TAG} -) -FetchContent_Declare( - repo-backend - GIT_REPOSITORY ${TRITON_REPO_ORGANIZATION}/backend.git - GIT_TAG ${TRITON_BACKEND_REPO_TAG} -) if(TRITON_ENABLE_GRPC) set(TRITON_COMMON_ENABLE_PROTOBUF ON) set(TRITON_COMMON_ENABLE_GRPC ON) endif() # TRITON_ENABLE_GRPC -FetchContent_MakeAvailable(repo-common repo-core repo-backend) - # CUDA # if(${TRITON_ENABLE_GPU}) find_package(CUDAToolkit REQUIRED) + set(CMAKE_CUDA_RUNTIME_LIBRARY Shared) message(STATUS "Using CUDA ${CUDA_VERSION}") endif() # TRITON_ENABLE_GPU @@ -209,14 +191,7 @@ if(${TRITON_ENABLE_GPU}) ) endif() # TRITON_ENABLE_GPU -if(${TRITON_ENABLE_HTTP} OR ${TRITON_ENABLE_METRICS} OR - ${TRITON_ENABLE_SAGEMAKER} OR ${TRITON_ENABLE_VERTEX_AI}) - target_include_directories( - main - PRIVATE - ${LIBEVENT_INCLUDE_DIRS} - ) -endif() + # Libevent include dirs come via the Conan-generated target's INTERFACE_INCLUDE_DIRECTORIES if(${TRITON_ENABLE_HTTP}) @@ -401,7 +376,9 @@ if(${TRITON_ENABLE_HTTP} triton-common-logging # from repo-common triton-core-serverapi # from repo-core triton-core-serverstub # from repo-core - ${LIBEVENT_LIBRARIES} + libevent::core + libevent::extra + libevent::pthreads libevhtp::evhtp re2::re2 ) @@ -654,6 +631,7 @@ if (NOT WIN32) triton-common-error # from 
repo-common triton-core-serverapi # from repo-core triton-core-serverstub # from repo-core + rapidjson ) if(${TRITON_ENABLE_GPU}) @@ -718,6 +696,7 @@ if (NOT WIN32) triton-common-error # from repo-common triton-core-serverapi # from repo-core triton-core-serverstub # from repo-core + rapidjson ) if(${TRITON_ENABLE_GPU}) @@ -789,6 +768,7 @@ if (NOT WIN32) triton-common-error # from repo-common triton-core-serverapi # from repo-core triton-core-serverstub # from repo-core + rapidjson CUDA::cudart ) diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index f523c4dbdc..f6bf7af2d8 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -57,7 +57,6 @@ if(${TRITON_ENABLE_HTTP} OR ${TRITON_ENABLE_METRICS} OR PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/.. ${GTEST_INCLUDE_DIRS} - ${LIBEVENT_INCLUDE_DIRS} ) target_link_libraries( @@ -67,7 +66,8 @@ if(${TRITON_ENABLE_HTTP} OR ${TRITON_ENABLE_METRICS} OR triton-core-serverstub # from repo-core GTest::gtest GTest::gtest_main - ${LIBEVENT_LIBRARIES} + libevent::core + libevent::extra -lz ) diff --git a/src/test/distributed_addsub/CMakeLists.txt b/src/test/distributed_addsub/CMakeLists.txt index 597e70308c..3465225e17 100644 --- a/src/test/distributed_addsub/CMakeLists.txt +++ b/src/test/distributed_addsub/CMakeLists.txt @@ -64,7 +64,7 @@ set_target_properties( POSITION_INDEPENDENT_CODE ON OUTPUT_NAME triton_distributed_addsub LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_distributed_addsub.ldscript - LINK_FLAGS "-Wl,--version-script libtriton_distributed_addsub.ldscript" + LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_BINARY_DIR}/libtriton_distributed_addsub.ldscript" ) # diff --git a/src/test/dyna_sequence/CMakeLists.txt b/src/test/dyna_sequence/CMakeLists.txt index 2bed7c2d36..9eb93387fd 100644 --- a/src/test/dyna_sequence/CMakeLists.txt +++ b/src/test/dyna_sequence/CMakeLists.txt @@ -64,7 +64,7 @@ set_target_properties( POSITION_INDEPENDENT_CODE ON OUTPUT_NAME triton_dyna_sequence LINK_DEPENDS 
${CMAKE_CURRENT_BINARY_DIR}/libtriton_dyna_sequence.ldscript - LINK_FLAGS "-Wl,--version-script libtriton_dyna_sequence.ldscript" + LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_BINARY_DIR}/libtriton_dyna_sequence.ldscript" ) # diff --git a/src/test/implicit_state/CMakeLists.txt b/src/test/implicit_state/CMakeLists.txt index 4b6024a9c7..058ca1c80c 100644 --- a/src/test/implicit_state/CMakeLists.txt +++ b/src/test/implicit_state/CMakeLists.txt @@ -64,7 +64,7 @@ set_target_properties( POSITION_INDEPENDENT_CODE ON OUTPUT_NAME triton_implicit_state LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_implicit_state.ldscript - LINK_FLAGS "-Wl,--version-script libtriton_implicit_state.ldscript" + LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_BINARY_DIR}/libtriton_implicit_state.ldscript" ) # diff --git a/src/test/iterative_sequence/CMakeLists.txt b/src/test/iterative_sequence/CMakeLists.txt index 4a44925524..3a49fd94ff 100644 --- a/src/test/iterative_sequence/CMakeLists.txt +++ b/src/test/iterative_sequence/CMakeLists.txt @@ -64,7 +64,7 @@ set_target_properties( POSITION_INDEPENDENT_CODE ON OUTPUT_NAME triton_iterative_sequence LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_iterative_sequence.ldscript - LINK_FLAGS "-Wl,--version-script libtriton_iterative_sequence.ldscript" + LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_BINARY_DIR}/libtriton_iterative_sequence.ldscript" ) # diff --git a/src/test/query_backend/CMakeLists.txt b/src/test/query_backend/CMakeLists.txt index 30c33a6b50..feaa93bed2 100644 --- a/src/test/query_backend/CMakeLists.txt +++ b/src/test/query_backend/CMakeLists.txt @@ -64,7 +64,7 @@ set_target_properties( POSITION_INDEPENDENT_CODE ON OUTPUT_NAME triton_query LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_query.ldscript - LINK_FLAGS "-Wl,--version-script libtriton_query.ldscript" + LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_BINARY_DIR}/libtriton_query.ldscript" ) # diff --git 
a/src/test/repoagent/relocation_repoagent/CMakeLists.txt b/src/test/repoagent/relocation_repoagent/CMakeLists.txt index d0b97b5d24..881e5c609d 100644 --- a/src/test/repoagent/relocation_repoagent/CMakeLists.txt +++ b/src/test/repoagent/relocation_repoagent/CMakeLists.txt @@ -61,7 +61,7 @@ set_target_properties( POSITION_INDEPENDENT_CODE ON OUTPUT_NAME tritonrepoagent_relocation LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtritonrepoagent_relocation.ldscript - LINK_FLAGS "-Wl,--version-script libtritonrepoagent_relocation.ldscript" + LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_BINARY_DIR}/libtritonrepoagent_relocation.ldscript" ) # diff --git a/src/test/sequence/CMakeLists.txt b/src/test/sequence/CMakeLists.txt index ab105f20eb..2dbfb5a0b9 100644 --- a/src/test/sequence/CMakeLists.txt +++ b/src/test/sequence/CMakeLists.txt @@ -64,7 +64,7 @@ set_target_properties( POSITION_INDEPENDENT_CODE ON OUTPUT_NAME triton_sequence LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_sequence.ldscript - LINK_FLAGS "-Wl,--version-script libtriton_sequence.ldscript" + LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_BINARY_DIR}/libtriton_sequence.ldscript" ) #