tp5uiuc · tp5uiuc · Jun 3, 2026 · Jun 3, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/core/runtime/BUILD b/core/runtime/BUILD
@@ -86,6 +86,7 @@ cc_library(
         "DeviceList.cpp",
         "Platform.cpp",
         "RTDevice.cpp",
+        "RuntimeSettings.cpp",
         "TRTEngine.cpp",
         "TRTEngineProfiler.cpp",
         "TRTRuntimeConfig.cpp",
@@ -96,6 +97,7 @@ cc_library(
     hdrs = [
         "Platform.h",
         "RTDevice.h",
+        "RuntimeSettings.h",
         "TRTEngine.h",
         "TRTEngineProfiler.h",
         "TRTRuntimeConfig.h",
@@ -158,6 +160,7 @@ cc_library(
     hdrs = [
         "Platform.h",
         "RTDevice.h",
+        "RuntimeSettings.h",
         "TRTEngine.h",
         "TRTEngineProfiler.h",
         "TensorRTBindingNames.h",
@@ -174,6 +177,7 @@ filegroup(
     srcs = [
         "Platform.h",
         "RTDevice.h",
+        "RuntimeSettings.h",
         "TRTEngine.h",
         "TRTEngineProfiler.h",
         "TRTRuntimeConfig.h",

diff --git a/core/runtime/RuntimeSettings.cpp b/core/runtime/RuntimeSettings.cpp
@@ -0,0 +1,135 @@
+#include "core/runtime/RuntimeSettings.h"
+
+#include <array>
+#include <cstring>
+#include <sstream>
+#include <tuple>
+#include <type_traits>
+
+#include "core/util/prelude.h"
+
+namespace torch_tensorrt {
+namespace core {
+namespace runtime {
+
+namespace {
+
+// Reverse-lookup tables mapping an enum's integer value to its display name.
+// The index of each entry must equal the integer value of the matching
+// enumerator (which mirror the nvinfer1 enums). The functions in this file
+// also use the table sizes as the valid value range. Out-of-range -> "<unknown>".
+constexpr std::array<std::string_view, 3> kDsStrategyNames = {"lazy", "eager", "none"};
+constexpr std::array<std::string_view, 2> kCgStrategyNames = {"disabled", "whole_graph_capture"};
+
+// Fail the build if an enumerator is appended without extending its table (or
+// vice versa), instead of silently rejecting or mislabelling the new value.
+static_assert(
+    static_cast<size_t>(DynamicShapesKernelSpecializationStrategy::kNONE) + 1 == kDsStrategyNames.size(),
+    "kDsStrategyNames must have exactly one entry per DynamicShapesKernelSpecializationStrategy enumerator");
+static_assert(
+    static_cast<size_t>(CudaGraphStrategy::kWHOLE_GRAPH_CAPTURE) + 1 == kCgStrategyNames.size(),
+    "kCgStrategyNames must have exactly one entry per CudaGraphStrategy enumerator");
+
+} // namespace
+
+// Boundary validator: turns a raw integer into a
+// ``DynamicShapesKernelSpecializationStrategy``. Values outside the index
+// range of ``kDsStrategyNames`` fail the ``TORCHTRT_CHECK`` below.
+DynamicShapesKernelSpecializationStrategy to_dynamic_shapes_kernel_strategy(int32_t v) {
+  using Strategy = DynamicShapesKernelSpecializationStrategy;
+  // The condition is deliberately left inline and unchanged: the check macro
+  // presumably embeds the expression text in its message -- confirm before
+  // hoisting it into a named local.
+  TORCHTRT_CHECK(
+      v >= 0 && static_cast<size_t>(v) < kDsStrategyNames.size(),
+      "Invalid dynamic_shapes_kernel_specialization_strategy int: "
+          << v << " (expected 0..2 mapping to lazy|eager|none)");
+  return static_cast<Strategy>(v);
+}
+
+// Boundary validator: turns a raw integer into a ``CudaGraphStrategy``.
+// Values outside the index range of ``kCgStrategyNames`` fail the
+// ``TORCHTRT_CHECK`` below.
+CudaGraphStrategy to_cuda_graph_strategy(int32_t v) {
+  using Strategy = CudaGraphStrategy;
+  // The condition is deliberately left inline and unchanged: the check macro
+  // presumably embeds the expression text in its message -- confirm before
+  // hoisting it into a named local.
+  TORCHTRT_CHECK(
+      v >= 0 && static_cast<size_t>(v) < kCgStrategyNames.size(),
+      "Invalid cuda_graph_strategy int: "
+          << v << " (expected 0..1 mapping to disabled|whole_graph_capture)");
+  return static_cast<Strategy>(v);
+}
+
+// Looks up the display name of ``v`` in ``kDsStrategyNames``. Returns
+// ``"<unknown>"`` when the underlying integer is negative or past the end of
+// the table, so the function is safe to call on unvalidated values.
+std::string_view ds_strategy_name(DynamicShapesKernelSpecializationStrategy v) {
+  using Raw = std::underlying_type_t<DynamicShapesKernelSpecializationStrategy>;
+  auto const raw = static_cast<Raw>(v);
+  bool const known = raw >= 0 && static_cast<size_t>(raw) < kDsStrategyNames.size();
+  return known ? kDsStrategyNames[static_cast<size_t>(raw)] : std::string_view{"<unknown>"};
+}
+
+// Looks up the display name of ``v`` in ``kCgStrategyNames``. Returns
+// ``"<unknown>"`` when the underlying integer is negative or past the end of
+// the table, so the function is safe to call on unvalidated values.
+std::string_view cg_strategy_name(CudaGraphStrategy v) {
+  using Raw = std::underlying_type_t<CudaGraphStrategy>;
+  auto const raw = static_cast<Raw>(v);
+  bool const known = raw >= 0 && static_cast<size_t>(raw) < kCgStrategyNames.size();
+  return known ? kCgStrategyNames[static_cast<size_t>(raw)] : std::string_view{"<unknown>"};
+}
+
+// ---- RuntimeCacheHandle methods ---------------------------------------------
+//
+// The ``#ifdef TRT_MAJOR_RTX`` is intentionally confined to this translation
+// unit: the public header advertises a uniform interface (always-callable
+// methods that simply degrade to no-ops on non-RTX builds), and the JIT-binding
+// registration file (``register_jit_hooks.cpp``) calls these as plain member
+// references with zero conditional compilation.
+
+// Returns the serialized bytes of the attached runtime cache as a 1-D uint8
+// tensor. Yields an empty (0-element) uint8 tensor when:
+//   * the build is not a TRT-RTX build (``TRT_MAJOR_RTX`` undefined),
+//   * no cache has been attached to this handle (``trt_handle`` is null),
+//   * the cache's ``serialize()`` produced no buffer, or
+//   * the produced buffer has size 0.
+at::Tensor RuntimeCacheHandle::serialize() const {
+  auto const opts = at::TensorOptions().dtype(at::kByte);
+  auto const empty = [&]() { return at::empty({0}, opts); };
+#ifdef TRT_MAJOR_RTX
+  if (!trt_handle) {
+    return empty();
+  }
+  // NOTE(review): ``make_trt`` presumably wraps the returned buffer in an
+  // owning smart pointer so it is released on scope exit -- confirm.
+  auto host_mem = make_trt(trt_handle->serialize());
+  if (!host_mem) {
+    return empty();
+  }
+  auto const num_bytes = host_mem->size();
+  if (num_bytes == 0) {
+    // Nothing to copy. Bailing out here also avoids calling memcpy with
+    // pointers that may be null for zero-sized buffers (undefined behaviour
+    // even when the byte count is 0).
+    return empty();
+  }
+  auto tensor = at::empty({static_cast<int64_t>(num_bytes)}, opts);
+  std::memcpy(tensor.data_ptr(), host_mem->data(), num_bytes);
+  return tensor;
+#else
+  return empty();
+#endif
+}
+
+// Loads previously serialized cache bytes into the attached runtime cache.
+//
+// ``data`` must be a uint8 tensor; each element is treated as one byte of the
+// blob. The call is a no-op when ``data`` is empty, when no cache has been
+// attached to this handle yet, or on builds without ``TRT_MAJOR_RTX``.
+// A non-empty tensor of any other dtype fails a ``TORCHTRT_CHECK`` rather than
+// being handed to the cache with a wrong byte count.
+void RuntimeCacheHandle::deserialize(TORCHTRT_UNUSED at::Tensor data) {
+#ifdef TRT_MAJOR_RTX
+  if (data.numel() == 0 || !trt_handle) {
+    return;
+  }
+  // ``numel()`` is used as the byte count below, which is only correct for
+  // one-byte elements.
+  TORCHTRT_CHECK(
+      data.scalar_type() == at::kByte,
+      "RuntimeCacheHandle::deserialize expects a uint8 tensor, got " << data.scalar_type());
+  // Make the bytes densely packed and host-resident before handing out a raw pointer.
+  auto contig = data.contiguous().to(at::kCPU);
+  // NOTE(review): any status returned by the cache's ``deserialize`` is ignored
+  // here, as in the original code -- confirm whether failures should be surfaced.
+  trt_handle->deserialize(contig.data_ptr(), static_cast<size_t>(contig.numel()));
+#endif
+}
+
+// Reports whether a runtime cache is attached to this handle. Builds without
+// ``TRT_MAJOR_RTX`` have no cache member, so the answer there is always false.
+bool RuntimeCacheHandle::has_cache() const {
+#ifdef TRT_MAJOR_RTX
+  return static_cast<bool>(trt_handle);
+#else
+  return false;
+#endif
+}
+
+// ---- RuntimeSettings methods ------------------------------------------------
+
+// Field-by-field equality. The two strategy enums compare by value;
+// ``runtime_cache`` compares by pointer identity, so two settings are equal
+// only when they reference the very same handle object (or both hold none).
+bool RuntimeSettings::operator==(RuntimeSettings const& other) const noexcept {
+  if (dynamic_shapes_kernel_specialization_strategy != other.dynamic_shapes_kernel_specialization_strategy) {
+    return false;
+  }
+  if (cuda_graph_strategy != other.cuda_graph_strategy) {
+    return false;
+  }
+  return runtime_cache.get() == other.runtime_cache.get();
+}
+
+// Renders the settings as a multi-line, human-readable block:
+//
+//   RuntimeSettings{
+//     Dynamic Shapes Kernel Strategy: <name>
+//     CUDA Graph Strategy: <name>
+//     Runtime Cache: <path | "<in-memory shared>" | "<engine-local, in-memory>">
+//   }
+//
+// The cache line shows the handle's ``path`` when a handle is set and the path
+// is non-empty, ``<in-memory shared>`` when a handle is set with an empty path,
+// and ``<engine-local, in-memory>`` when no handle is set.
+std::string RuntimeSettings::to_str() const {
+  std::ostringstream os;
+  // Plain '\n' instead of std::endl: flushing an in-memory stream after every
+  // line does nothing useful; the resulting string is identical.
+  os << "RuntimeSettings{" << '\n';
+  os << "  Dynamic Shapes Kernel Strategy: " << ds_strategy_name(dynamic_shapes_kernel_specialization_strategy)
+     << '\n';
+  os << "  CUDA Graph Strategy: " << cg_strategy_name(cuda_graph_strategy) << '\n';
+  os << "  Runtime Cache: ";
+  if (!runtime_cache) {
+    os << "<engine-local, in-memory>";
+  } else if (runtime_cache->path.empty()) {
+    os << "<in-memory shared>";
+  } else {
+    // Stream the path directly; no temporary std::string copy is needed.
+    os << runtime_cache->path;
+  }
+  os << '\n';
+  os << "}";
+  return os.str();
+}
+
+// Stream insertion for ``RuntimeSettings``: writes exactly what ``to_str()``
+// returns and hands the stream back for chaining.
+std::ostream& operator<<(std::ostream& os, RuntimeSettings const& rs) {
+  return os << rs.to_str();
+}
+
+} // namespace runtime
+} // namespace core
+} // namespace torch_tensorrt
diff --git a/core/runtime/RuntimeSettings.h b/core/runtime/RuntimeSettings.h
@@ -0,0 +1,113 @@
+#pragma once
+
+#include <memory>
+#include <ostream>
+#include <string>
+#include <string_view>
+
+#include "ATen/core/Tensor.h"
+#include "ATen/core/ivalue.h"
+#include "NvInfer.h"
+#include "torch/custom_class.h"
+
+namespace torch_tensorrt {
+namespace core {
+namespace runtime {
+
+// A passive wrapper around an ``IRuntimeCache``. Registered as a torchbind class
+// so it can be passed by ``c10::intrusive_ptr`` across the Python/C++ boundary;
+// the same handle gives both runtimes the same underlying ``IRuntimeCache*``.
+//
+// File I/O lives on the Python side (filelock + on-disk persistence via
+// the ``serialize`` / ``deserialize`` members below). The C++ struct is purely
+// a holder; ``path`` is informational and is not consulted by the C++ runtime.
+//
+// Note that ``trt_handle`` only exists when ``TRT_MAJOR_RTX`` is defined, so
+// the set of data members differs between RTX and non-RTX builds; the member
+// functions are declared in both.
+struct RuntimeCacheHandle : public torch::CustomClassHolder {
+  // Label for this cache; defaults to the empty string. ``RuntimeSettings::to_str``
+  // prints it, substituting ``<in-memory shared>`` when it is empty.
+  std::string path;
+
+#ifdef TRT_MAJOR_RTX
+  // The live TensorRT runtime cache. The first engine that attaches this handle
+  // materializes it via ``IRuntimeConfig::createRuntimeCache()`` and writes the
+  // shared_ptr here; subsequent engines reuse the same pointer for true sharing.
+  // Null until then; ``serialize`` / ``deserialize`` / ``has_cache`` all check
+  // for null before use.
+  std::shared_ptr<nvinfer1::IRuntimeCache> trt_handle;
+#endif
+
+  // Creates a handle with the given ``path`` (empty by default) and, on RTX
+  // builds, no attached cache. ``explicit`` prevents implicit conversion from
+  // a string.
+  explicit RuntimeCacheHandle(std::string p = "") : path(std::move(p)) {}
+
+  // Expose the underlying ``IRuntimeCache`` bytes for the Python side to persist
+  // under filelock. Returns an empty uint8 tensor when no cache is attached, or
+  // on non-RTX builds.
+  //
+  // ``at::Tensor`` is used (rather than ``std::string``) because TorchBind
+  // forces ``std::string`` to round-trip through Python ``str`` (UTF-8), and
+  // serialized cache bytes are not valid UTF-8.
+  [[nodiscard]] at::Tensor serialize() const;
+
+  // Inverse of ``serialize``. Expects a uint8 ``at::Tensor``. No-op for empty
+  // input, when the underlying ``IRuntimeCache`` has not been materialized yet,
+  // or on non-RTX builds.
+  void deserialize(at::Tensor data);
+
+  // True iff an engine has populated the underlying ``IRuntimeCache``.
+  // Always false on non-RTX builds.
+  [[nodiscard]] bool has_cache() const;
+};
+
+// Strategy enums mirroring the corresponding ``nvinfer1`` enums on TRT-RTX.
+// Declared here unconditionally so non-RTX builds can still pass these values
+// through the data model -- only the ``static_cast`` to the nvinfer1 type
+// (inside ``TRTRuntimeConfig::ensure_initialized``) is RTX-only. Integer
+// values must stay in sync with the nvinfer1 enums.
+//
+// The integer values also index the name table in ``RuntimeSettings.cpp``, so
+// they must stay contiguous and start at 0.
+enum class DynamicShapesKernelSpecializationStrategy : int32_t {
+  kLAZY = 0, // rendered as "lazy"
+  kEAGER = 1, // rendered as "eager"
+  kNONE = 2, // rendered as "none"
+};
+
+// CUDA graph strategy selector. The integer values index the name table in
+// ``RuntimeSettings.cpp``, so they must stay contiguous and start at 0.
+enum class CudaGraphStrategy : int32_t {
+  kDISABLED = 0, // rendered as "disabled"
+  kWHOLE_GRAPH_CAPTURE = 1, // rendered as "whole_graph_capture"
+};
+
+// Boundary validators: take the int that crossed the Py->C++ wire and return
+// the enum (or throw with a clear message on out-of-range). Used only inside
+// the torchbind ``update_runtime_settings`` lambda -- the rest of the code
+// passes the enum type directly.
+//
+// Accepted range for ``v``: 0..2 (lazy | eager | none). Anything else fails a
+// ``TORCHTRT_CHECK``.
+[[nodiscard]] DynamicShapesKernelSpecializationStrategy to_dynamic_shapes_kernel_strategy(int32_t v);
+// Accepted range for ``v``: 0..1 (disabled | whole_graph_capture). Anything
+// else fails a ``TORCHTRT_CHECK``.
+[[nodiscard]] CudaGraphStrategy to_cuda_graph_strategy(int32_t v);
+
+// Per-engine runtime-only knobs sampled at IExecutionContext creation.
+//
+// ``RuntimeSettings`` is a plain struct (not a torchbind class) because we
+// flatten it into positional args at the torchbind boundary -- TorchBind can't
+// carry a dataclass natively. Equality is value-by-value; the cache field
+// compares by pointer identity (same handle -> same cache).
+//
+// The strategy fields are typed enums. The Python user-facing API takes strings
+// (``"lazy" | "eager" | "none"`` etc.) and validates them at the Python
+// boundary; the torchbind lambda then maps the underlying ``int32_t`` to the
+// enum via ``to_*_strategy`` and stores typed values here.
+struct RuntimeSettings {
+  // Defaults to ``kLAZY``.
+  DynamicShapesKernelSpecializationStrategy dynamic_shapes_kernel_specialization_strategy =
+      DynamicShapesKernelSpecializationStrategy::kLAZY;
+  // Defaults to ``kDISABLED``.
+  CudaGraphStrategy cuda_graph_strategy = CudaGraphStrategy::kDISABLED;
+  // Optional cache handle; null by default. ``to_str`` reports a null handle
+  // as ``<engine-local, in-memory>``.
+  c10::intrusive_ptr<RuntimeCacheHandle> runtime_cache = nullptr;
+
+  // Compares both strategy enums by value and ``runtime_cache`` by the address
+  // of the referenced handle (not by the handle's contents).
+  bool operator==(RuntimeSettings const& other) const noexcept;
+  // Exact negation of ``operator==``.
+  bool operator!=(RuntimeSettings const& other) const noexcept {
+    return !(*this == other);
+  }
+
+  // Multi-line, human-readable dump of all three fields; also what
+  // ``operator<<`` writes.
+  [[nodiscard]] std::string to_str() const;
+};
+
+// Reverse-lookup helpers used by ``to_str`` and ``operator<<``. Out-of-range
+// values render as ``"<unknown>"``. Defined here so other translation units
+// (e.g. ``TRTEngine.cpp`` for ``LOG_DEBUG``) can use the same mapping.
+//
+// The returned views refer to constant tables / string literals in
+// ``RuntimeSettings.cpp``, not to any temporary.
+[[nodiscard]] std::string_view ds_strategy_name(DynamicShapesKernelSpecializationStrategy v);
+[[nodiscard]] std::string_view cg_strategy_name(CudaGraphStrategy v);
+
+// Writes ``rs.to_str()`` to ``os`` and returns ``os``.
+std::ostream& operator<<(std::ostream& os, RuntimeSettings const& rs);
+
+} // namespace runtime
+} // namespace core
+} // namespace torch_tensorrt