Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions core/runtime/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ cc_library(
"DeviceList.cpp",
"Platform.cpp",
"RTDevice.cpp",
"RuntimeSettings.cpp",
"TRTEngine.cpp",
"TRTEngineProfiler.cpp",
"TRTRuntimeConfig.cpp",
Expand All @@ -96,6 +97,7 @@ cc_library(
hdrs = [
"Platform.h",
"RTDevice.h",
"RuntimeSettings.h",
"TRTEngine.h",
"TRTEngineProfiler.h",
"TRTRuntimeConfig.h",
Expand Down Expand Up @@ -158,6 +160,7 @@ cc_library(
hdrs = [
"Platform.h",
"RTDevice.h",
"RuntimeSettings.h",
"TRTEngine.h",
"TRTEngineProfiler.h",
"TensorRTBindingNames.h",
Expand All @@ -174,6 +177,7 @@ filegroup(
srcs = [
"Platform.h",
"RTDevice.h",
"RuntimeSettings.h",
"TRTEngine.h",
"TRTEngineProfiler.h",
"TRTRuntimeConfig.h",
Expand Down
135 changes: 135 additions & 0 deletions core/runtime/RuntimeSettings.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
#include "core/runtime/RuntimeSettings.h"

#include <array>
#include <cstring>
#include <sstream>
#include <tuple>
#include <type_traits>

#include "core/util/prelude.h"

namespace torch_tensorrt {
namespace core {
namespace runtime {

namespace {

// Reverse-lookup tables. Indices match the enum integer values (which mirror
// the nvinfer1 enums). Out-of-range -> "<unknown>".
//
// The table sizes double as the valid-range bound for the integer validators
// in this file, so adding an enumerator requires adding its name here too.

// Display names for DynamicShapesKernelSpecializationStrategy, indexed by value.
constexpr std::array<std::string_view, 3> kDsStrategyNames = {"lazy", "eager", "none"};
// Display names for CudaGraphStrategy, indexed by value.
constexpr std::array<std::string_view, 2> kCgStrategyNames = {"disabled", "whole_graph_capture"};

} // namespace

// Converts a raw integer into a DynamicShapesKernelSpecializationStrategy.
//
// v: candidate underlying value; accepted when 0 <= v < kDsStrategyNames.size().
// Returns the enumerator whose underlying value equals v.
// On an out-of-range v, TORCHTRT_CHECK fails with a message that includes the
// offending value and the accepted range.
DynamicShapesKernelSpecializationStrategy to_dynamic_shapes_kernel_strategy(int32_t v) {
// The upper bound comes from the name table rather than a literal so the
// validator and the reverse lookup cannot disagree about the valid range.
TORCHTRT_CHECK(
v >= 0 && static_cast<size_t>(v) < kDsStrategyNames.size(),
"Invalid dynamic_shapes_kernel_specialization_strategy int: " << v
<< " (expected 0..2 mapping to lazy|eager|none)");
return static_cast<DynamicShapesKernelSpecializationStrategy>(v);
}

// Converts a raw integer into a CudaGraphStrategy.
//
// v: candidate underlying value; accepted when 0 <= v < kCgStrategyNames.size().
// Returns the enumerator whose underlying value equals v.
// On an out-of-range v, TORCHTRT_CHECK fails with a message that includes the
// offending value and the accepted range.
CudaGraphStrategy to_cuda_graph_strategy(int32_t v) {
// The upper bound comes from the name table rather than a literal so the
// validator and the reverse lookup cannot disagree about the valid range.
TORCHTRT_CHECK(
v >= 0 && static_cast<size_t>(v) < kCgStrategyNames.size(),
"Invalid cuda_graph_strategy int: " << v << " (expected 0..1 mapping to disabled|whole_graph_capture)");
return static_cast<CudaGraphStrategy>(v);
}

// Maps a DynamicShapesKernelSpecializationStrategy to its display name.
//
// v: strategy value to render; it need not be a declared enumerator.
// Returns the matching entry of kDsStrategyNames, or "<unknown>" when the
// underlying integer is negative or past the end of the table. Never throws:
// the value is range-checked in its signed underlying type before it is used
// as an index.
std::string_view ds_strategy_name(DynamicShapesKernelSpecializationStrategy v) {
auto const i = static_cast<std::underlying_type_t<decltype(v)>>(v);
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Quick Q : maybe it makes sense for this to be rewritten similar to

auto const i = static_cast<size_t>(v); // This will be an overflow in case v is < 0
if (i == std::clamp(i, 0UL, std::size(kDsStrategyNames)){
    return kDsStrategyNames[i];
}
return "<unknown>"

Bonus points if we abstract the logic and reuse it for cg_strategy_name as well (the names array will be a input parameter).

if (i < 0 || static_cast<size_t>(i) >= kDsStrategyNames.size()) {
return "<unknown>";
}
return kDsStrategyNames[static_cast<size_t>(i)];
}

// Maps a CudaGraphStrategy to its display name.
//
// v: strategy value to render; it need not be a declared enumerator.
// Returns the matching entry of kCgStrategyNames, or "<unknown>" when the
// underlying integer falls outside the table.
std::string_view cg_strategy_name(CudaGraphStrategy v) {
  using Raw = std::underlying_type_t<CudaGraphStrategy>;
  auto const raw = static_cast<Raw>(v);

  // Test the sign in the signed underlying type first; only then is the
  // conversion to an unsigned index meaningful.
  bool const in_table = raw >= 0 && static_cast<size_t>(raw) < kCgStrategyNames.size();
  if (in_table) {
    return kCgStrategyNames[static_cast<size_t>(raw)];
  }
  return "<unknown>";
}

// ---- RuntimeCacheHandle methods ---------------------------------------------
//
// The ``#ifdef TRT_MAJOR_RTX`` is intentionally confined to this translation
// unit: the public header advertises a uniform interface (always-callable
// methods that simply degrade to no-ops on non-RTX builds), and the JIT-binding
// registration file (``register_jit_hooks.cpp``) calls these as plain member
// references with zero conditional compilation.

// Copies the serialized runtime cache into a freshly allocated 1-D uint8 tensor.
//
// Returns a zero-length uint8 tensor when no cache is attached, when the
// underlying serialize call yields no buffer, or on builds without
// TRT_MAJOR_RTX.
at::Tensor RuntimeCacheHandle::serialize() const {
  auto const byte_opts = at::TensorOptions().dtype(at::kByte);
#ifdef TRT_MAJOR_RTX
  if (trt_handle) {
    if (auto blob = make_trt(trt_handle->serialize())) {
      auto out = at::empty({static_cast<int64_t>(blob->size())}, byte_opts);
      std::memcpy(out.data_ptr(), blob->data(), blob->size());
      return out;
    }
  }
#endif
  // Every "nothing to serialize" path ends here.
  return at::empty({0}, byte_opts);
}

// Loads previously serialized cache bytes into the attached runtime cache.
//
// data: 1-D uint8 tensor holding the bytes produced by ``serialize``. It is
//       made contiguous and moved to the CPU before being handed over.
//
// No-op when ``data`` is empty, when no runtime cache is attached yet, or on
// builds without TRT_MAJOR_RTX (where the parameter is otherwise unused).
// Fails via TORCHTRT_CHECK when a non-empty tensor of any dtype other than
// uint8 is supplied: the element count would not equal the byte count, so the
// blob would be passed with the wrong length and meaning.
//
// NOTE(review): any status reported by the underlying ``deserialize`` call is
// not inspected here -- confirm whether a failed load should be surfaced.
void RuntimeCacheHandle::deserialize(TORCHTRT_UNUSED at::Tensor data) {
#ifdef TRT_MAJOR_RTX
  if (data.numel() == 0 || !trt_handle) {
    return;
  }
  TORCHTRT_CHECK(
      data.scalar_type() == at::kByte,
      "RuntimeCacheHandle::deserialize expects a uint8 tensor, got dtype " << data.scalar_type());
  auto const host_bytes = data.contiguous().to(at::kCPU);
  // nbytes() states the intent (a byte length) and equals numel() for uint8.
  trt_handle->deserialize(host_bytes.data_ptr(), host_bytes.nbytes());
#endif
}

// Reports whether a live runtime cache is attached to this handle.
// Always false on builds without TRT_MAJOR_RTX, where the handle has no
// cache member at all.
bool RuntimeCacheHandle::has_cache() const {
#ifdef TRT_MAJOR_RTX
  return static_cast<bool>(trt_handle);
#else
  return false;
#endif
}

// ---- RuntimeSettings methods ------------------------------------------------

// Field-wise equality. The two strategy fields compare by value;
// ``runtime_cache`` compares by pointer identity, so two settings are equal
// only if they reference the very same handle object (or both hold none).
bool RuntimeSettings::operator==(RuntimeSettings const& other) const noexcept {
  if (dynamic_shapes_kernel_specialization_strategy != other.dynamic_shapes_kernel_specialization_strategy) {
    return false;
  }
  if (cuda_graph_strategy != other.cuda_graph_strategy) {
    return false;
  }
  return runtime_cache.get() == other.runtime_cache.get();
}

// Renders the settings as a multi-line, human-readable block.
//
// Returns a string of the form
//   RuntimeSettings{
//    Dynamic Shapes Kernel Strategy: <name>
//    CUDA Graph Strategy: <name>
//    Runtime Cache: <description>
//   }
// with no trailing newline. The cache line shows the handle's ``path``,
// "<in-memory shared>" when a handle is attached with an empty path, or
// "<engine-local, in-memory>" when no handle is attached.
std::string RuntimeSettings::to_str() const {
  std::ostringstream os;
  // '\n' instead of std::endl: flushing an in-memory stream after every line
  // does nothing useful.
  os << "RuntimeSettings{" << '\n';
  os << " Dynamic Shapes Kernel Strategy: " << ds_strategy_name(dynamic_shapes_kernel_specialization_strategy)
     << '\n';
  os << " CUDA Graph Strategy: " << cg_strategy_name(cuda_graph_strategy) << '\n';
  if (runtime_cache) {
    auto const& p = runtime_cache->path;
    os << " Runtime Cache: ";
    // Stream each alternative directly; a ternary mixing a literal with ``p``
    // would materialize a temporary std::string copy of the path.
    if (p.empty()) {
      os << "<in-memory shared>";
    } else {
      os << p;
    }
    os << '\n';
  } else {
    os << " Runtime Cache: <engine-local, in-memory>" << '\n';
  }
  os << "}";
  return os.str();
}

// Streams the textual form of ``rs`` (exactly what ``rs.to_str()`` returns)
// into ``os`` and returns ``os`` so insertions can be chained.
std::ostream& operator<<(std::ostream& os, RuntimeSettings const& rs) {
os << rs.to_str();
Comment thread
tp5uiuc marked this conversation as resolved.
return os;
}

} // namespace runtime
} // namespace core
} // namespace torch_tensorrt
113 changes: 113 additions & 0 deletions core/runtime/RuntimeSettings.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#pragma once

#include <memory>
#include <ostream>
#include <string>
#include <string_view>

#include "ATen/core/Tensor.h"
#include "ATen/core/ivalue.h"
#include "NvInfer.h"
#include "torch/custom_class.h"

namespace torch_tensorrt {
namespace core {
namespace runtime {

// A passive wrapper around an ``IRuntimeCache``. Registered as a torchbind class
// so it can be passed by ``c10::intrusive_ptr`` across the Python/C++ boundary;
// the same handle gives both runtimes the same underlying ``IRuntimeCache*``.
//
// File I/O lives on the Python side (filelock + on-disk persistence via
// the ``serialize`` / ``deserialize`` members below). The C++ struct is purely
// a holder; ``path`` is informational and is not consulted by the C++ runtime.
struct RuntimeCacheHandle : public torch::CustomClassHolder {
// Informational location string for this cache, stored verbatim from the
// constructor argument. An empty string means no path was supplied.
std::string path;

#ifdef TRT_MAJOR_RTX
// The live TensorRT runtime cache. The first engine that attaches this handle
// materializes it via ``IRuntimeConfig::createRuntimeCache()`` and writes the
// shared_ptr here; subsequent engines reuse the same pointer for true sharing.
std::shared_ptr<nvinfer1::IRuntimeCache> trt_handle;
#endif

// Creates a handle with no cache attached. ``p`` becomes ``path`` and
// defaults to the empty string; ``explicit`` keeps a bare string from
// converting into a handle implicitly.
explicit RuntimeCacheHandle(std::string p = "") : path(std::move(p)) {}

// Expose the underlying ``IRuntimeCache`` bytes for the Python side to persist
// under filelock. Returns an empty uint8 tensor when no cache is attached, or
// on non-RTX builds.
//
// ``at::Tensor`` is used (rather than ``std::string``) because TorchBind
// forces ``std::string`` to round-trip through Python ``str`` (UTF-8), and
// serialized cache bytes are not valid UTF-8.
[[nodiscard]] at::Tensor serialize() const;

// Inverse of ``serialize``. Expects a uint8 ``at::Tensor``. No-op for empty
// input, when the underlying ``IRuntimeCache`` has not been materialized yet,
// or on non-RTX builds.
void deserialize(at::Tensor data);

// True iff an engine has populated the underlying ``IRuntimeCache``.
// Always false on non-RTX builds.
[[nodiscard]] bool has_cache() const;
};

// Strategy enums mirroring the corresponding ``nvinfer1`` enums on TRT-RTX.
// Declared here unconditionally so non-RTX builds can still pass these values
// through the data model -- only the ``static_cast`` to the nvinfer1 type
// (inside ``TRTRuntimeConfig::ensure_initialized``) is RTX-only. Integer
// values must stay in sync with the nvinfer1 enums.
// Each value is also the index of its display name in the reverse-lookup
// table in RuntimeSettings.cpp, so values must stay dense and start at 0.
enum class DynamicShapesKernelSpecializationStrategy : int32_t {
kLAZY = 0, // rendered as "lazy"
kEAGER = 1, // rendered as "eager"
kNONE = 2, // rendered as "none"
};

// Each value is also the index of its display name in the reverse-lookup
// table in RuntimeSettings.cpp, so values must stay dense and start at 0.
enum class CudaGraphStrategy : int32_t {
kDISABLED = 0, // rendered as "disabled"
kWHOLE_GRAPH_CAPTURE = 1, // rendered as "whole_graph_capture"
};

// Boundary validators: take the int that crossed the Py->C++ wire and return
// the enum (or throw with a clear message on out-of-range). Used only inside
// the torchbind ``update_runtime_settings`` lambda -- the rest of the code
// passes the enum type directly.
[[nodiscard]] DynamicShapesKernelSpecializationStrategy to_dynamic_shapes_kernel_strategy(int32_t v);
[[nodiscard]] CudaGraphStrategy to_cuda_graph_strategy(int32_t v);

// Per-engine runtime-only knobs sampled at IExecutionContext creation.
//
// ``RuntimeSettings`` is a plain struct (not a torchbind class) because we
// flatten it into positional args at the torchbind boundary -- TorchBind can't
// carry a dataclass natively. Equality is value-by-value; the cache field
// compares by pointer identity (same handle -> same cache).
//
// The strategy fields are typed enums. The Python user-facing API takes strings
// (``"lazy" | "eager" | "none"`` etc.) and validates them at the Python
// boundary; the torchbind lambda then maps the underlying ``int32_t`` to the
// enum via ``to_*_strategy`` and stores typed values here.
struct RuntimeSettings {
// Dynamic-shapes kernel specialization strategy; defaults to kLAZY.
DynamicShapesKernelSpecializationStrategy dynamic_shapes_kernel_specialization_strategy =
DynamicShapesKernelSpecializationStrategy::kLAZY;
// CUDA graph strategy; defaults to kDISABLED.
CudaGraphStrategy cuda_graph_strategy = CudaGraphStrategy::kDISABLED;
// Optional runtime cache handle; null by default. ``to_str`` reports a null
// handle as an engine-local, in-memory cache.
c10::intrusive_ptr<RuntimeCacheHandle> runtime_cache = nullptr;

// Strategy fields compare by value; ``runtime_cache`` compares by pointer
// identity. Defined in RuntimeSettings.cpp.
bool operator==(RuntimeSettings const& other) const noexcept;
// Exact negation of ``operator==``.
bool operator!=(RuntimeSettings const& other) const noexcept {
return !(*this == other);
}

// Multi-line, human-readable rendering of all three fields.
[[nodiscard]] std::string to_str() const;
};

// Reverse-lookup helpers used by ``to_str`` and ``operator<<``. Out-of-range
// values render as ``"<unknown>"``. Declared here (and defined in
// ``RuntimeSettings.cpp``) so other translation units (e.g. ``TRTEngine.cpp``
// for ``LOG_DEBUG``) can use the same mapping.
[[nodiscard]] std::string_view ds_strategy_name(DynamicShapesKernelSpecializationStrategy v);
[[nodiscard]] std::string_view cg_strategy_name(CudaGraphStrategy v);

std::ostream& operator<<(std::ostream& os, RuntimeSettings const& rs);

} // namespace runtime
} // namespace core
} // namespace torch_tensorrt
Loading
Loading