forked from pytorch/TensorRT
-
Notifications
You must be signed in to change notification settings - Fork 0
Move TRT-RTX runtime controls to runtime context managers (v3, for review) #3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
tp5uiuc
wants to merge
9
commits into
feat/trtrtx-cpp-runtime-v2
Choose a base branch
from
feat/trtrtx-runtime-ctx-managers
base: feat/trtrtx-cpp-runtime-v2
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
90a21e4
refactor(runtime): move TRT-RTX runtime controls to runtime context m…
tp5uiuc 415bea7
fix(runtime): support None and disk-backed runtime_cache for C++ runtime
tp5uiuc 0d360de
refactor(runtime): TRTRuntimeConfig owns RuntimeSettings; unified Run…
tp5uiuc 4bfa982
runtime: address PR review feedback
tp5uiuc 34fa610
runtime: cpp implicit-handle swap on set_runtime_settings
tp5uiuc 111cdb2
runtime: PR review feedback round 2 on RuntimeSettings + RuntimeCache…
tp5uiuc 363d20b
runtime: lazy IExecutionContext creation in cpp TRTEngine
tp5uiuc dea2d06
runtime: cross Py->C++ boundary as ints; reverse-map for logging
tp5uiuc 38b7033
runtime: PR review round 4 -- enum class, getter/setter overload, polish
tp5uiuc File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,135 @@ | ||
| #include "core/runtime/RuntimeSettings.h" | ||
|
|
||
| #include <array> | ||
| #include <cstring> | ||
| #include <sstream> | ||
| #include <tuple> | ||
| #include <type_traits> | ||
|
|
||
| #include "core/util/prelude.h" | ||
|
|
||
| namespace torch_tensorrt { | ||
| namespace core { | ||
| namespace runtime { | ||
|
|
||
| namespace { | ||
|
|
||
| // Reverse-lookup tables. Indices match the enum integer values (which mirror | ||
| // the nvinfer1 enums). Out-of-range -> "<unknown>". | ||
| constexpr std::array<std::string_view, 3> kDsStrategyNames = {"lazy", "eager", "none"}; | ||
| constexpr std::array<std::string_view, 2> kCgStrategyNames = {"disabled", "whole_graph_capture"}; | ||
|
|
||
| } // namespace | ||
|
|
||
| DynamicShapesKernelSpecializationStrategy to_dynamic_shapes_kernel_strategy(int32_t v) { | ||
| TORCHTRT_CHECK( | ||
| v >= 0 && static_cast<size_t>(v) < kDsStrategyNames.size(), | ||
| "Invalid dynamic_shapes_kernel_specialization_strategy int: " << v | ||
| << " (expected 0..2 mapping to lazy|eager|none)"); | ||
| return static_cast<DynamicShapesKernelSpecializationStrategy>(v); | ||
| } | ||
|
|
||
| CudaGraphStrategy to_cuda_graph_strategy(int32_t v) { | ||
| TORCHTRT_CHECK( | ||
| v >= 0 && static_cast<size_t>(v) < kCgStrategyNames.size(), | ||
| "Invalid cuda_graph_strategy int: " << v << " (expected 0..1 mapping to disabled|whole_graph_capture)"); | ||
| return static_cast<CudaGraphStrategy>(v); | ||
| } | ||
|
|
||
| std::string_view ds_strategy_name(DynamicShapesKernelSpecializationStrategy v) { | ||
| auto const i = static_cast<std::underlying_type_t<decltype(v)>>(v); | ||
| if (i < 0 || static_cast<size_t>(i) >= kDsStrategyNames.size()) { | ||
| return "<unknown>"; | ||
| } | ||
| return kDsStrategyNames[static_cast<size_t>(i)]; | ||
| } | ||
|
|
||
| std::string_view cg_strategy_name(CudaGraphStrategy v) { | ||
| auto const i = static_cast<std::underlying_type_t<decltype(v)>>(v); | ||
| if (i < 0 || static_cast<size_t>(i) >= kCgStrategyNames.size()) { | ||
| return "<unknown>"; | ||
| } | ||
| return kCgStrategyNames[static_cast<size_t>(i)]; | ||
| } | ||
|
|
||
| // ---- RuntimeCacheHandle methods --------------------------------------------- | ||
| // | ||
| // The ``#ifdef TRT_MAJOR_RTX`` is intentionally confined to this translation | ||
| // unit: the public header advertises a uniform interface (always-callable | ||
| // methods that simply degrade to no-ops on non-RTX builds), and the JIT-binding | ||
| // registration file (``register_jit_hooks.cpp``) calls these as plain member | ||
| // references with zero conditional compilation. | ||
|
|
||
| at::Tensor RuntimeCacheHandle::serialize() const { | ||
| auto const opts = at::TensorOptions().dtype(at::kByte); | ||
| auto const empty = [&]() { return at::empty({0}, opts); }; | ||
| #ifdef TRT_MAJOR_RTX | ||
| if (!trt_handle) { | ||
| return empty(); | ||
| } | ||
| auto host_mem = make_trt(trt_handle->serialize()); | ||
| if (!host_mem) { | ||
| return empty(); | ||
| } | ||
| auto tensor = at::empty({static_cast<int64_t>(host_mem->size())}, opts); | ||
| std::memcpy(tensor.data_ptr(), host_mem->data(), host_mem->size()); | ||
| return tensor; | ||
| #else | ||
| return empty(); | ||
| #endif | ||
| } | ||
|
|
||
| void RuntimeCacheHandle::deserialize(TORCHTRT_UNUSED at::Tensor data) { | ||
| #ifdef TRT_MAJOR_RTX | ||
| if (data.numel() == 0 || !trt_handle) { | ||
| return; | ||
| } | ||
| auto contig = data.contiguous().to(at::kCPU); | ||
| trt_handle->deserialize(contig.data_ptr(), static_cast<size_t>(contig.numel())); | ||
| #endif | ||
| } | ||
|
|
||
| bool RuntimeCacheHandle::has_cache() const { | ||
| #ifdef TRT_MAJOR_RTX | ||
| return trt_handle != nullptr; | ||
| #else | ||
| return false; | ||
| #endif | ||
| } | ||
|
|
||
| // ---- RuntimeSettings methods ------------------------------------------------ | ||
|
|
||
| bool RuntimeSettings::operator==(RuntimeSettings const& other) const noexcept { | ||
| // ``runtime_cache`` compares by pointer identity: passing the same handle | ||
| // twice through the settings setter is a no-op. Hoisted into locals because | ||
| // ``std::tie`` requires lvalues. | ||
| auto* this_cache = runtime_cache.get(); | ||
| auto* other_cache = other.runtime_cache.get(); | ||
| return std::tie(dynamic_shapes_kernel_specialization_strategy, cuda_graph_strategy, this_cache) == | ||
| std::tie(other.dynamic_shapes_kernel_specialization_strategy, other.cuda_graph_strategy, other_cache); | ||
| } | ||
|
|
||
| std::string RuntimeSettings::to_str() const { | ||
| std::ostringstream os; | ||
| os << "RuntimeSettings{" << std::endl; | ||
| os << " Dynamic Shapes Kernel Strategy: " << ds_strategy_name(dynamic_shapes_kernel_specialization_strategy) | ||
| << std::endl; | ||
| os << " CUDA Graph Strategy: " << cg_strategy_name(cuda_graph_strategy) << std::endl; | ||
| if (runtime_cache) { | ||
| auto const& p = runtime_cache->path; | ||
| os << " Runtime Cache: " << (p.empty() ? "<in-memory shared>" : p) << std::endl; | ||
| } else { | ||
| os << " Runtime Cache: <engine-local, in-memory>" << std::endl; | ||
| } | ||
| os << "}"; | ||
| return os.str(); | ||
| } | ||
|
|
||
| std::ostream& operator<<(std::ostream& os, RuntimeSettings const& rs) { | ||
| os << rs.to_str(); | ||
|
tp5uiuc marked this conversation as resolved.
|
||
| return os; | ||
| } | ||
|
|
||
| } // namespace runtime | ||
| } // namespace core | ||
| } // namespace torch_tensorrt | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,113 @@ | ||
| #pragma once | ||
|
|
||
| #include <memory> | ||
| #include <ostream> | ||
| #include <string> | ||
| #include <string_view> | ||
|
|
||
| #include "ATen/core/Tensor.h" | ||
| #include "ATen/core/ivalue.h" | ||
| #include "NvInfer.h" | ||
| #include "torch/custom_class.h" | ||
|
|
||
| namespace torch_tensorrt { | ||
| namespace core { | ||
| namespace runtime { | ||
|
|
||
| // A passive wrapper around an ``IRuntimeCache``. Registered as a torchbind class | ||
| // so it can be passed by ``c10::intrusive_ptr`` across the Python/C++ boundary; | ||
| // the same handle gives both runtimes the same underlying ``IRuntimeCache*``. | ||
| // | ||
| // File I/O lives on the Python side (filelock + on-disk persistence via | ||
| // the ``serialize`` / ``deserialize`` members below). The C++ struct is purely | ||
| // a holder; ``path`` is informational and is not consulted by the C++ runtime. | ||
| struct RuntimeCacheHandle : public torch::CustomClassHolder { | ||
| std::string path; | ||
|
|
||
| #ifdef TRT_MAJOR_RTX | ||
| // The live TensorRT runtime cache. The first engine that attaches this handle | ||
| // materializes it via ``IRuntimeConfig::createRuntimeCache()`` and writes the | ||
| // shared_ptr here; subsequent engines reuse the same pointer for true sharing. | ||
| std::shared_ptr<nvinfer1::IRuntimeCache> trt_handle; | ||
| #endif | ||
|
|
||
| explicit RuntimeCacheHandle(std::string p = "") : path(std::move(p)) {} | ||
|
|
||
| // Expose the underlying ``IRuntimeCache`` bytes for the Python side to persist | ||
| // under filelock. Returns an empty uint8 tensor when no cache is attached, or | ||
| // on non-RTX builds. | ||
| // | ||
| // ``at::Tensor`` is used (rather than ``std::string``) because TorchBind | ||
| // forces ``std::string`` to round-trip through Python ``str`` (UTF-8), and | ||
| // serialized cache bytes are not valid UTF-8. | ||
| [[nodiscard]] at::Tensor serialize() const; | ||
|
|
||
| // Inverse of ``serialize``. Expects a uint8 ``at::Tensor``. No-op for empty | ||
| // input, when the underlying ``IRuntimeCache`` has not been materialized yet, | ||
| // or on non-RTX builds. | ||
| void deserialize(at::Tensor data); | ||
|
|
||
| // True iff an engine has populated the underlying ``IRuntimeCache``. | ||
| // Always false on non-RTX builds. | ||
| [[nodiscard]] bool has_cache() const; | ||
| }; | ||
|
|
||
// Strategy enums that shadow the matching ``nvinfer1`` enums on TRT-RTX.
//
// They are declared unconditionally so non-RTX builds can still carry these
// values through the data model; only the ``static_cast`` to the nvinfer1 type
// (inside ``TRTRuntimeConfig::ensure_initialized``) is RTX-only. The integer
// values below must stay in sync with the nvinfer1 enums.
enum class DynamicShapesKernelSpecializationStrategy : int32_t {
  kLAZY = 0,
  kEAGER = 1,
  kNONE = 2,
};

enum class CudaGraphStrategy : int32_t {
  kDISABLED = 0,
  kWHOLE_GRAPH_CAPTURE = 1,
};
|
|
||
| // Boundary validators: take the int that crossed the Py->C++ wire and return | ||
| // the enum (or throw with a clear message on out-of-range). Used only inside | ||
| // the torchbind ``update_runtime_settings`` lambda -- the rest of the code | ||
| // passes the enum type directly. | ||
| [[nodiscard]] DynamicShapesKernelSpecializationStrategy to_dynamic_shapes_kernel_strategy(int32_t v); | ||
| [[nodiscard]] CudaGraphStrategy to_cuda_graph_strategy(int32_t v); | ||
|
|
||
| // Per-engine runtime-only knobs sampled at IExecutionContext creation. | ||
| // | ||
| // ``RuntimeSettings`` is a plain struct (not a torchbind class) because we | ||
| // flatten it into positional args at the torchbind boundary -- TorchBind can't | ||
| // carry a dataclass natively. Equality is value-by-value; the cache field | ||
| // compares by pointer identity (same handle -> same cache). | ||
| // | ||
| // The strategy fields are typed enums. The Python user-facing API takes strings | ||
| // (``"lazy" | "eager" | "none"`` etc.) and validates them at the Python | ||
| // boundary; the torchbind lambda then maps the underlying ``int32_t`` to the | ||
| // enum via ``to_*_strategy`` and stores typed values here. | ||
| struct RuntimeSettings { | ||
| DynamicShapesKernelSpecializationStrategy dynamic_shapes_kernel_specialization_strategy = | ||
| DynamicShapesKernelSpecializationStrategy::kLAZY; | ||
| CudaGraphStrategy cuda_graph_strategy = CudaGraphStrategy::kDISABLED; | ||
| c10::intrusive_ptr<RuntimeCacheHandle> runtime_cache = nullptr; | ||
|
|
||
| bool operator==(RuntimeSettings const& other) const noexcept; | ||
| bool operator!=(RuntimeSettings const& other) const noexcept { | ||
| return !(*this == other); | ||
| } | ||
|
|
||
| [[nodiscard]] std::string to_str() const; | ||
| }; | ||
|
|
||
| // Reverse-lookup helpers used by ``to_str`` and ``operator<<``. Out-of-range | ||
| // values render as ``"<unknown>"``. Defined here so other translation units | ||
| // (e.g. ``TRTEngine.cpp`` for ``LOG_DEBUG``) can use the same mapping. | ||
| [[nodiscard]] std::string_view ds_strategy_name(DynamicShapesKernelSpecializationStrategy v); | ||
| [[nodiscard]] std::string_view cg_strategy_name(CudaGraphStrategy v); | ||
|
|
||
| std::ostream& operator<<(std::ostream& os, RuntimeSettings const& rs); | ||
|
|
||
| } // namespace runtime | ||
| } // namespace core | ||
| } // namespace torch_tensorrt |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Quick Q: maybe it makes sense for this to be rewritten similar to
Bonus points if we abstract the logic and reuse it for
`cg_strategy_name` as well (the names array will be an input parameter).