Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
221 changes: 159 additions & 62 deletions sdk_v2/cpp/src/ep_detection/cuda_ep_bootstrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,26 @@
// Licensed under the MIT License.
#include "ep_detection/cuda_ep_bootstrapper.h"

#include "http/http_client.h"
#include "http/http_download.h"
#include "logger.h"
#include "util/file_lock.h"
#include "http/http_download.h"
#include "util/sha256.h"
#include "util/zip_extract.h"

#include <fmt/format.h>
#include <nlohmann/json.hpp>

#include <algorithm>
#include <atomic>
#include <cctype>
#include <cstdio>
#include <filesystem>
#include <map>
#include <optional>
#include <stdexcept>
#include <string>
#include <unordered_map>

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
Expand All @@ -25,60 +32,116 @@ namespace {

constexpr const char* kPackageFileName = "cuda-ep.zip";
constexpr const char* kLockFileName = "cuda-ep.lock";
constexpr const char* kStagingDirName = "cuda-ep-staging";
constexpr const char* kUserAgent = "FoundryLocal";
constexpr int kMaxInstallAttempts = 5;

// CUDA EP package is built against the ONNX Runtime version we link against, so
// WinML and non-WinML builds need separate downloads. Hashes mirror the C# core
// (see neutron.main/src/Service/Providers/Detector/CudaEpBootstrapper.cs).
// WinML build -> ORT 1.23.2 (cuda-ep-20260501-182408.zip)
// Non-WinML -> ORT 1.25.1 (cuda-ep-20260501-062935.zip)
#if defined(FOUNDRY_LOCAL_USE_WINML) && FOUNDRY_LOCAL_USE_WINML
constexpr const char* kDownloadUrl =
"https://foundrypackages-ffhrdhbxb7gpdreh.b02.azurefd.net/cuda-ep-20260501-182408.zip";
#else
constexpr const char* kDownloadUrl =
"https://foundrypackages-ffhrdhbxb7gpdreh.b02.azurefd.net/cuda-ep-20260501-062935.zip";
#endif

struct ExpectedBinary {
const char* filename;
const char* sha256;
// Manifest URL on the CDN — published by the CUDA EP upload pipeline.
constexpr const char* kManifestUrl =
"https://foundrypackages-ffhrdhbxb7gpdreh.b02.azurefd.net/cuda_ep_prod.json";

// -----------------------------------------------------------------------
// Platform detection
//
// Returns the manifest platform key and ORT registration library filename
// for the current build target, or std::nullopt if unsupported.
//
// To add a platform:
// 1. Uncomment its #elif block below.
// 2. Uncomment its entry in $binaryNames / $expectedPlatforms in
// cuda-ep-upload.yml and update $platformPattern there too.
// -----------------------------------------------------------------------
struct PlatformInfo {
  /// Key used to look this platform up in the manifest, e.g. "win-x64".
  const char* key;
  /// Filename of the library handed to ORT for EP registration.
  const char* ep_lib;
};

#if defined(FOUNDRY_LOCAL_USE_WINML) && FOUNDRY_LOCAL_USE_WINML
constexpr ExpectedBinary kExpectedBinaries[] = {
{"onnxruntime_providers_cuda.dll", "4CEF18654878CEFCFCF8488E9C3A705EB5327AA9B5556155C319C9CBB2D98FCF"},
{"onnxruntime-genai-cuda.dll", "BC953F8E2AAFC6219B2D723B65AB8F1A9426A6B7724D6A01ED756FAE8C3DE6AE"},
};
std::optional<PlatformInfo> GetPlatformInfo() {
#if defined(_WIN32) && !defined(_M_ARM64)
return PlatformInfo{"win-x64", "onnxruntime_providers_cuda_plugin.dll"};

// Uncomment when win-arm64 CUDA EP build is available (see cuda-ep-upload.yml):
// #elif defined(_WIN32) && defined(_M_ARM64)
// return PlatformInfo{"win-arm64", "onnxruntime_providers_cuda_plugin.dll"};

// Uncomment when linux-x64 CUDA EP build is available (see cuda-ep-upload.yml):
// #elif defined(__linux__) && defined(__x86_64__)
// return PlatformInfo{"linux-x64", "libonnxruntime_providers_cuda_plugin.so"};

// Uncomment when linux-arm64 CUDA EP build is available (see cuda-ep-upload.yml):
// #elif defined(__linux__) && defined(__aarch64__)
// return PlatformInfo{"linux-arm64", "libonnxruntime_providers_cuda_plugin.so"};

#else
constexpr ExpectedBinary kExpectedBinaries[] = {
{"onnxruntime_providers_cuda.dll", "DD540FCFECFBC68B4675C9ADF09C2858CF6B054563859D79598AA2524406A76F"},
{"onnxruntime-genai-cuda.dll", "BC953F8E2AAFC6219B2D723B65AB8F1A9426A6B7724D6A01ED756FAE8C3DE6AE"},
};
return std::nullopt; // Platform not yet supported — graceful no-op.
#endif
}

constexpr const char* kRegistrationName = "Foundry.CUDA";
constexpr const char* kCudaProviderDll = "onnxruntime_providers_cuda.dll";

struct ManifestInfo {
  /// Value of the manifest's top-level "version" field.
  std::string version;
  /// Package archive URL for the selected platform.
  std::string download_url;
  /// Expected SHA256 digest per packaged file, keyed by filename.
  std::unordered_map<std::string, std::string> sha256;
};

/// Fetch and parse the CUDA EP manifest from the CDN.
///
/// Downloads kManifestUrl, parses the body as JSON and extracts the package
/// entry stored under "packages" for @p platform_key.
///
/// @param platform_key  Key looked up inside the manifest's "packages" object.
/// @param logger        Receives a debug line naming the manifest URL and is
///                      forwarded to the HTTP helper.
/// @return The manifest version, the package download URL and the
///         filename -> expected SHA256 table for the requested platform.
/// @throws std::runtime_error if the manifest has no entry for the platform,
///         or the entry has an empty "url" or an empty "sha256" table.
///         Exceptions raised by the HTTP helper or by nlohmann::json (parse
///         errors, missing keys, wrong value types) propagate unchanged.
ManifestInfo FetchManifest(const char* platform_key, fl::ILogger& logger) {
  logger.Log(fl::LogLevel::Debug,
             fmt::format("CUDA EP: fetching manifest from {}", kManifestUrl));

  const auto body = fl::http::HttpGetWithRetry(kManifestUrl, kUserAgent, logger);
  const auto manifest = nlohmann::json::parse(body);

  ManifestInfo info;
  info.version = manifest.at("version").get<std::string>();

  const auto& packages = manifest.at("packages");
  if (!packages.contains(platform_key)) {
    throw std::runtime_error(
        fmt::format("CUDA EP manifest has no entry for platform '{}'", platform_key));
  }

  const auto& pkg = packages.at(platform_key);
  info.download_url = pkg.at("url").get<std::string>();
  if (info.download_url.empty()) {
    throw std::runtime_error(
        fmt::format("CUDA EP manifest has an empty download URL for platform '{}'",
                    platform_key));
  }

  for (const auto& [filename, hash] : pkg.at("sha256").items()) {
    info.sha256[filename] = hash.get<std::string>();
  }

  // VerifyPackage only checks the files named in this table, so an empty table
  // would let any existing directory pass verification. Reject it up front.
  if (info.sha256.empty()) {
    throw std::runtime_error(
        fmt::format("CUDA EP manifest lists no SHA256 hashes for platform '{}'",
                    platform_key));
  }

  return info;
}

/// Verify all expected binaries exist and have correct SHA256 hashes.
bool VerifyPackage(const std::filesystem::path& dir, fl::ILogger& logger) {
for (const auto& expected : kExpectedBinaries) {
auto file_path = dir / expected.filename;
/// Logs the name of the first missing or mismatched file to aid diagnosis.
bool VerifyPackage(const std::filesystem::path& dir,
const std::unordered_map<std::string, std::string>& expected_hashes,
fl::ILogger& logger) {
// Quick sentinel check before the expensive SHA256 work.
if (!std::filesystem::exists(dir)) {
logger.Log(fl::LogLevel::Debug,
fmt::format("CUDA EP: package directory does not exist: {}", dir.string()));
return false;
}

for (const auto& [filename, expected_hash] : expected_hashes) {
auto file_path = dir / filename;

if (!std::filesystem::exists(file_path)) {
logger.Log(fl::LogLevel::Debug,
fmt::format("CUDA EP: package file missing: {}", file_path.string()));
return false;
}

auto hash = fl::Sha256File(file_path);

// Case-insensitive comparison
std::string expected_hash(expected.sha256);
if (!std::equal(hash.begin(), hash.end(), expected_hash.begin(), expected_hash.end(),
[](char a, char b) { return std::toupper(a) == std::toupper(b); })) {
logger.Log(fl::LogLevel::Warning,
fmt::format("CUDA EP: hash mismatch for {}: got {}, expected {}",
expected.filename, hash, expected.sha256));
filename, hash, expected_hash));
return false;
}
}
Expand Down Expand Up @@ -118,74 +181,101 @@ bool CudaEpBootstrapper::DownloadAndRegister(bool force,

attempts_++;

// Bail out early if this build target has no CUDA EP platform mapping yet.
auto platform_info = GetPlatformInfo();
if (!platform_info) {
logger.Log(LogLevel::Information, "CUDA EP: current platform is not yet supported");
return false;
}

auto ep_dir = std::filesystem::path(ep_dir_);
auto lock_path = ep_dir.parent_path() / kLockFileName;
auto zip_path = ep_dir.parent_path() / kPackageFileName;
auto parent_dir = ep_dir.parent_path();

try {
// Cross-process lock to prevent concurrent installs
FileLock lock(lock_path);
// Fetch the manifest before acquiring the lock to avoid holding it during network I/O.
auto manifest = FetchManifest(platform_info->key, logger);
logger.Log(LogLevel::Information,
fmt::format("CUDA EP: manifest fetched (version={}, platform={})",
manifest.version, platform_info->key));

// Cross-process lock to prevent concurrent installs.
std::filesystem::create_directories(parent_dir);
FileLock lock(parent_dir / kLockFileName);

// Check if package already exists and is valid
if (VerifyPackage(ep_dir, logger)) {
// Re-check after acquiring the lock — another process may have already updated.
if (!force && VerifyPackage(ep_dir, manifest.sha256, logger)) {
logger.Log(LogLevel::Information, "CUDA EP: package already valid, skipping download");
} else {
// Clean up any partial install
if (std::filesystem::exists(ep_dir)) {
std::filesystem::remove_all(ep_dir);
// Download to a staging directory so a failure never corrupts the existing install.
auto staging_dir = parent_dir / kStagingDirName;
if (std::filesystem::exists(staging_dir)) {
std::filesystem::remove_all(staging_dir);
}
std::filesystem::create_directories(staging_dir);

std::filesystem::create_directories(ep_dir);
auto zip_path = staging_dir / kPackageFileName;

// Download
logger.Log(LogLevel::Information, "CUDA EP: downloading from CDN...");
logger.Log(LogLevel::Information,
fmt::format("CUDA EP: downloading for {}...", platform_info->key));
logger.Log(LogLevel::Debug,
fmt::format("CUDA EP: download URL is {}", manifest.download_url));

// Bridge callback-based cancellation to the atomic flag HttpDownloadFile expects
std::atomic<bool> cancel_flag{false};

auto download_progress = [&](float pct) {
if (progress_cb) {
// 0-80% for download phase
// 0-80% for the download phase.
if (!progress_cb(name_, pct * 0.8f)) {
cancel_flag.store(true);
}
}
};

if (!HttpDownloadFile(kDownloadUrl, zip_path, kUserAgent,
if (!HttpDownloadFile(manifest.download_url, zip_path, kUserAgent,
&cancel_flag, download_progress, logger)) {
logger.Log(LogLevel::Warning, "CUDA EP: download failed (see prior log for details)");
std::filesystem::remove_all(staging_dir);
return false;
}

// Extract
logger.Log(LogLevel::Information, "CUDA EP: extracting...");
logger.Log(LogLevel::Information,
fmt::format("CUDA EP: extracting package to {}", staging_dir.string()));

if (!ExtractZip(zip_path, ep_dir, logger)) {
if (!ExtractZip(zip_path, staging_dir, logger)) {
logger.Log(LogLevel::Warning, "CUDA EP: extraction failed");
std::filesystem::remove_all(staging_dir);
return false;
}

// Clean up zip
std::filesystem::remove(zip_path);

// Verify
if (!VerifyPackage(ep_dir, logger)) {
logger.Log(LogLevel::Warning, "CUDA EP: verification failed after download");
if (!VerifyPackage(staging_dir, manifest.sha256, logger)) {
logger.Log(LogLevel::Warning, "CUDA EP: verification failed after extraction");
std::filesystem::remove_all(staging_dir);
return false;
}

logger.Log(LogLevel::Debug,
fmt::format("CUDA EP: staging verification succeeded, promoting to {}",
ep_dir.string()));

// Swap in the new install: delete the old one, then rename staging to target.
// NOTE: not atomic — a failure between the two steps leaves no install in place.
if (std::filesystem::exists(ep_dir)) {
std::filesystem::remove_all(ep_dir);
}
std::filesystem::rename(staging_dir, ep_dir);
logger.Log(LogLevel::Information, "CUDA EP: successfully installed.");
}

if (progress_cb) {
progress_cb(name_, 90.0f);
}

// Register with ORT
// Register with ORT.
#ifdef _WIN32
// Permanently prepend the EP directory to PATH. The zip bundles all
// required CUDA/cuDNN DLLs, so no system CUDA install is needed.
// PATH must stay modified for the process lifetime because:
// - onnxruntime_providers_cuda.dll delay-loads some dependencies
// - onnxruntime_providers_cuda_plugin.dll delay-loads CUDA dependencies
// - onnxruntime-genai-cuda.dll is loaded later at model-load time
// - ORT creates CUDA sessions after registration
{
Expand All @@ -202,9 +292,17 @@ bool CudaEpBootstrapper::DownloadAndRegister(bool force,
}
#endif

auto cuda_dll_path = ep_dir / kCudaProviderDll;
auto cuda_lib_path = ep_dir / platform_info->ep_lib;

// NOTE: RegisterExecutionProviderLibrary loads the CUDA plugin DLL, which
// initializes the CUDA runtime and cuDNN. This can take 30–60 seconds on
// first use — especially on machines with large cuDNN caches or slow VRAM
// init. This is normal; it is NOT a hang in the bootstrapper itself.
logger.Log(LogLevel::Information,
fmt::format("CUDA EP: registering provider library {} (CUDA init may take ~30s)...",
cuda_lib_path.string()));

if (!register_ep_(kRegistrationName, cuda_dll_path)) {
if (!register_ep_(kRegistrationName, cuda_lib_path)) {
logger.Log(LogLevel::Warning, "CUDA EP: ORT registration failed");
return false;
}
Expand All @@ -215,10 +313,9 @@ bool CudaEpBootstrapper::DownloadAndRegister(bool force,
progress_cb(name_, 100.0f);
}

// Bootstrapper-side log — captures the install dir, which the central
// register_ep callback (logs library + version) doesn't have.
logger.Log(LogLevel::Information,
fmt::format("CUDA EP: ready (install_path={})", ep_dir.string()));
fmt::format("CUDA EP: ready (install_path={}, version={})",
ep_dir.string(), manifest.version));
return true;
} catch (const std::exception& e) {
logger.Log(LogLevel::Warning, fmt::format("CUDA EP: error: {}", e.what()));
Expand Down
6 changes: 5 additions & 1 deletion sdk_v2/cpp/src/spdlog_logger.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,11 @@ SpdlogLogger::SpdlogLogger(LogLevel min_level, const std::string& logs_dir) {
spdlog::async_overflow_policy::block);

logger_->set_level(ToSpdlogLevel(min_level));
logger_->flush_on(spdlog::level::warn);
// Flush immediately at the configured level so every message that passes the
// level filter is guaranteed to reach the file sink. Without this, the async
// queue only flushes on warn+, causing debug/info messages to be lost if the
// process exits before the next warning.
logger_->flush_on(ToSpdlogLevel(min_level));

spdlog::register_logger(logger_);
}
Expand Down