diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp index 3f6cfedfe897..0353475ae32e 100644 --- a/ggml/src/ggml-openvino/ggml-decoder.cpp +++ b/ggml/src/ggml-openvino/ggml-decoder.cpp @@ -5,6 +5,7 @@ #include "ggml-openvino.h" #include "ggml-quants.h" #include "ggml.h" +#include "utils.h" #include #include @@ -51,13 +52,12 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph, m_model_weights(model_weights), m_model_params(model_params), m_compute_params(compute_params) { - if (auto * env = getenv("GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS"); env && atoi(env) > 0) { -#ifdef _WIN32 - _putenv_s("GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS", ""); -#else - unsetenv("GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS"); -#endif - print_tensor_address_map(cgraph); + static bool printed_address_map = false; + if (!printed_address_map) { + if (ggml_openvino_env_flag("GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS")) { + printed_address_map = true; + print_tensor_address_map(cgraph); + } } validate_cgraph(); diff --git a/ggml/src/ggml-openvino/ggml-openvino-extra.cpp b/ggml/src/ggml-openvino/ggml-openvino-extra.cpp index 4140136aca25..d05085606e7a 100644 --- a/ggml/src/ggml-openvino/ggml-openvino-extra.cpp +++ b/ggml/src/ggml-openvino/ggml-openvino-extra.cpp @@ -22,7 +22,31 @@ void ggml_openvino_device_config::init() { if (initialized) { return; } - device_name = getenv("GGML_OPENVINO_DEVICE") ? getenv("GGML_OPENVINO_DEVICE") : "CPU"; + + static constexpr const char* env_var_names[] = { + "GGML_OPENVINO_DEVICE", + "GGML_OPENVINO_CACHE_DIR", + "GGML_OPENVINO_PREFILL_CHUNK_SIZE", + "GGML_OPENVINO_STATEFUL_EXECUTION", + "GGML_OPENVINO_PROFILING", + "GGML_OPENVINO_DUMP_CGRAPH", + "GGML_OPENVINO_DUMP_IR", + "GGML_OPENVINO_DEBUG_INPUT", + "GGML_OPENVINO_DEBUG_OUTPUT", + "GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS", + "GGML_OPENVINO_ENABLE_CACHE", + "GGML_OPENVINO_DISABLE_KV_SLICE", + "GGML_OPENVINO_MANUAL_GQA_ATTN" + }; + + for (const char* const & env_var : env_var_names) { + auto * env = getenv(env_var); + if (env) { + environment_variables[env_var] = env; + } + } + + device_name = ggml_openvino_getenv("GGML_OPENVINO_DEVICE") ? ggml_openvino_getenv("GGML_OPENVINO_DEVICE") : "CPU"; auto available_devices = ov_singleton_core().get_available_devices(); if (std::find(available_devices.begin(), available_devices.end(), device_name) == available_devices.end()) { GGML_LOG_WARN("GGML OpenVINO Backend: device %s is not available, fallback to CPU\n", device_name.c_str()); @@ -30,7 +54,7 @@ void ggml_openvino_device_config::init() { } is_npu = (device_name == "NPU"); - auto * cache_dir = getenv("GGML_OPENVINO_CACHE_DIR"); + const char * cache_dir = ggml_openvino_getenv("GGML_OPENVINO_CACHE_DIR"); if (device_name == "NPU") { compile_config = { {"NPU_COMPILER_DYNAMIC_QUANTIZATION", "YES" }, @@ -119,6 +143,15 @@ const std::string & ggml_openvino_get_device_name() { return ggml_openvino_get_device_config().device_name; } +// Get the value of a specific environment variable +const char* ggml_openvino_getenv(const char* var){ + auto it = ggml_openvino_get_device_config().environment_variables.find(var); + if (it == ggml_openvino_get_device_config().environment_variables.end()) { + return nullptr; + } + return it->second.c_str(); +} + // Check if running on NPU bool ggml_openvino_is_npu() { return ggml_openvino_get_device_config().is_npu; diff --git a/ggml/src/ggml-openvino/ggml-openvino-extra.h b/ggml/src/ggml-openvino/ggml-openvino-extra.h index 57bfa4d907fd..789d2a61758c 100644 --- a/ggml/src/ggml-openvino/ggml-openvino-extra.h +++ b/ggml/src/ggml-openvino/ggml-openvino-extra.h @@ -64,6 +64,7 @@ struct ggml_openvino_device_config { bool initialized = false; std::optional remote_context; ov::AnyMap compile_config; + std::unordered_map environment_variables; cl_command_queue cl_queue = nullptr; void init(); @@ -79,6 +80,9 @@ void ggml_openvino_init_device_config(); // Get the device name const std::string & ggml_openvino_get_device_name(); +// Get the value of a specific environment variable +const char* ggml_openvino_getenv(const char* var); + // Check if running on NPU bool ggml_openvino_is_npu(); diff --git a/ggml/src/ggml-openvino/ggml-openvino.cpp b/ggml/src/ggml-openvino/ggml-openvino.cpp index cd0c1738d833..1960e9621de7 100644 --- a/ggml/src/ggml-openvino/ggml-openvino.cpp +++ b/ggml/src/ggml-openvino/ggml-openvino.cpp @@ -1,10 +1,29 @@ #include "ggml-openvino.h" +#include "ggml-backend-impl.h" +#include "ggml-backend.h" +#include "ggml-impl.h" +#include "ggml-openvino-extra.h" #include "ggml-openvino/utils.h" #include "ggml-openvino/openvino/op_table.h" #include "ggml-quants.h" - +#include "ggml.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include #if defined(_WIN32) # define WIN32_LEAN_AND_MEAN @@ -129,7 +148,7 @@ static void * ggml_backend_openvino_buffer_get_base(ggml_backend_buffer_t buffer static bool is_stateful_enabled() { static const auto * stateful = getenv("GGML_OPENVINO_STATEFUL_EXECUTION"); - return stateful != nullptr && atoi(stateful) > 0; + return stateful && *stateful != '\0' && strcmp(stateful, "0") != 0; } static enum ggml_status ggml_backend_openvino_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) { diff --git a/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp b/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp index 08d23d23f642..11e57e904dcb 100644 --- a/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp +++ b/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp @@ -1,6 +1,7 @@ #include "../node_context.h" #include "../op_table.h" #include "../utils.h" +#include "ggml-openvino/ggml-openvino-extra.h" #include #include @@ -68,11 +69,11 @@ OutputVector translate_flash_attn_ext(const NodeContext & context) { // Set GGML_OPENVINO_MANUAL_GQA_ATTN to a positive value (e.g. 1) to force-enable, // or to 0 to force-disable. Unset falls back to the device-based default. static const bool manual_gqa_enabled = []() { - const char * env = getenv("GGML_OPENVINO_MANUAL_GQA_ATTN"); + const char * env = ggml_openvino_getenv("GGML_OPENVINO_MANUAL_GQA_ATTN"); if (env != nullptr) { return atoi(env) > 0; } - const char * dev = getenv("GGML_OPENVINO_DEVICE"); + const char * dev = ggml_openvino_getenv("GGML_OPENVINO_DEVICE"); return dev != nullptr && std::string(dev) == "GPU"; }(); const bool use_manual_gqa_attention = diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp index b31b89052c4d..0556b89a8683 100644 --- a/ggml/src/ggml-openvino/utils.cpp +++ b/ggml/src/ggml-openvino/utils.cpp @@ -44,8 +44,8 @@ // =100); otherwise returns 0 (unset, empty, =0, negative, or non-numeric). // Boolean toggles use this as a flag: `if (ggml_openvino_env_flag(name))` is // true iff the value is positive, so =0 is a no-op for all toggles. -static int ggml_openvino_env_flag(const char * name) { - const char * v = getenv(name); +int ggml_openvino_env_flag(const char * name) { + const char * v = ggml_openvino_getenv(name); return v ? std::max(0, std::atoi(v)) : 0; } diff --git a/ggml/src/ggml-openvino/utils.h b/ggml/src/ggml-openvino/utils.h index 2ed8f0c40223..f9c9633abd9b 100644 --- a/ggml/src/ggml-openvino/utils.h +++ b/ggml/src/ggml-openvino/utils.h @@ -1,4 +1,3 @@ -#include "ggml-backend-impl.h" #include "ggml-decoder.h" #include "ggml-impl.h" @@ -80,6 +79,8 @@ struct ov_runtime_context { } }; +int ggml_openvino_env_flag(const char * name); + enum ggml_status ov_graph_compute(struct ggml_cgraph * cgraph, ggml_backend_t backend); enum ggml_status ov_graph_compute_dynamic(struct ggml_cgraph * cgraph, std::shared_ptr r_ctx);