ravi9 · ravi9 · Jun 9, 2026 · Jun 8, 2026 · Jun 9, 2026 · Jun 9, 2026
@@ -5,6 +5,7 @@
 #include "ggml-openvino.h"
 #include "ggml-quants.h"
 #include "ggml.h"
+#include "utils.h"
 
 #include <algorithm>
 #include <cassert>
@@ -51,13 +52,12 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph,
     m_model_weights(model_weights),
     m_model_params(model_params),
     m_compute_params(compute_params) {
-    if (auto * env = getenv("GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS"); env && atoi(env) > 0) {
-#ifdef _WIN32
-        _putenv_s("GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS", "");
-#else
-        unsetenv("GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS");
-#endif
-        print_tensor_address_map(cgraph);
+    static bool printed_address_map = false;
+    if (!printed_address_map) {
+        if (ggml_openvino_env_flag("GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS")) {
+            printed_address_map = true;
+            print_tensor_address_map(cgraph);
+        }
     }
 
     validate_cgraph();

@@ -22,15 +22,39 @@ void ggml_openvino_device_config::init() {
     if (initialized) {
         return;
     }
-    device_name = getenv("GGML_OPENVINO_DEVICE") ? getenv("GGML_OPENVINO_DEVICE") : "CPU";
+
+    static constexpr const char* env_var_names[] = {
+        "GGML_OPENVINO_DEVICE",
+        "GGML_OPENVINO_CACHE_DIR",
+        "GGML_OPENVINO_PREFILL_CHUNK_SIZE",
+        "GGML_OPENVINO_STATEFUL_EXECUTION",
+        "GGML_OPENVINO_PROFILING",
+        "GGML_OPENVINO_DUMP_CGRAPH",
+        "GGML_OPENVINO_DUMP_IR",
+        "GGML_OPENVINO_DEBUG_INPUT",
+        "GGML_OPENVINO_DEBUG_OUTPUT",
+        "GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS",
+        "GGML_OPENVINO_ENABLE_CACHE",
+        "GGML_OPENVINO_DISABLE_KV_SLICE",
+        "GGML_OPENVINO_MANUAL_GQA_ATTN"
+    };
+
+    for (const char* const & env_var : env_var_names) {
+        auto * env = getenv(env_var);
+        if (env) {
+            environment_variables[env_var] = env;
+        }
+    }
+
+    device_name = ggml_openvino_getenv("GGML_OPENVINO_DEVICE") ? ggml_openvino_getenv("GGML_OPENVINO_DEVICE") : "CPU";
     auto available_devices = ov_singleton_core().get_available_devices();
     if (std::find(available_devices.begin(), available_devices.end(), device_name) == available_devices.end()) {
         GGML_LOG_WARN("GGML OpenVINO Backend: device %s is not available, fallback to CPU\n", device_name.c_str());
         device_name = "CPU";
     }
     is_npu = (device_name == "NPU");
 
-    auto * cache_dir = getenv("GGML_OPENVINO_CACHE_DIR");
+    const char * cache_dir = ggml_openvino_getenv("GGML_OPENVINO_CACHE_DIR");
     if (device_name == "NPU") {
         compile_config = {
             {"NPU_COMPILER_DYNAMIC_QUANTIZATION", "YES"   },
@@ -119,6 +143,15 @@ const std::string & ggml_openvino_get_device_name() {
     return ggml_openvino_get_device_config().device_name;
 }
 
+// Get the value of a specific environment variable as stored in the device config's environment_variables map.
+// Returns nullptr when `var` is null or no value is stored for it; the pointer refers to the stored string.
+const char* ggml_openvino_getenv(const char* var){
+    const auto & env_vars = ggml_openvino_get_device_config().environment_variables;
+    // Guard null before find(): building the std::string lookup key from a null pointer is undefined behaviour.
+    const auto it = var ? env_vars.find(var) : env_vars.end();
+    return it == env_vars.end() ? nullptr : it->second.c_str();
+}
+
 // Check if running on NPU
 bool ggml_openvino_is_npu() {
     return ggml_openvino_get_device_config().is_npu;

@@ -64,6 +64,7 @@ struct ggml_openvino_device_config {
     bool initialized = false;
     std::optional<ov::RemoteContext> remote_context;
     ov::AnyMap compile_config;
+    std::unordered_map<std::string, std::string> environment_variables;
     cl_command_queue cl_queue = nullptr;
 
     void init();
@@ -79,6 +80,9 @@ void ggml_openvino_init_device_config();
 // Get the device name
 const std::string & ggml_openvino_get_device_name();
 
+// Get the value of a specific environment variable (nullptr when the device config holds no value for it)
+const char* ggml_openvino_getenv(const char* var);
+
 // Check if running on NPU
 bool ggml_openvino_is_npu();
 

@@ -1,10 +1,29 @@
 #include "ggml-openvino.h"
 
+#include "ggml-backend-impl.h"
+#include "ggml-backend.h"
+#include "ggml-impl.h"
+#include "ggml-openvino-extra.h"
 #include "ggml-openvino/utils.h"
 #include "ggml-openvino/openvino/op_table.h"
 #include "ggml-quants.h"
-
+#include "ggml.h"
+
+#include <atomic>
+#include <cstdlib>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <mutex>
+#include <openvino/core/type/element_type.hpp>
+#include <openvino/openvino.hpp>
+#include <openvino/runtime/allocator.hpp>
 #include <openvino/runtime/intel_gpu/ocl/ocl.hpp>
+#include <openvino/runtime/intel_npu/level_zero/level_zero.hpp>
+#include <openvino/runtime/tensor.hpp>
+#include <set>
+#include <string>
+#include <vector>
 
 #if defined(_WIN32)
 #    define WIN32_LEAN_AND_MEAN
@@ -129,7 +148,7 @@ static void * ggml_backend_openvino_buffer_get_base(ggml_backend_buffer_t buffer
 
 static bool is_stateful_enabled() {  // true when GGML_OPENVINO_STATEFUL_EXECUTION is set, non-empty, and not exactly "0"
     static const auto * stateful = getenv("GGML_OPENVINO_STATEFUL_EXECUTION");  // function-local static: getenv is queried once and the pointer reused on later calls
-    return stateful != nullptr && atoi(stateful) > 0;
+    return stateful && *stateful != '\0' && strcmp(stateful, "0") != 0;  // NOTE(review): unlike the old atoi(...) > 0 test, values such as "-1" or "abc" now enable this; it also still calls getenv directly rather than ggml_openvino_getenv - confirm both are intended
 }
 
 static enum ggml_status ggml_backend_openvino_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {

@@ -1,6 +1,7 @@
 #include "../node_context.h"
 #include "../op_table.h"
 #include "../utils.h"
+#include "ggml-openvino/ggml-openvino-extra.h"
 
 #include <cstdint>
 #include <cstdlib>
@@ -68,11 +69,11 @@ OutputVector translate_flash_attn_ext(const NodeContext & context) {
     // Set GGML_OPENVINO_MANUAL_GQA_ATTN to a positive value (e.g. 1) to force-enable,
     // or to 0 to force-disable. Unset falls back to the device-based default.
     static const bool manual_gqa_enabled = []() {
-        const char * env = getenv("GGML_OPENVINO_MANUAL_GQA_ATTN");
+        const char * env = ggml_openvino_getenv("GGML_OPENVINO_MANUAL_GQA_ATTN");
         if (env != nullptr) {
             return atoi(env) > 0;
         }
-        const char * dev = getenv("GGML_OPENVINO_DEVICE");
+        const char * dev = ggml_openvino_getenv("GGML_OPENVINO_DEVICE");
         return dev != nullptr && std::string(dev) == "GPU";
     }();
     const bool use_manual_gqa_attention =

@@ -44,8 +44,8 @@
 // =100); otherwise returns 0 (unset, empty, =0, negative, or non-numeric).
 // Boolean toggles use this as a flag: `if (ggml_openvino_env_flag(name))` is
 // true iff the value is positive, so =0 is a no-op for all toggles.
-static int ggml_openvino_env_flag(const char * name) {
-    const char * v = getenv(name);
+int ggml_openvino_env_flag(const char * name) {  // `static` dropped, so the function now has external linkage
+    const char * v = ggml_openvino_getenv(name);  // looked up through ggml_openvino_getenv instead of calling getenv directly
     return v ? std::max(0, std::atoi(v)) : 0;  // nullptr (no value found) and non-positive parses both yield 0
 }
 

@@ -1,4 +1,3 @@
-#include "ggml-backend-impl.h"
 #include "ggml-decoder.h"
 #include "ggml-impl.h"
 
@@ -80,6 +79,8 @@ struct ov_runtime_context {
     }
 };
 
+int ggml_openvino_env_flag(const char * name);  // positive integer value of the named variable, otherwise 0
+
 enum ggml_status ov_graph_compute(struct ggml_cgraph * cgraph, ggml_backend_t backend);
 
 enum ggml_status ov_graph_compute_dynamic(struct ggml_cgraph * cgraph, std::shared_ptr<ov_runtime_context> r_ctx);