From 34b2bee0137b463a50e6442110b92b0b3bda2fff Mon Sep 17 00:00:00 2001
From: Mostafa Faheem <mostafaaafaheem@gmail.com>
Date: Mon, 8 Jun 2026 23:21:59 +0300
Subject: [PATCH 1/3] OpenVINO backend: Enhance envvar handling

---
 ggml/src/ggml-openvino/ggml-decoder.cpp       |  14 +-
 .../src/ggml-openvino/ggml-openvino-extra.cpp |  37 +++++-
 ggml/src/ggml-openvino/ggml-openvino-extra.h  |   4 +
 ggml/src/ggml-openvino/ggml-openvino.cpp      | 120 ++++++++++--------
 .../openvino/op/flash_attn_ext.cpp            |   5 +-
 ggml/src/ggml-openvino/utils.cpp              |   4 +-
 ggml/src/ggml-openvino/utils.h                |   3 +-
 7 files changed, 123 insertions(+), 64 deletions(-)
diff --git a/ggml/src/ggml-openvino/ggml-decoder.cpp b/ggml/src/ggml-openvino/ggml-decoder.cpp
index 3f6cfedfe897..0353475ae32e 100644
--- a/ggml/src/ggml-openvino/ggml-decoder.cpp
+++ b/ggml/src/ggml-openvino/ggml-decoder.cpp
@@ -5,6 +5,7 @@
 #include "ggml-openvino.h"
 #include "ggml-quants.h"
 #include "ggml.h"
+#include "utils.h"
 
 #include <algorithm>
 #include <cassert>
@@ -51,13 +52,12 @@ GgmlOvDecoder::GgmlOvDecoder(ggml_cgraph * cgraph,
     m_model_weights(model_weights),
     m_model_params(model_params),
     m_compute_params(compute_params) {
-    if (auto * env = getenv("GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS"); env && atoi(env) > 0) {
-#ifdef _WIN32
-        _putenv_s("GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS", "");
-#else
-        unsetenv("GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS");
-#endif
-        print_tensor_address_map(cgraph);
+    static bool printed_address_map = false;
+    if (!printed_address_map) {
+        if (ggml_openvino_env_flag("GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS")) {
+            printed_address_map = true;
+            print_tensor_address_map(cgraph);
+        }
     }
 
     validate_cgraph();
diff --git a/ggml/src/ggml-openvino/ggml-openvino-extra.cpp b/ggml/src/ggml-openvino/ggml-openvino-extra.cpp
index 4140136aca25..d05085606e7a 100644
--- a/ggml/src/ggml-openvino/ggml-openvino-extra.cpp
+++ b/ggml/src/ggml-openvino/ggml-openvino-extra.cpp
@@ -22,7 +22,31 @@ void ggml_openvino_device_config::init() {
     if (initialized) {
         return;
     }
-    device_name = getenv("GGML_OPENVINO_DEVICE") ? getenv("GGML_OPENVINO_DEVICE") : "CPU";
+
+    static constexpr const char* env_var_names[] = {
+        "GGML_OPENVINO_DEVICE",
+        "GGML_OPENVINO_CACHE_DIR",
+        "GGML_OPENVINO_PREFILL_CHUNK_SIZE",
+        "GGML_OPENVINO_STATEFUL_EXECUTION",
+        "GGML_OPENVINO_PROFILING",
+        "GGML_OPENVINO_DUMP_CGRAPH",
+        "GGML_OPENVINO_DUMP_IR",
+        "GGML_OPENVINO_DEBUG_INPUT",
+        "GGML_OPENVINO_DEBUG_OUTPUT",
+        "GGML_OPENVINO_PRINT_CGRAPH_TENSOR_ADDRESS",
+        "GGML_OPENVINO_ENABLE_CACHE",
+        "GGML_OPENVINO_DISABLE_KV_SLICE",
+        "GGML_OPENVINO_MANUAL_GQA_ATTN"
+    };
+
+    for (const char* const & env_var : env_var_names) {
+        auto * env = getenv(env_var);
+        if (env) {
+            environment_variables[env_var] = env;
+        }
+    }
+
+    device_name = ggml_openvino_getenv("GGML_OPENVINO_DEVICE") ? ggml_openvino_getenv("GGML_OPENVINO_DEVICE") : "CPU";
     auto available_devices = ov_singleton_core().get_available_devices();
     if (std::find(available_devices.begin(), available_devices.end(), device_name) == available_devices.end()) {
         GGML_LOG_WARN("GGML OpenVINO Backend: device %s is not available, fallback to CPU\n", device_name.c_str());
@@ -30,7 +54,7 @@ void ggml_openvino_device_config::init() {
     }
     is_npu = (device_name == "NPU");
 
-    auto * cache_dir = getenv("GGML_OPENVINO_CACHE_DIR");
+    const char * cache_dir = ggml_openvino_getenv("GGML_OPENVINO_CACHE_DIR");
     if (device_name == "NPU") {
         compile_config = {
             {"NPU_COMPILER_DYNAMIC_QUANTIZATION", "YES"   },
@@ -119,6 +143,15 @@ const std::string & ggml_openvino_get_device_name() {
     return ggml_openvino_get_device_config().device_name;
 }
 
+// Get the cached value of an environment variable (nullptr if unset or if var is null)
+const char * ggml_openvino_getenv(const char * var) {
+    // Values are the snapshot stored by ggml_openvino_device_config::init(); fetch the map once.
+    const auto & env_vars = ggml_openvino_get_device_config().environment_variables;
+    // Guard nullptr: building the std::string lookup key from a null pointer is undefined.
+    const auto it = (var != nullptr) ? env_vars.find(var) : env_vars.end();
+    return it == env_vars.end() ? nullptr : it->second.c_str();
+}
+
 // Check if running on NPU
 bool ggml_openvino_is_npu() {
     return ggml_openvino_get_device_config().is_npu;
diff --git a/ggml/src/ggml-openvino/ggml-openvino-extra.h b/ggml/src/ggml-openvino/ggml-openvino-extra.h
index 57bfa4d907fd..789d2a61758c 100644
--- a/ggml/src/ggml-openvino/ggml-openvino-extra.h
+++ b/ggml/src/ggml-openvino/ggml-openvino-extra.h
@@ -64,6 +64,7 @@ struct ggml_openvino_device_config {
     bool initialized = false;
     std::optional<ov::RemoteContext> remote_context;
     ov::AnyMap compile_config;
+    std::unordered_map<std::string, std::string> environment_variables;
     cl_command_queue cl_queue = nullptr;
 
     void init();
@@ -79,6 +80,9 @@ void ggml_openvino_init_device_config();
 // Get the device name
 const std::string & ggml_openvino_get_device_name();
 
+// Get the value of a specific environment variable
+const char* ggml_openvino_getenv(const char* var);
+
 // Check if running on NPU
 bool ggml_openvino_is_npu();
 
diff --git a/ggml/src/ggml-openvino/ggml-openvino.cpp b/ggml/src/ggml-openvino/ggml-openvino.cpp
index cd0c1738d833..6eb0c9255e72 100644
--- a/ggml/src/ggml-openvino/ggml-openvino.cpp
+++ b/ggml/src/ggml-openvino/ggml-openvino.cpp
@@ -1,10 +1,28 @@
 #include "ggml-openvino.h"
 
+#include "ggml-backend-impl.h"
+#include "ggml-backend.h"
+#include "ggml-impl.h"
+#include "ggml-openvino-extra.h"
 #include "ggml-openvino/utils.h"
-#include "ggml-openvino/openvino/op_table.h"
 #include "ggml-quants.h"
-
+#include "ggml.h"
+
+#include <atomic>
+#include <cstdlib>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <mutex>
+#include <openvino/core/type/element_type.hpp>
+#include <openvino/openvino.hpp>
+#include <openvino/runtime/allocator.hpp>
 #include <openvino/runtime/intel_gpu/ocl/ocl.hpp>
+#include <openvino/runtime/intel_npu/level_zero/level_zero.hpp>
+#include <openvino/runtime/tensor.hpp>
+#include <set>
+#include <string>
+#include <vector>
 
 #if defined(_WIN32)
 #    define WIN32_LEAN_AND_MEAN
@@ -129,7 +147,7 @@ static void * ggml_backend_openvino_buffer_get_base(ggml_backend_buffer_t buffer
 
 static bool is_stateful_enabled() {
-    static const auto * stateful = getenv("GGML_OPENVINO_STATEFUL_EXECUTION");
-    return stateful != nullptr && atoi(stateful) > 0;
+    // Use the shared flag helper: cached lookup, on only for a positive integer (not "-1"/"abc").
+    return ggml_openvino_env_flag("GGML_OPENVINO_STATEFUL_EXECUTION") > 0;
 }
 
 static enum ggml_status ggml_backend_openvino_buffer_init_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor) {
@@ -892,8 +910,7 @@ static bool is_op_unsupported_case(const ggml_tensor * op) {
         break;
     }
     case GGML_OP_ADD:
-    case GGML_OP_MUL:
-    case GGML_OP_SUB: {
+    case GGML_OP_MUL: {
         if (op->src[1]->op == GGML_OP_PERMUTE) {
             return true;
         }
@@ -1030,9 +1047,19 @@ static bool is_op_unsupported_case(const ggml_tensor * op) {
             op->src[0]->src[0]->src[0]->op == GGML_OP_PERMUTE) {
             return true;
         }
+        if (op->src[0]->type == GGML_TYPE_F16 && op->src[1]->type == GGML_TYPE_F16) {
+            // Has accuracy issue, try enabling this and see `test-backend-ops -o "MUL_MAT"`
+            // GGML_LOG_WARN("OpenVINO backend does not support MUL_MAT with two F16 tensors\n");
+            return true;
+        }
         if (op->src[0]->ne[3] != op->src[1]->ne[3] && op->src[0]->ne[3] != 1 && op->src[1]->ne[3] != 1) {
             return true;
         }
+        if (ggml_is_quantized(op->src[0]->type) && op->src[0]->ne[1] == 1) {
+            // MUL_MAT(type_a=q4_0,type_b=f32,m=1,n=2048,k=8192,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1)
+            // triggers a bug in ov matmul_shape_inference.hpp
+            return true;
+        }
         if (op->src[0]->op == GGML_OP_VIEW && op->src[1]->op == GGML_OP_VIEW) {
             return true;
         }
@@ -1121,14 +1148,6 @@ static bool is_op_unsupported_case(const ggml_tensor * op) {
         // Keep this op on CPU until the OpenVINO implementation is fixed.
         return true;
     }
-    case GGML_OP_VIEW: {
-        // Skip TOPK_MOE fused tests until it is fully supported
-        // the argsort_top_k VIEW wrapping ARGSORT is named "selected_experts" in test_topk_moe
-        if (strcmp(op->name, "selected_experts") == 0) {
-            return true;
-        }
-        break;
-    }
     default:
         break;
     }
@@ -1138,47 +1157,48 @@ static bool is_op_unsupported_case(const ggml_tensor * op) {
 static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
     GGML_ASSERT(dev->reg != nullptr);
 
-    static std::unordered_set<ggml_type> supported_types{GGML_TYPE_F32,  GGML_TYPE_F16,  GGML_TYPE_BF16, GGML_TYPE_I64,
+    static std::set<ggml_type> supported_types{GGML_TYPE_F32,  GGML_TYPE_F16,  GGML_TYPE_BF16, GGML_TYPE_I64,
                                                GGML_TYPE_I32,  GGML_TYPE_Q4_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_K,
                                                GGML_TYPE_Q5_K, GGML_TYPE_Q8_0, GGML_TYPE_Q6_K};
 
-    // derive supported op sets from the op_table map, keys in
-    // the map use the full macro name (e.g. "GGML_OP_ADD"), while
-    // the ggml_*_op_name() helpers return only the trailing part (e.g. "ADD").
-    // each set is built once and cached.
-    static const auto build_supported_sets = [] {
-        const auto & table = ov::frontend::ggml::get_supported_ops();
-        std::unordered_set<ggml_op> ops;
-        std::unordered_set<ggml_unary_op> unary_ops;
-        std::unordered_set<ggml_glu_op> glu_ops;
-
-        // GGML_OP_NONE has no translator but is always safe to add to the supported set.
-        ops.insert(GGML_OP_NONE);
-
-        for (int i = 0; i < GGML_OP_COUNT; ++i) {
-            const std::string key = std::string("GGML_OP_") + ggml_op_name(static_cast<ggml_op>(i));
-            if (table.count(key)) {
-                ops.insert(static_cast<ggml_op>(i));
-            }
-        }
-        for (int i = 0; i < GGML_UNARY_OP_COUNT; ++i) {
-            const std::string key = std::string("GGML_UNARY_OP_") + ggml_unary_op_name(static_cast<ggml_unary_op>(i));
-            if (table.count(key)) {
-                unary_ops.insert(static_cast<ggml_unary_op>(i));
-            }
-        }
-        for (int i = 0; i < GGML_GLU_OP_COUNT; ++i) {
-            const std::string key = std::string("GGML_GLU_OP_") + ggml_glu_op_name(static_cast<ggml_glu_op>(i));
-            if (table.count(key)) {
-                glu_ops.insert(static_cast<ggml_glu_op>(i));
-            }
-        }
-        return std::make_tuple(ops, unary_ops, glu_ops);
+    static const std::set<ggml_op> supported_ops{GGML_OP_NONE,
+                                                 GGML_OP_ADD,
+                                                 GGML_OP_CONCAT,
+                                                 GGML_OP_DIV,
+                                                 GGML_OP_MUL,
+                                                 GGML_OP_MUL_MAT,
+                                                 GGML_OP_MUL_MAT_ID,
+                                                 GGML_OP_VIEW,
+                                                 GGML_OP_CONT,
+                                                 GGML_OP_RESHAPE,
+                                                 GGML_OP_PERMUTE,
+                                                 GGML_OP_TRANSPOSE,
+                                                 GGML_OP_GET_ROWS,
+                                                 GGML_OP_ROPE,
+                                                 GGML_OP_RMS_NORM,
+                                                 GGML_OP_SCALE,
+                                                 GGML_OP_NORM,
+                                                 GGML_OP_SOFT_MAX,
+                                                 GGML_OP_SET_ROWS,
+                                                 GGML_OP_FLASH_ATTN_EXT,
+                                                 GGML_OP_CPY,
+                                                 GGML_OP_L2_NORM,
+                                                 GGML_OP_SUM_ROWS,
+                                                 GGML_OP_CLAMP,
+                                                 GGML_OP_PAD,
+                                                 GGML_OP_SSM_CONV,
+                                                 GGML_OP_GATED_DELTA_NET,
+                                                 GGML_OP_IM2COL};
+    static const std::set<ggml_unary_op> supported_unary_ops{
+        GGML_UNARY_OP_GELU,
+        GGML_UNARY_OP_SILU,
+        GGML_UNARY_OP_SOFTPLUS,
+        GGML_UNARY_OP_TANH,
+    };
+    static const std::set<ggml_glu_op> supported_glu_ops{
+        GGML_GLU_OP_SWIGLU,
+        GGML_GLU_OP_GEGLU,
     };
-    static const auto supported_sets = build_supported_sets();
-    static const auto & supported_ops = std::get<0>(supported_sets);
-    static const auto & supported_unary_ops = std::get<1>(supported_sets);
-    static const auto & supported_glu_ops = std::get<2>(supported_sets);
 
     switch (op->op) {
     case GGML_OP_UNARY: {
diff --git a/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp b/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp
index 08d23d23f642..11e57e904dcb 100644
--- a/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp
+++ b/ggml/src/ggml-openvino/openvino/op/flash_attn_ext.cpp
@@ -1,6 +1,7 @@
 #include "../node_context.h"
 #include "../op_table.h"
 #include "../utils.h"
+#include "ggml-openvino/ggml-openvino-extra.h"
 
 #include <cstdint>
 #include <cstdlib>
@@ -68,11 +69,11 @@ OutputVector translate_flash_attn_ext(const NodeContext & context) {
     // Set GGML_OPENVINO_MANUAL_GQA_ATTN to a positive value (e.g. 1) to force-enable,
     // or to 0 to force-disable. Unset falls back to the device-based default.
     static const bool manual_gqa_enabled = []() {
-        const char * env = getenv("GGML_OPENVINO_MANUAL_GQA_ATTN");
+        const char * env = ggml_openvino_getenv("GGML_OPENVINO_MANUAL_GQA_ATTN");
         if (env != nullptr) {
             return atoi(env) > 0;
         }
-        const char * dev = getenv("GGML_OPENVINO_DEVICE");
+        const char * dev = ggml_openvino_getenv("GGML_OPENVINO_DEVICE");
         return dev != nullptr && std::string(dev) == "GPU";
     }();
     const bool use_manual_gqa_attention =
diff --git a/ggml/src/ggml-openvino/utils.cpp b/ggml/src/ggml-openvino/utils.cpp
index b31b89052c4d..0556b89a8683 100644
--- a/ggml/src/ggml-openvino/utils.cpp
+++ b/ggml/src/ggml-openvino/utils.cpp
@@ -44,8 +44,8 @@
 // =100); otherwise returns 0 (unset, empty, =0, negative, or non-numeric).
 // Boolean toggles use this as a flag: `if (ggml_openvino_env_flag(name))` is
 // true iff the value is positive, so =0 is a no-op for all toggles.
-static int ggml_openvino_env_flag(const char * name) {
-    const char * v = getenv(name);
+int ggml_openvino_env_flag(const char * name) {  // no longer file-local: declared in utils.h for other sources
+    const char * v = ggml_openvino_getenv(name);  // cached snapshot from device-config init, not the live environment
     return v ? std::max(0, std::atoi(v)) : 0;
 }
 
diff --git a/ggml/src/ggml-openvino/utils.h b/ggml/src/ggml-openvino/utils.h
index 2ed8f0c40223..10253d991cf8 100644
--- a/ggml/src/ggml-openvino/utils.h
+++ b/ggml/src/ggml-openvino/utils.h
@@ -1,4 +1,3 @@
-#include "ggml-backend-impl.h"
 #include "ggml-decoder.h"
 #include "ggml-impl.h"
 
@@ -93,6 +92,8 @@ void print_input_tensor_info(const std::string & name, const ov::Tensor & tensor
 
 void print_output_tensor_info(const std::string & name, const ov::Tensor & tensor, const void * output_dst);
 
+int ggml_openvino_env_flag(const char * name);
+
 template <typename T>
 std::vector<T> pad_input(const T * data,
                          size_t rows,

From b3f21ea8b0fccde57591aff261dacb653eed6db7 Mon Sep 17 00:00:00 2001
From: Mostafa Faheem <mostafaaafaheem@gmail.com>
Date: Tue, 9 Jun 2026 12:40:22 +0300
Subject: [PATCH 2/3] OpenVINO backend: restore op-support checks changed in previous patch

---
 ggml/src/ggml-openvino/ggml-openvino.cpp | 97 ++++++++++++------------
 1 file changed, 48 insertions(+), 49 deletions(-)

diff --git a/ggml/src/ggml-openvino/ggml-openvino.cpp b/ggml/src/ggml-openvino/ggml-openvino.cpp
index 6eb0c9255e72..1960e9621de7 100644
--- a/ggml/src/ggml-openvino/ggml-openvino.cpp
+++ b/ggml/src/ggml-openvino/ggml-openvino.cpp
@@ -5,6 +5,7 @@
 #include "ggml-impl.h"
 #include "ggml-openvino-extra.h"
 #include "ggml-openvino/utils.h"
+#include "ggml-openvino/openvino/op_table.h"
 #include "ggml-quants.h"
 #include "ggml.h"
 
@@ -910,7 +911,8 @@ static bool is_op_unsupported_case(const ggml_tensor * op) {
         break;
     }
     case GGML_OP_ADD:
-    case GGML_OP_MUL: {
+    case GGML_OP_MUL:
+    case GGML_OP_SUB: {
         if (op->src[1]->op == GGML_OP_PERMUTE) {
             return true;
         }
@@ -1047,19 +1049,9 @@ static bool is_op_unsupported_case(const ggml_tensor * op) {
             op->src[0]->src[0]->src[0]->op == GGML_OP_PERMUTE) {
             return true;
         }
-        if (op->src[0]->type == GGML_TYPE_F16 && op->src[1]->type == GGML_TYPE_F16) {
-            // Has accuracy issue, try enabling this and see `test-backend-ops -o "MUL_MAT"`
-            // GGML_LOG_WARN("OpenVINO backend does not support MUL_MAT with two F16 tensors\n");
-            return true;
-        }
         if (op->src[0]->ne[3] != op->src[1]->ne[3] && op->src[0]->ne[3] != 1 && op->src[1]->ne[3] != 1) {
             return true;
         }
-        if (ggml_is_quantized(op->src[0]->type) && op->src[0]->ne[1] == 1) {
-            // MUL_MAT(type_a=q4_0,type_b=f32,m=1,n=2048,k=8192,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1)
-            // triggers a bug in ov matmul_shape_inference.hpp
-            return true;
-        }
         if (op->src[0]->op == GGML_OP_VIEW && op->src[1]->op == GGML_OP_VIEW) {
             return true;
         }
@@ -1148,6 +1140,14 @@ static bool is_op_unsupported_case(const ggml_tensor * op) {
         // Keep this op on CPU until the OpenVINO implementation is fixed.
         return true;
     }
+    case GGML_OP_VIEW: {
+        // Skip TOPK_MOE fused tests until it is fully supported
+        // the argsort_top_k VIEW wrapping ARGSORT is named "selected_experts" in test_topk_moe
+        if (strcmp(op->name, "selected_experts") == 0) {
+            return true;
+        }
+        break;
+    }
     default:
         break;
     }
@@ -1157,48 +1157,47 @@ static bool is_op_unsupported_case(const ggml_tensor * op) {
 static bool ggml_backend_openvino_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
     GGML_ASSERT(dev->reg != nullptr);
 
-    static std::set<ggml_type> supported_types{GGML_TYPE_F32,  GGML_TYPE_F16,  GGML_TYPE_BF16, GGML_TYPE_I64,
+    static std::unordered_set<ggml_type> supported_types{GGML_TYPE_F32,  GGML_TYPE_F16,  GGML_TYPE_BF16, GGML_TYPE_I64,
                                                GGML_TYPE_I32,  GGML_TYPE_Q4_0, GGML_TYPE_Q4_1, GGML_TYPE_Q4_K,
                                                GGML_TYPE_Q5_K, GGML_TYPE_Q8_0, GGML_TYPE_Q6_K};
 
-    static const std::set<ggml_op> supported_ops{GGML_OP_NONE,
-                                                 GGML_OP_ADD,
-                                                 GGML_OP_CONCAT,
-                                                 GGML_OP_DIV,
-                                                 GGML_OP_MUL,
-                                                 GGML_OP_MUL_MAT,
-                                                 GGML_OP_MUL_MAT_ID,
-                                                 GGML_OP_VIEW,
-                                                 GGML_OP_CONT,
-                                                 GGML_OP_RESHAPE,
-                                                 GGML_OP_PERMUTE,
-                                                 GGML_OP_TRANSPOSE,
-                                                 GGML_OP_GET_ROWS,
-                                                 GGML_OP_ROPE,
-                                                 GGML_OP_RMS_NORM,
-                                                 GGML_OP_SCALE,
-                                                 GGML_OP_NORM,
-                                                 GGML_OP_SOFT_MAX,
-                                                 GGML_OP_SET_ROWS,
-                                                 GGML_OP_FLASH_ATTN_EXT,
-                                                 GGML_OP_CPY,
-                                                 GGML_OP_L2_NORM,
-                                                 GGML_OP_SUM_ROWS,
-                                                 GGML_OP_CLAMP,
-                                                 GGML_OP_PAD,
-                                                 GGML_OP_SSM_CONV,
-                                                 GGML_OP_GATED_DELTA_NET,
-                                                 GGML_OP_IM2COL};
-    static const std::set<ggml_unary_op> supported_unary_ops{
-        GGML_UNARY_OP_GELU,
-        GGML_UNARY_OP_SILU,
-        GGML_UNARY_OP_SOFTPLUS,
-        GGML_UNARY_OP_TANH,
-    };
-    static const std::set<ggml_glu_op> supported_glu_ops{
-        GGML_GLU_OP_SWIGLU,
-        GGML_GLU_OP_GEGLU,
+    // derive supported op sets from the op_table map, keys in
+    // the map use the full macro name (e.g. "GGML_OP_ADD"), while
+    // the ggml_*_op_name() helpers return only the trailing part (e.g. "ADD").
+    // each set is built once and cached.
+    static const auto build_supported_sets = [] {
+        const auto & table = ov::frontend::ggml::get_supported_ops();
+        std::unordered_set<ggml_op> ops;
+        std::unordered_set<ggml_unary_op> unary_ops;
+        std::unordered_set<ggml_glu_op> glu_ops;
+
+        // GGML_OP_NONE has no translator but is always safe to add to the supported set.
+        ops.insert(GGML_OP_NONE);
+
+        for (int i = 0; i < GGML_OP_COUNT; ++i) {
+            const std::string key = std::string("GGML_OP_") + ggml_op_name(static_cast<ggml_op>(i));
+            if (table.count(key)) {
+                ops.insert(static_cast<ggml_op>(i));
+            }
+        }
+        for (int i = 0; i < GGML_UNARY_OP_COUNT; ++i) {
+            const std::string key = std::string("GGML_UNARY_OP_") + ggml_unary_op_name(static_cast<ggml_unary_op>(i));
+            if (table.count(key)) {
+                unary_ops.insert(static_cast<ggml_unary_op>(i));
+            }
+        }
+        for (int i = 0; i < GGML_GLU_OP_COUNT; ++i) {
+            const std::string key = std::string("GGML_GLU_OP_") + ggml_glu_op_name(static_cast<ggml_glu_op>(i));
+            if (table.count(key)) {
+                glu_ops.insert(static_cast<ggml_glu_op>(i));
+            }
+        }
+        return std::make_tuple(ops, unary_ops, glu_ops);
     };
+    static const auto supported_sets = build_supported_sets();
+    static const auto & supported_ops = std::get<0>(supported_sets);
+    static const auto & supported_unary_ops = std::get<1>(supported_sets);
+    static const auto & supported_glu_ops = std::get<2>(supported_sets);
 
     switch (op->op) {
     case GGML_OP_UNARY: {

From e68a1030383f5cc0f787b076472da8de2b2a22bb Mon Sep 17 00:00:00 2001
From: Mostafa Faheem <mostafaaafaheem@gmail.com>
Date: Tue, 9 Jun 2026 14:14:36 +0300
Subject: [PATCH 3/3] move ggml_openvino_env_flag to appropriate place

---
 ggml/src/ggml-openvino/utils.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ggml/src/ggml-openvino/utils.h b/ggml/src/ggml-openvino/utils.h
index 10253d991cf8..f9c9633abd9b 100644
--- a/ggml/src/ggml-openvino/utils.h
+++ b/ggml/src/ggml-openvino/utils.h
@@ -79,6 +79,8 @@ struct ov_runtime_context {
     }
 };
 
+int ggml_openvino_env_flag(const char * name);
+
 enum ggml_status ov_graph_compute(struct ggml_cgraph * cgraph, ggml_backend_t backend);
 
 enum ggml_status ov_graph_compute_dynamic(struct ggml_cgraph * cgraph, std::shared_ptr<ov_runtime_context> r_ctx);
@@ -92,8 +94,6 @@ void print_input_tensor_info(const std::string & name, const ov::Tensor & tensor
 
 void print_output_tensor_info(const std::string & name, const ov::Tensor & tensor, const void * output_dst);
 
-int ggml_openvino_env_flag(const char * name);
-
 template <typename T>
 std::vector<T> pad_input(const T * data,
                          size_t rows,