From 3a5446df2c038a6d9d4f0df255c92ecbb0de6403 Mon Sep 17 00:00:00 2001 From: xieofxie Date: Tue, 23 Jun 2026 16:33:29 +0800 Subject: [PATCH 1/3] fix(build): forward --ep to config generation; gate compile on EP availability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `winml build` dropped --ep when auto-generating a config — it passed only device/precision to generate_build_config, so the requested EP never reached config generation. With an explicit --device that isn't present, config gen then resolved the device with ep=None and failed immediately with a device-availability error, silently ignoring --ep (e.g. `winml build -m microsoft/resnet-50 --ep openvino --device npu --compile`). - Forward `ep` into generate_build_config so the EP shapes the generated config. - Add an early, compile-gated EP-availability check after config validation: a compile build physically instantiates the EP, so an unavailable EP/device now fails fast (before export) instead of deep in the compile stage. A no-compile build only produces a portable, analyzed ONNX and may target a device absent on the build machine (cross-compile), so it is allowed to proceed. - Stop the optimize-stage device resolution from availability-failing: it only needs a concrete device name for the analyzer's rule-data lookup, so it no longer passes ep or rejects an absent explicit device. Verified end-to-end: `--compile --device npu --ep openvino` (no NPU) now fails before export with the EP respected; `--no-compile` proceeds to export the portable model. Related to the EP-threading family (#931). --- src/winml/modelkit/commands/build.py | 30 ++++++++++++- tests/unit/commands/test_build.py | 66 ++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 2 deletions(-) diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py index c3ffc660d..1eebcb116 100644 --- a/src/winml/modelkit/commands/build.py +++ b/src/winml/modelkit/commands/build.py @@ -579,6 +579,7 @@ def build( trust_remote_code=trust_remote_code, device=device, precision=precision, + ep=ep, ) if not quant: config_or_configs.quant = None @@ -641,6 +642,24 @@ def _patch_device(cfg: WinMLBuildConfig) -> None: except ValueError as e: raise click.UsageError(f"Config validation failed: {e}") from e + # Fail fast for compile builds: compilation physically instantiates the + # EP (EPContext), so the target EP/device must be present on this + # machine. Catch an unavailable combo here -- before export/optimize -- + # instead of surfacing deep in the compile stage. A no-compile build + # only produces a portable, analyzed ONNX and may legitimately target a + # device absent on this machine (cross-compile), so it is left to run. + for _cfg in _configs_to_validate: + _compile = _cfg.compile + if _compile is None or _compile.ep_config is None: + continue + from ..sysinfo import resolve_device as _resolve_device_check + + try: + _resolve_device_check(device=device or "auto", ep=_compile.ep_config.provider) + except ValueError as e: + raise click.UsageError(str(e)) from e + break + preloaded_hf_config = _validate_loader_tasks_for_model( model_id=model_id, configs=_configs_to_validate, @@ -1012,11 +1031,18 @@ def _on_iteration_start(iteration: int, max_iter: int) -> None: _header_shown[0] = False # Resolve "auto" to a concrete device once so that has_rule_data_for_ep - # doesn't search for non-existent "*_AUTO_*.parquet" files. + # doesn't search for non-existent "*_AUTO_*.parquet" files. Only a device + # name is needed (the EP comes from each analyzer callback below), so + # don't pass ep or availability-fail here: a no-compile cross-compile + # build may target a device absent on this machine (compile builds are + # gated earlier, before export). from ..analyze.utils.ep_utils import has_rule_data_for_ep from ..sysinfo import resolve_device as _resolve_device - _resolved_device, _ = _resolve_device(device=device or "auto", ep=ep) + if device and device.lower() != "auto": + _resolved_device = device.lower() + else: + _resolved_device, _ = _resolve_device(device="auto") def _on_ep_start(ep_name: EPName, operator_counts: dict) -> None: nonlocal _current_ep diff --git a/tests/unit/commands/test_build.py b/tests/unit/commands/test_build.py index 00f54fc23..904f55314 100644 --- a/tests/unit/commands/test_build.py +++ b/tests/unit/commands/test_build.py @@ -1711,3 +1711,69 @@ def test_returns_compiled_path_when_file_exists( # current_path should be updated to compiled_path assert result == compiled_path + + +class TestBuildEpResolution: + """--ep forwarding into config generation + the compile EP-availability gate.""" + + def _base_args(self, cfg: str, tmp_path: Path) -> list[str]: + return ["-c", cfg, "-m", "microsoft/resnet-50", "-o", str(tmp_path / "out")] + + def test_ep_forwarded_to_generate_build_config( + self, tmp_path: Path, mock_run_single_build: MagicMock + ): + """On the auto-config path (-m, no -c), --ep reaches generate_build_config. + + Regression: the build command dropped --ep when auto-generating a config, + so the requested EP never influenced the generated config (it failed or + analyzed/compiled for the wrong EP). + """ + fake_cfg = MagicMock() + fake_cfg.compile = None # no compile -> EP-availability gate is skipped + with ( + patch("winml.modelkit.config.generate_build_config", return_value=fake_cfg) as mock_gen, + patch( + "winml.modelkit.commands.build._validate_loader_tasks_for_model", + return_value=None, + ), + ): + result = _invoke( + ["-m", "microsoft/resnet-50", "--ep", "openvino", "-o", str(tmp_path / "out")] + ) + assert result.exit_code == 0, result.output + assert mock_gen.call_args.kwargs["ep"] == "openvino" + + def test_compile_build_unavailable_ep_fails_before_build( + self, tmp_path: Path, mock_run_single_build: MagicMock + ): + """A compile build whose EP isn't present fails fast, before _run_single_build. + + Compilation physically instantiates the EP, so an unavailable EP must be + caught up front rather than deep in the compile stage. + """ + cfg = _make_minimal_config_file(tmp_path, compile_section={"execution_provider": "qnn"}) + with patch( + "winml.modelkit.sysinfo.resolve_device", + side_effect=ValueError("Requested EP 'qnn' is not available on this system."), + ): + result = _invoke([*self._base_args(cfg, tmp_path), "--ep", "qnn", "--compile"]) + assert result.exit_code == 2, result.output + assert "not available on this system" in result.output + mock_run_single_build.assert_not_called() + + def test_no_compile_build_skips_ep_availability_gate( + self, tmp_path: Path, mock_run_single_build: MagicMock + ): + """A no-compile build proceeds even when the EP isn't present (cross-compile). + + No-compile only produces a portable, analyzed ONNX, so it must not require + the target EP/device to exist on the build machine. + """ + cfg = _make_minimal_config_file(tmp_path, compile_section={"execution_provider": "qnn"}) + with patch( + "winml.modelkit.sysinfo.resolve_device", + side_effect=ValueError("Requested EP 'qnn' is not available on this system."), + ): + result = _invoke([*self._base_args(cfg, tmp_path), "--ep", "qnn", "--no-compile"]) + assert result.exit_code == 0, result.output + mock_run_single_build.assert_called_once() From b762c59f4e0b78de46d4276336d3ab89f2d3975d Mon Sep 17 00:00:00 2001 From: xieofxie Date: Tue, 23 Jun 2026 17:17:22 +0800 Subject: [PATCH 2/3] force device + ep set --- src/winml/modelkit/sysinfo/device.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/winml/modelkit/sysinfo/device.py b/src/winml/modelkit/sysinfo/device.py index 6f6fdb5b9..84b45164c 100644 --- a/src/winml/modelkit/sysinfo/device.py +++ b/src/winml/modelkit/sysinfo/device.py @@ -147,7 +147,7 @@ def _get_available_eps() -> frozenset[EPName]: def resolve_device( device: str = "auto", *, - ep: EPNameOrAlias | None = None, + ep: EPNameOrAlias | None, ) -> tuple[str, list[str]]: """Resolve target device with EP availability cross-check. @@ -233,7 +233,7 @@ def resolve_eps(resolved_device: str) -> list[EPName]: def resolve_check_device_ep( - *, device: str = "auto", ep: EPNameOrAlias | None = None + *, device: str, ep: EPNameOrAlias | None ) -> tuple[str, list[str], list[EPName]]: """Resolve or check that the requested device and/or EP combination is valid, raising if not. From baa659ff554f56a0f06eaacf845c199feb23735a Mon Sep 17 00:00:00 2001 From: xieofxie Date: Tue, 23 Jun 2026 17:35:37 +0800 Subject: [PATCH 3/3] fix: require explicit device + ep in resolve_device; fix all callers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit resolve_device(device, *, ep) now takes both device and ep as required args, so an omission is a call error instead of a silent ep=None / device="auto". Several callers still relied on the old defaults (latent TypeErrors) — pass ep=None explicitly where no EP filter is intended: - analyzer: auto-device resolution for rule-file selection - build: the ep-is-None auto-EP resolution - eval: device resolution - compile stage: device resolution (EP comes from ep_config) - test_device.py: resolve_device call sites Also keeps the build command forwarding --ep to generate_build_config, and drops the compile-availability gate / optimize-stage tweak from earlier on this branch — requiring explicit device+ep is the cleaner, centralized enforcement. --- src/winml/modelkit/analyze/analyzer.py | 7 ++-- src/winml/modelkit/commands/build.py | 31 ++-------------- src/winml/modelkit/commands/eval.py | 2 +- src/winml/modelkit/compiler/stages/compile.py | 2 +- src/winml/modelkit/sysinfo/device.py | 2 +- tests/unit/commands/test_build.py | 35 ------------------- tests/unit/sysinfo/test_device.py | 18 +++++----- 7 files changed, 17 insertions(+), 80 deletions(-) diff --git a/src/winml/modelkit/analyze/analyzer.py b/src/winml/modelkit/analyze/analyzer.py index 60d8dac01..779b4a52e 100644 --- a/src/winml/modelkit/analyze/analyzer.py +++ b/src/winml/modelkit/analyze/analyzer.py @@ -140,10 +140,7 @@ def _build_runtime_debug_details_summary( level_bucket[node_stable_key] = candidate_entry continue - if ( - existing_entry.case_indices is None - and candidate_entry.case_indices is not None - ): + if existing_entry.case_indices is None and candidate_entry.case_indices is not None: existing_entry.case_indices = candidate_entry.case_indices if existing_entry.table_path is None and candidate_entry.table_path is not None: @@ -798,7 +795,7 @@ def analyze_from_proto( if device is not None and device.lower() == "auto": from ..sysinfo import resolve_device - resolved, _ = resolve_device("auto") + resolved, _ = resolve_device("auto", ep=None) device_to_use = resolved.upper() logger.info("Device 'auto' resolved to: %s", device_to_use) else: diff --git a/src/winml/modelkit/commands/build.py b/src/winml/modelkit/commands/build.py index 1eebcb116..186d9fdb6 100644 --- a/src/winml/modelkit/commands/build.py +++ b/src/winml/modelkit/commands/build.py @@ -554,7 +554,7 @@ def build( from ..sysinfo import resolve_eps as _resolve_eps try: - resolved_device, _ = _resolve_device(device=device) + resolved_device, _ = _resolve_device(device=device, ep=None) except ValueError as e: raise click.UsageError(str(e)) from e device = resolved_device @@ -642,24 +642,6 @@ def _patch_device(cfg: WinMLBuildConfig) -> None: except ValueError as e: raise click.UsageError(f"Config validation failed: {e}") from e - # Fail fast for compile builds: compilation physically instantiates the - # EP (EPContext), so the target EP/device must be present on this - # machine. Catch an unavailable combo here -- before export/optimize -- - # instead of surfacing deep in the compile stage. A no-compile build - # only produces a portable, analyzed ONNX and may legitimately target a - # device absent on this machine (cross-compile), so it is left to run. - for _cfg in _configs_to_validate: - _compile = _cfg.compile - if _compile is None or _compile.ep_config is None: - continue - from ..sysinfo import resolve_device as _resolve_device_check - - try: - _resolve_device_check(device=device or "auto", ep=_compile.ep_config.provider) - except ValueError as e: - raise click.UsageError(str(e)) from e - break - preloaded_hf_config = _validate_loader_tasks_for_model( model_id=model_id, configs=_configs_to_validate, @@ -1031,18 +1013,11 @@ def _on_iteration_start(iteration: int, max_iter: int) -> None: _header_shown[0] = False # Resolve "auto" to a concrete device once so that has_rule_data_for_ep - # doesn't search for non-existent "*_AUTO_*.parquet" files. Only a device - # name is needed (the EP comes from each analyzer callback below), so - # don't pass ep or availability-fail here: a no-compile cross-compile - # build may target a device absent on this machine (compile builds are - # gated earlier, before export). + # doesn't search for non-existent "*_AUTO_*.parquet" files. from ..analyze.utils.ep_utils import has_rule_data_for_ep from ..sysinfo import resolve_device as _resolve_device - if device and device.lower() != "auto": - _resolved_device = device.lower() - else: - _resolved_device, _ = _resolve_device(device="auto") + _resolved_device, _ = _resolve_device(device=device or "auto", ep=ep) def _on_ep_start(ep_name: EPName, operator_counts: dict) -> None: nonlocal _current_ep diff --git a/src/winml/modelkit/commands/eval.py b/src/winml/modelkit/commands/eval.py index 414153b09..d7c6aa448 100644 --- a/src/winml/modelkit/commands/eval.py +++ b/src/winml/modelkit/commands/eval.py @@ -364,7 +364,7 @@ def _resolve_device(cfg: WinMLEvaluationConfig) -> None: console = Console(stderr=True) console.print("[bold]Detecting available devices...[/bold]") - resolved, _ = resolve_device(cfg.device) + resolved, _ = resolve_device(cfg.device, ep=None) cfg.device = resolved console.print(f"[dim]Using device:[/dim] {resolved}") diff --git a/src/winml/modelkit/compiler/stages/compile.py b/src/winml/modelkit/compiler/stages/compile.py index 4bc1c28c4..da761882f 100644 --- a/src/winml/modelkit/compiler/stages/compile.py +++ b/src/winml/modelkit/compiler/stages/compile.py @@ -176,7 +176,7 @@ def _compile_multiple(self, context: CompileContext) -> None: sess_options = context.shared_session_options if sess_options is None: register_execution_providers(ort=True) - resolved_device, _ = resolve_device(context.config.get("device", "auto")) + resolved_device, _ = resolve_device(context.config.get("device", "auto"), ep=None) ep = normalize_ep_name(ep_config.provider) or resolve_eps(resolved_device)[0] device_type = DEVICE_TO_DEVICE_TYPE.get(resolved_device.upper()) diff --git a/src/winml/modelkit/sysinfo/device.py b/src/winml/modelkit/sysinfo/device.py index 84b45164c..05e714102 100644 --- a/src/winml/modelkit/sysinfo/device.py +++ b/src/winml/modelkit/sysinfo/device.py @@ -145,7 +145,7 @@ def _get_available_eps() -> frozenset[EPName]: def resolve_device( - device: str = "auto", + device: str, *, ep: EPNameOrAlias | None, ) -> tuple[str, list[str]]: diff --git a/tests/unit/commands/test_build.py b/tests/unit/commands/test_build.py index 904f55314..ffa29bb7c 100644 --- a/tests/unit/commands/test_build.py +++ b/tests/unit/commands/test_build.py @@ -1742,38 +1742,3 @@ def test_ep_forwarded_to_generate_build_config( ) assert result.exit_code == 0, result.output assert mock_gen.call_args.kwargs["ep"] == "openvino" - - def test_compile_build_unavailable_ep_fails_before_build( - self, tmp_path: Path, mock_run_single_build: MagicMock - ): - """A compile build whose EP isn't present fails fast, before _run_single_build. - - Compilation physically instantiates the EP, so an unavailable EP must be - caught up front rather than deep in the compile stage. - """ - cfg = _make_minimal_config_file(tmp_path, compile_section={"execution_provider": "qnn"}) - with patch( - "winml.modelkit.sysinfo.resolve_device", - side_effect=ValueError("Requested EP 'qnn' is not available on this system."), - ): - result = _invoke([*self._base_args(cfg, tmp_path), "--ep", "qnn", "--compile"]) - assert result.exit_code == 2, result.output - assert "not available on this system" in result.output - mock_run_single_build.assert_not_called() - - def test_no_compile_build_skips_ep_availability_gate( - self, tmp_path: Path, mock_run_single_build: MagicMock - ): - """A no-compile build proceeds even when the EP isn't present (cross-compile). - - No-compile only produces a portable, analyzed ONNX, so it must not require - the target EP/device to exist on the build machine. - """ - cfg = _make_minimal_config_file(tmp_path, compile_section={"execution_provider": "qnn"}) - with patch( - "winml.modelkit.sysinfo.resolve_device", - side_effect=ValueError("Requested EP 'qnn' is not available on this system."), - ): - result = _invoke([*self._base_args(cfg, tmp_path), "--ep", "qnn", "--no-compile"]) - assert result.exit_code == 0, result.output - mock_run_single_build.assert_called_once() diff --git a/tests/unit/sysinfo/test_device.py b/tests/unit/sysinfo/test_device.py index aa730ea75..5062abd63 100644 --- a/tests/unit/sysinfo/test_device.py +++ b/tests/unit/sysinfo/test_device.py @@ -83,7 +83,7 @@ def test_no_npu_no_gpu(self) -> None: def test_returns_empty_when_enumeration_fails(self) -> None: """If EP enumeration raises, return empty tuple (no devices visible). - ``resolve_device("auto")`` is responsible for the CPU fallback when no + ``resolve_device("auto", ep=None)`` is responsible for the CPU fallback when no devices are reachable; ``_get_available_devices`` only reports what is actually registered. """ @@ -214,7 +214,7 @@ def test_resolve_device_auto_npu_with_ep(self) -> None: "cpu": ("CPUExecutionProvider",), } ): - device, available = resolve_device("auto") + device, available = resolve_device("auto", ep=None) assert device == "npu" assert available == ["npu", "gpu", "cpu"] @@ -227,7 +227,7 @@ def test_resolve_device_auto_npu_without_ep(self) -> None: "cpu": ("CPUExecutionProvider",), } ): - device, available = resolve_device("auto") + device, available = resolve_device("auto", ep=None) assert device == "gpu" assert available == ["gpu", "cpu"] @@ -235,7 +235,7 @@ def test_resolve_device_auto_npu_without_ep(self) -> None: def test_resolve_device_auto_cpu_fallback(self) -> None: """Auto mode: only CPU EP registered -> returns "cpu".""" with _patch_device_ep_map({"cpu": ("CPUExecutionProvider",)}): - device, available = resolve_device("auto") + device, available = resolve_device("auto", ep=None) assert device == "cpu" assert available == ["cpu"] @@ -248,7 +248,7 @@ def test_resolve_device_explicit_valid(self) -> None: "cpu": ("CPUExecutionProvider",), } ): - device, available = resolve_device("gpu") + device, available = resolve_device("gpu", ep=None) assert device == "gpu" assert available == ["gpu", "cpu"] @@ -256,7 +256,7 @@ def test_resolve_device_explicit_valid(self) -> None: def test_resolve_device_explicit_invalid(self) -> None: """Unrecognized device "tpu" -> raises ValueError.""" with pytest.raises(ValueError, match="Unknown device 'tpu'"): - resolve_device("tpu") + resolve_device("tpu", ep=None) def test_resolve_device_explicit_no_ep_error_names_missing_eps(self) -> None: """Error message must name the compatible EPs so users know what to install.""" @@ -268,7 +268,7 @@ def test_resolve_device_explicit_no_ep_error_names_missing_eps(self) -> None: ), pytest.raises(ValueError) as exc_info, ): - resolve_device("npu") + resolve_device("npu", ep=None) message = str(exc_info.value) assert "no compatible EP" in message @@ -278,7 +278,7 @@ def test_resolve_device_explicit_no_ep_error_names_missing_eps(self) -> None: def test_resolve_device_case_insensitive(self) -> None: """Device argument should be case-insensitive.""" with _patch_device_ep_map({"cpu": ("CPUExecutionProvider",)}): - device, _ = resolve_device("CPU") + device, _ = resolve_device("CPU", ep=None) assert device == "cpu" @@ -293,7 +293,7 @@ def test_resolve_device_no_eps_raises(self) -> None: _patch_device_ep_map({}), pytest.raises(RuntimeError, match="No execution providers detected"), ): - resolve_device("auto") + resolve_device("auto", ep=None) class TestResolveDeviceWithEp: