Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 62 additions & 6 deletions src/winml/modelkit/commands/perf.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,46 @@ def __init__(self, config: BenchmarkConfig) -> None:
self._model: WinMLPreTrainedModel | WinMLCompositeModel | None = None
self._inputs: dict[str, np.ndarray] | None = None
self._memory: dict[str, float] | None = None
# Concrete device + EP resolved from the config's request, populated by
# _resolve_device_ep() on the first call (before the build). The config
# keeps the raw request (e.g. "auto"); these hold what actually drives
# the build and inference.
self._resolved_device: str | None = None
self._resolved_ep: EPNameOrAlias | None = None

def _resolve_device_ep(self) -> None:
"""Resolve the concrete target device + EP, failing fast on a bad combo.

Idempotent: the first call resolves and stores the result on
``_resolved_device`` / ``_resolved_ep``; later calls return immediately.
Nothing is returned — callers read those attributes. Called at the
start of model loading so an unavailable/invalid device+EP raises here —
before the export/optimize/quantize/compile pipeline runs — instead of
only surfacing at session.compile(). Deriving a concrete EP also lets the
build's static analyzer target one EP instead of aggregating across all
of them (WinMLAutoModel itself stays permissive: ep=None is a valid
library mode).

``_resolved_ep`` stays ``None`` when the config names no EP and
``resolve_eps`` yields no EPs for the resolved device.

Raises:
ValueError: If the requested device/EP combination is unavailable
or invalid (propagated from ``resolve_device``).
"""
# _resolved_device doubles as the "already resolved" flag, since
# _resolved_ep can legitimately remain None after resolution.
if self._resolved_device is not None:
return

from ..sysinfo import resolve_device, resolve_eps

# resolve_device() availability-checks even when --ep is explicit, so a
# named-but-absent EP is caught here too.
resolved_device, _ = resolve_device(device=self.config.device, ep=self.config.ep)
if self.config.ep is not None:
# Keep the user's EP (alias or canonical) verbatim — downstream
# stages normalize it. Only derive one when the user gave none.
resolved_ep: EPNameOrAlias | None = self.config.ep
else:
# Take the first EP listed for the resolved device, if any.
device_eps = resolve_eps(resolved_device)
resolved_ep = device_eps[0] if device_eps else None
Comment thread
xieofxie marked this conversation as resolved.

self._resolved_device = resolved_device
self._resolved_ep = resolved_ep

@property
def _is_composite(self) -> bool:
Expand Down Expand Up @@ -478,6 +518,10 @@ def _load_model(self) -> None:
from ..config import WinMLBuildConfig
from ..models import WinMLAutoModel

# Resolve the concrete device + EP first so a bad combo fails fast,
# before from_pretrained/from_onnx kick off the build pipeline.
self._resolve_device_ep()

model_id = self.config.model_id
model_path = Path(model_id)
is_onnx = model_path.suffix.lower() == ".onnx"
Expand All @@ -500,9 +544,9 @@ def _load_model(self) -> None:
common_kwargs: dict[str, Any] = {
"task": self.config.task,
"config": override,
"device": self.config.device,
"device": self._resolved_device or self.config.device,
"precision": self.config.precision,
"ep": self.config.ep,
"ep": self._resolved_ep,
"provider_options": self.config.ep_options,
"use_cache": use_cache,
"force_rebuild": force_rebuild,
Expand Down Expand Up @@ -538,7 +582,7 @@ def _resolve_adapter_luid(self) -> str | None:
return None

assert self._model is not None
device = self._single.device or self.config.device
device = self._single.device or self._resolved_device or self.config.device
if device == "cpu":
return None

Expand Down Expand Up @@ -739,7 +783,8 @@ def _perf_modules(
monitor: If True, wrap each per-module benchmark with HWMonitor.
device: Target device policy ("auto", "cpu", "gpu", "npu").
ep: Explicit execution provider (e.g., "qnn", "dml"). Overrides
device-to-provider mapping when set.
device-to-provider mapping when set. When ``None``, a concrete EP is
derived from the resolved device so the analyzer targets one EP.
ep_options: Runtime EP provider options (e.g. QNN
``htp_performance_mode``) forwarded to each per-module session.
precision: Precision mode passed through to the build stage.
Expand All @@ -752,10 +797,17 @@ def _perf_modules(

from ..build import build_hf_model
from ..config import SubmoduleClassNotFoundError, generate_hf_build_config
from ..sysinfo import resolve_device
from ..sysinfo import resolve_device, resolve_eps
from .build import _instantiate_parent_model

resolved_device, _ = resolve_device(device=device, ep=ep)
# Derive a concrete EP when none was given so each per-module build's static
# analyzer targets one EP instead of ep=None (which aggregates across all
# EPs and warns; see #931). An explicit EP is kept verbatim — downstream
# stages normalize it.
if ep is None:
device_eps = resolve_eps(resolved_device)
ep = device_eps[0] if device_eps else None
Comment thread
xieofxie marked this conversation as resolved.

console.print(f"[dim]Generating module configs for {module_class}...[/dim]")

Expand Down Expand Up @@ -1514,6 +1566,8 @@ def perf(
"[yellow]Warning:[/yellow] --shape-config is not supported "
"in --module mode and will be ignored."
)
# _perf_modules resolves the device + derives a concrete EP internally
# (it will fold into PerfBenchmark — see #939).
_perf_modules(
hf_model=hf_model,
module_class=module_class,
Expand Down Expand Up @@ -1559,7 +1613,9 @@ def perf(
if output is None:
output = generate_output_path(hf_model)

# Create config
# Create config. The raw device/EP request is passed through unchanged;
# PerfBenchmark resolves the concrete device + EP internally (failing fast
# before the build), so the CLI does not pre-resolve here.
config = BenchmarkConfig(
model_id=hf_model,
task=task,
Expand Down
117 changes: 113 additions & 4 deletions tests/unit/commands/test_perf_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,20 @@

@pytest.fixture(autouse=True)
def mock_resolve_device():
"""Mock resolve_device to avoid hardware detection in all perf CLI tests."""
with patch(
"winml.modelkit.sysinfo.resolve_device",
return_value=("cpu", ["cpu"]),
"""Mock device/EP resolution to avoid hardware detection in all perf CLI tests.

PerfBenchmark resolves the device (and, when no EP is given, derives a concrete
EP via resolve_eps) before the build — the perf() CLI itself does not
pre-resolve — so both are stubbed to a deterministic CPU result.
"""
with (
patch(
"winml.modelkit.sysinfo.resolve_device",
return_value=("cpu", ["cpu"]),
),
patch(
"winml.modelkit.sysinfo.resolve_eps",
return_value=["CPUExecutionProvider"],
),
):
yield

Expand Down Expand Up @@ -495,6 +505,105 @@ def test_cli_ep_options_invalid_format_rejected(
assert result.exit_code != 0
assert "KEY=VALUE" in result.output

def test_load_model_no_ep_derives_concrete_ep(self, tmp_path: Path) -> None:
"""Without an EP, PerfBenchmark resolves a concrete one before building.

Regression guard: previously ep stayed None down to the build, so the
static analyzer ran with ep=None and aggregated across all EPs (and
logged a warning). PerfBenchmark now resolves the EP from the device
(autouse fixture stubs resolve_eps -> ["CPUExecutionProvider"]) and
passes it to from_onnx. The config keeps the raw request (ep=None);
the resolved value lives on the instance.
"""
# Placeholder bytes with an .onnx suffix; from_onnx is patched below, so
# presumably nothing parses the contents — TODO confirm against _load_model.
onnx_file = tmp_path / "model.onnx"
onnx_file.write_bytes(b"fake onnx")

# No device/ep given: the config carries only the raw (unresolved) request.
config = BenchmarkConfig(model_id=str(onnx_file), task="image-classification")
benchmark = PerfBenchmark(config)

with patch(
"winml.modelkit.models.auto.WinMLAutoModel.from_onnx",
return_value=MagicMock(),
) as mock_from_onnx:
benchmark._load_model()

# The derived EP must reach from_onnx, be cached on the instance, and
# leave the config's raw request untouched.
assert mock_from_onnx.call_args.kwargs["ep"] == "CPUExecutionProvider"
assert benchmark._resolved_ep == "CPUExecutionProvider"
assert config.ep is None

def test_load_model_explicit_ep_passed_through_verbatim(self, tmp_path: Path) -> None:
"""An explicit EP reaches from_onnx unchanged (no normalization).

Downstream build/session stages normalize aliases themselves, so
PerfBenchmark must not rewrite the user's value (e.g. 'qnn' stays 'qnn').
"""
onnx_file = tmp_path / "model.onnx"
onnx_file.write_bytes(b"fake onnx")

# "qnn" is given in its short alias form on purpose, to detect any rewriting.
config = BenchmarkConfig(
model_id=str(onnx_file), task="image-classification", device="npu", ep="qnn"
)
benchmark = PerfBenchmark(config)

with patch(
"winml.modelkit.models.auto.WinMLAutoModel.from_onnx",
return_value=MagicMock(),
) as mock_from_onnx:
benchmark._load_model()

# Both the forwarded kwarg and the cached value keep the alias as typed.
assert mock_from_onnx.call_args.kwargs["ep"] == "qnn"
assert benchmark._resolved_ep == "qnn"

def test_load_model_unavailable_device_ep_fails_before_build(self, tmp_path: Path) -> None:
"""An unavailable device/EP combo fails before the build pipeline runs.

PerfBenchmark resolves device+EP at the start of _load_model, so an
unavailable combo (resolve_device raises ValueError) surfaces before
from_onnx kicks off the build — the user does not wait for the whole
build only to fail at session.compile().
"""
onnx_file = tmp_path / "model.onnx"
onnx_file.write_bytes(b"fake onnx")

config = BenchmarkConfig(model_id=str(onnx_file), task="image-classification", device="npu")
benchmark = PerfBenchmark(config)

# This local resolve_device patch nests over the autouse stub, so resolution
# raises for this test only.
with (
patch(
"winml.modelkit.sysinfo.resolve_device",
side_effect=ValueError("no compatible EP is available"),
),
patch("winml.modelkit.models.auto.WinMLAutoModel.from_onnx") as mock_from_onnx,
pytest.raises(ValueError, match="no compatible EP is available"),
):
benchmark._load_model()

# Checked after the with block: the build entry point must never have run.
mock_from_onnx.assert_not_called()

def test_cli_unavailable_device_ep_surfaces_error(
self, runner: CliRunner, tmp_path: Path
) -> None:
"""The CLI surfaces the fail-fast resolution error with a non-zero exit.

End-to-end counterpart of the PerfBenchmark-level fail-fast test: the
``perf`` command is invoked with ``--device npu`` while resolve_device is
patched to raise, and the outcome is checked on the captured CLI result.
"""
onnx_file = tmp_path / "model.onnx"
onnx_file.write_bytes(b"fake onnx")

# This local resolve_device patch nests over the autouse stub, so resolution
# raises for this test only.
with (
patch(
"winml.modelkit.sysinfo.resolve_device",
side_effect=ValueError("no compatible EP is available"),
),
patch("winml.modelkit.models.auto.WinMLAutoModel.from_onnx") as mock_from_onnx,
):
result = runner.invoke(
perf,
["-m", str(onnx_file), "--device", "npu", "-o", str(tmp_path / "out.json")],
obj={},
)

# Non-zero exit, the resolver's message visible to the user, and no build started.
assert result.exit_code != 0
assert "no compatible EP is available" in result.output
mock_from_onnx.assert_not_called()

def test_help_shows_ep_options(self, runner: CliRunner) -> None:
result = runner.invoke(perf, ["--help"])
assert result.exit_code == 0
Expand Down
23 changes: 23 additions & 0 deletions tests/unit/commands/test_perf_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from typing import TYPE_CHECKING
from unittest.mock import ANY, MagicMock, patch

import pytest
from click.testing import CliRunner

from winml.modelkit.cli import main
Expand All @@ -20,6 +21,28 @@
from pathlib import Path


@pytest.fixture(autouse=True)
def _mock_device_resolution():
"""Stub device/EP resolution so module tests stay hermetic.

In module mode the resolution happens inside ``_perf_modules`` — it calls
resolve_device(), plus resolve_eps() when no EP is given — rather than in
perf() itself, which passes the raw request through. Tests that need a
specific device override resolve_device locally inside their own
``with patch(...)`` block, which nests over (and wins against) this
autouse default.
"""
# Patched on winml.modelkit.sysinfo, the module the perf code imports these
# names from at call time.
with (
patch(
"winml.modelkit.sysinfo.resolve_device",
return_value=("cpu", ["cpu"]),
),
patch(
"winml.modelkit.sysinfo.resolve_eps",
return_value=["CPUExecutionProvider"],
),
):
yield


class TestPerfModuleFlag:
"""Tests for --module flag on winml perf."""

Expand Down
Loading