From 12a391d324e2a37470233df8e6aeef038e680cbb Mon Sep 17 00:00:00 2001 From: "Shiyi Zheng (from Dev Box)" Date: Tue, 23 Jun 2026 10:57:32 +0800 Subject: [PATCH] examples: add facebook/bart-large-mnli text-classification recipe Ships an fp32 NLI head for facebook/bart-large-mnli at task=text-classification. Recipe carries value_range=[2,3] on input_ids to deterministically inject the eos_token_id required by BartForSequenceClassification's eos-pooling head. Goal-ladder verdict (CPU): - L0 build PASS - 1042 ops, 21 unique types, 407M params, 384 KB graph + 1.6 GB external data - L1-CPU perf PASS - 1.64 s/iter on 1024-token real-tokenized input (custom Python script; winml perf ignores recipe value_range and crashes on eos-pooling models with random ints - winml CLI feature gap to file separately) - L2 numerical PASS - cosine = 1.000000, max_abs = 1e-6 vs PyTorch reference (argmax = 2, ENTAILMENT, on both sides) - L3 task-metric PASS - accuracy = 0.88, latency = 1.89 s/sample on glue/mnli/validation_matched/100 samples, seed=42 (matches published ~0.886 within MC noise; first end-to-end Goal-L3 PASS for this repo) DML/QNN/OpenVINO are HOST-BLOCKED on producer host (DML 0xC0000409, QNN absent, OpenVINO DLL-load-fails) - not penalized per local skill convention. Optimum-coverage: VENDOR-COVERED on text-classification via Optimum BartOnnxConfig; recipe is pure-data, no per-architecture code change needed. Producer notes from running the recipe live in research/adding-model-support/model_knowledge/bart.json on the skills-poc working branch (not landed to main yet; pending separate skill-research PR for the full research/ tree). 
--- examples/recipes/README.md | 1 + .../text-classification_config.json | 58 +++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 examples/recipes/facebook_bart-large-mnli/text-classification_config.json diff --git a/examples/recipes/README.md b/examples/recipes/README.md index caaa2c15f..ead4e01a2 100644 --- a/examples/recipes/README.md +++ b/examples/recipes/README.md @@ -17,6 +17,7 @@ Each *(model, task)* includes: | Model | Task | |---|---| | BAAI/bge-large-en-v1.5 | sentence-similarity | +| facebook/bart-large-mnli | text-classification | | cardiffnlp/twitter-roberta-base-sentiment-latest | text-classification | | deepset/roberta-base-squad2 | question-answering | | deepset/tinyroberta-squad2 | question-answering | diff --git a/examples/recipes/facebook_bart-large-mnli/text-classification_config.json b/examples/recipes/facebook_bart-large-mnli/text-classification_config.json new file mode 100644 index 000000000..7d2395177 --- /dev/null +++ b/examples/recipes/facebook_bart-large-mnli/text-classification_config.json @@ -0,0 +1,58 @@ +{ + "_note": "Workaround: input_ids.value_range narrowed to [2,3] (eos_token_id=2) so BartForSequenceClassification eos-pooling never sees an empty match during export. Verified end-to-end: build complete (157s), L0 PASS (1042 nodes, opset 17), L1-CPU PASS (1638ms/iter with real input — `winml perf` will fail because random ints lack eos), L2 PASS (cosine=1.0, max_abs=1e-6 vs PyTorch on premise+hypothesis pair). 
See research/adding-model-support/model_knowledge/bart.json bart-004.", + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 2, + 3 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 1024 + ], + "value_range": [ + 1, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "text-classification", + "model_class": "AutoModelForSequenceClassification", + "model_type": "bart" + } +}