diff --git a/examples/recipes/Helsinki-NLP_opus-mt-fr-en/translation_fp16_decoder_config.json b/examples/recipes/Helsinki-NLP_opus-mt-fr-en/translation_fp16_decoder_config.json new file mode 100644 index 000000000..417ef1ad0 --- /dev/null +++ b/examples/recipes/Helsinki-NLP_opus-mt-fr-en/translation_fp16_decoder_config.json @@ -0,0 +1,285 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "decoder_input_ids", + "dtype": "int32", + "shape": [ + 1, + 1 + ], + "value_range": [ + 0, + 59514 + ] + }, + { + "name": "encoder_hidden_states", + "dtype": "float32", + "shape": [ + 1, + 512, + 512 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "attention_mask", + "dtype": "int64", + "shape": [ + 1, + 512 + ] + }, + { + "name": "decoder_attention_mask", + "dtype": "int64", + "shape": [ + 1, + 512 + ] + }, + { + "name": "cache_position", + "dtype": "int64", + "shape": [ + 1 + ] + }, + { + "name": "past_0_key", + "dtype": "float32", + "shape": [ + 1, + 8, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_0_value", + "dtype": "float32", + "shape": [ + 1, + 8, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_key", + "dtype": "float32", + "shape": [ + 1, + 8, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_1_value", + "dtype": "float32", + "shape": [ + 1, + 8, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_key", + "dtype": "float32", + "shape": [ + 1, + 8, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_2_value", + "dtype": "float32", + "shape": [ + 1, + 8, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_key", + "dtype": "float32", + "shape": [ + 1, + 8, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_3_value", + "dtype": "float32", + "shape": [ + 1, + 8, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_key", + "dtype": "float32", + "shape": [ + 1, + 8, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_4_value", + "dtype": "float32", + "shape": [ + 1, + 8, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_key", + "dtype": "float32", + "shape": [ + 1, + 8, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + }, + { + "name": "past_5_value", + "dtype": "float32", + "shape": [ + 1, + 8, + 512, + 64 + ], + "value_range": [ + 0, + 1 + ] + } + ], + "output_tensors": [ + { + "name": "logits" + }, + { + "name": "present_0_key" + }, + { + "name": "present_0_value" + }, + { + "name": "present_1_key" + }, + { + "name": "present_1_value" + }, + { + "name": "present_2_key" + }, + { + "name": "present_2_value" + }, + { + "name": "present_3_key" + }, + { + "name": "present_3_value" + }, + { + "name": "present_4_key" + }, + { + "name": "present_4_value" + }, + { + "name": "present_5_key" + }, + { + "name": "present_5_value" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "text2text-generation", + "model_class": "MarianDecoderWrapper", + "model_type": "marian" + } +} \ No newline at end of file diff --git a/examples/recipes/Helsinki-NLP_opus-mt-fr-en/translation_fp16_encoder_config.json b/examples/recipes/Helsinki-NLP_opus-mt-fr-en/translation_fp16_encoder_config.json new file mode 100644 index 000000000..b59e5fc9f --- /dev/null +++ b/examples/recipes/Helsinki-NLP_opus-mt-fr-en/translation_fp16_encoder_config.json @@ -0,0 +1,57 @@ +{ + "export": { + "opset_version": 17, + "batch_size": 1, + "export_params": true, + "do_constant_folding": true, + "verbose": false, + "dynamo": false, + "enable_hierarchy_tags": true, + "clean_onnx": false, + "hierarchy_tag_format": "full", + "input_tensors": [ + { + "name": "input_ids", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 59514 + ] + }, + { + "name": "attention_mask", + "dtype": "int32", + "shape": [ + 1, + 512 + ], + "value_range": [ + 0, + 2 + ] + } + ], + "output_tensors": [ + { + "name": "encoder_hidden_states" + } + ] + }, + "optim": { + "clamp_constant_values": true, + "gelu_fusion": true, + "matmul_add_fusion": true, + "remove_isnan_in_attention_mask": true + }, + "quant": null, + "compile": null, + "loader": { + "task": "feature-extraction", + "model_class": "MarianEncoderWrapper", + "model_type": "marian" + } +} \ No newline at end of file diff --git a/examples/recipes/README.md b/examples/recipes/README.md index caaa2c15f..ccb644416 100644 --- a/examples/recipes/README.md +++ b/examples/recipes/README.md @@ -17,6 +17,7 @@ Each *(model, task)* includes: | Model | Task | |---|---| | BAAI/bge-large-en-v1.5 | sentence-similarity | +| Helsinki-NLP/opus-mt-fr-en | translation | | cardiffnlp/twitter-roberta-base-sentiment-latest | text-classification | | deepset/roberta-base-squad2 | question-answering | | deepset/tinyroberta-squad2 | question-answering |