from pathlib import Path

from transformers import T5Config, T5ForConditionalGeneration, T5Tokenizer
from transformers.models.t5 import T5OnnxConfig
from transformers.onnx import export

# Load the T5-efficient-tiny model and tokenizer
model_name = "google/t5-efficient-tiny"
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)
config = T5Config.from_pretrained(model_name)

# Prepare a sample input (reused below to sanity-check the exported model)
text = "Translate English to French: The house is wonderful."
inputs = tokenizer(text, return_tensors="pt")

# Build the export configuration. An OnnxConfig maps input/output names to
# their dynamic axes rather than to fixed shapes and dtypes, and T5 is an
# encoder-decoder model whose forward pass also needs decoder_input_ids, so
# instead of hand-rolling an encoder-only config we use the T5OnnxConfig
# that ships with transformers, with the "seq2seq-lm" task to match
# T5ForConditionalGeneration.
onnx_config = T5OnnxConfig(config, task="seq2seq-lm")

# Export the model to ONNX format. export() also requires an opset version
# and returns the input/output names that made it into the ONNX graph.
output_path = Path("t5-efficient-tiny.onnx")
onnx_inputs, onnx_outputs = export(
    preprocessor=tokenizer,
    model=model,
    config=onnx_config,
    opset=onnx_config.default_onnx_opset,
    output=output_path,
)

print("Model has been successfully exported to ONNX format.")
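
# Optional sanity checks: a minimal sketch, assuming the transformers.onnx
# API used above (deprecated in recent releases in favor of optimum, but
# still functional) and that onnxruntime is installed. The input names fed
# to the session below are the ones defined by onnx_config.

# transformers can compare the ONNX graph's outputs against the original
# PyTorch model within the config's tolerance.
from transformers.onnx import validate_model_outputs

validate_model_outputs(
    onnx_config, tokenizer, model, output_path, onnx_outputs, onnx_config.atol_for_validation
)

# Run a single greedy decoding step with onnxruntime. The seq2seq graph
# expects the encoder inputs plus decoder_input_ids/decoder_attention_mask,
# so we seed the decoder with the model's start token.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession(str(output_path), providers=["CPUExecutionProvider"])
decoder_input_ids = np.array([[config.decoder_start_token_id]], dtype=np.int64)
logits = session.run(
    None,
    {
        "input_ids": inputs["input_ids"].numpy(),
        "attention_mask": inputs["attention_mask"].numpy(),
        "decoder_input_ids": decoder_input_ids,
        "decoder_attention_mask": np.ones_like(decoder_input_ids),
    },
)[0]
print("First predicted token:", tokenizer.decode(int(logits[0, -1].argmax())))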