import os

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, QuantoConfig

# Token for accessing the gated model repository on the Hugging Face Hub
access_token = os.environ["GATED_ACCESS_TOKEN"]

# Quantize the model weights to 4-bit integers via Quanto to reduce memory usage
quantization_config = QuantoConfig(weights="int4")

# The quantization config and device map apply to the model, not the tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    "ProbeMedicalYonseiMAILab/medllama3-v20",
    token=access_token,
)
model = AutoModelForCausalLM.from_pretrained(
    "ProbeMedicalYonseiMAILab/medllama3-v20",
    quantization_config=quantization_config,
    device_map="auto",
    token=access_token,
)


# Function to generate text using the model
def generate_text(prompt):
    # Move inputs to the same device the model was placed on by device_map
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=512)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Create the Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(lines=5, label="Input Prompt"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="MedLlama3 Text Generation",
    description="Use this interface to generate text using the MedLlama3-v20 language model.",
)

# Launch the Gradio interface
iface.launch()
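
# Optional usage sketch (assumptions): once the app is running, it can also be
# queried programmatically with the `gradio_client` package. The URL below
# assumes a default local launch, and "/predict" is the default endpoint name
# for a gr.Interface; adjust both for your deployment. Run this from a separate
# process, since iface.launch() blocks.
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   result = client.predict("Example prompt goes here", api_name="/predict")
#   print(result)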