import os

import bitsandbytes as bnb
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

access_token = os.environ["GATED_ACCESS_TOKEN"]

# 4-bit NF4 quantization via bitsandbytes. Note that bnb_4bit_compute_dtype
# must be an actual torch dtype, not the string "torch.float16".
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the tokenizer and model. Pass the config object itself;
# `quantization_config=True` is not a valid value.
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    quantization_config=quantization_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

# Gated Mixtral variant, kept for reference:
#model_id = "mistralai/Mixtral-8x7B-v0.1"
#tokenizer = AutoTokenizer.from_pretrained(model_id, token=access_token)
#model = AutoModelForCausalLM.from_pretrained(model_id, token=access_token, load_in_4bit=True)
#model = AutoModelForCausalLM.from_pretrained(model_id, token=access_token)

# Initialize the quantizer
#quantizer = bnb.GemmQuantizer(act_bits=8, weight_bits=8)
# Quantize the model
#model = quantizer(model)

# Function to generate text using the model
def generate_text(prompt):
    # Move inputs to the model's device; with device_map="auto" the model
    # may live on GPU while the tokenizer output defaults to CPU.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=20)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Create the Gradio interface. The gr.inputs/gr.outputs namespaces were
# deprecated and later removed; use the top-level components instead.
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(lines=5, label="Input Prompt"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="Mistral-7B Text Generation",
    description="Use this interface to generate text using the Mistral-7B-v0.1 language model.",
)

# Launch the Gradio interface
iface.launch()
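
# A quick way to exercise the model without launching the web UI: call
# generate_text() directly. This is a minimal sketch; the prompt string is
# just an illustrative example, and the sampling kwargs shown are standard
# transformers generate() parameters, not settings taken from this script.
#
#   print(generate_text("The capital of France is"))
#
#   # Hypothetical variant with sampling enabled and a longer output:
#   # outputs = model.generate(**inputs, max_new_tokens=128,
#   #                          do_sample=True, temperature=0.7, top_p=0.9)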