"""Gradio demo that generates marketing emails with a LoRA-adapted causal LM.

Importing this module loads the base model named in the adapter's PEFT
config, attaches the ``Bsbell21/MarketMailAI`` adapter to it and loads the
tokenizer published with the adapter. Running it as a script launches a
Gradio interface around :func:`make_inference`.
"""

import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = "Bsbell21/MarketMailAI"

# Generation settings used by make_inference.
MAX_NEW_TOKENS = 150
REPETITION_PENALTY = 1.7

# The adapter config records which base checkpoint the adapter was trained on.
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    device_map="auto",
)
# The tokenizer is loaded from the adapter repository, not from the base model.
mixtral_tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

# Load the LoRA adapter on top of the base model.
model = PeftModel.from_pretrained(model, peft_model_id)


def input_from_text(product: str, description: str) -> str:
    """Build the ``[INST]...[/INST]`` prompt for one product.

    Args:
        product: Product name inserted under the ``### Product:`` heading.
        description: Text inserted under the ``### Description:`` heading.

    Returns:
        The complete prompt string handed to the tokenizer.
    """
    return (
        "[INST]Below is a product and description, please write a marketing "
        "email for this product.\n\n"
        f"### Product:\n{product}\n"
        f"### Description:\n{description}\n\n"
        "### Marketing Email:[/INST]"
    )


def make_inference(product: str, description: str) -> str:
    """Generate a marketing email for the given product.

    Args:
        product: Product name.
        description: Product description.

    Returns:
        The text generated after the prompt, with special tokens removed.
    """
    prompt = input_from_text(product, description)
    inputs = mixtral_tokenizer(prompt, return_tensors="pt")
    # The model is placed by device_map="auto"; move the inputs to the same
    # device instead of leaving them on the CPU.
    inputs = inputs.to(model.device)

    # `repetition_penalty` is a direct generate() argument; wrapping it in a
    # `generation_kwargs` dict never applies it as a generation setting.
    outputs = model.generate(
        **inputs,
        max_new_tokens=MAX_NEW_TOKENS,
        repetition_penalty=REPETITION_PENALTY,
    )

    # The returned sequence starts with the prompt tokens. Decoding only the
    # tokens after the prompt avoids depending on the "[/INST]" marker
    # surviving decoding or being absent from the user's own text.
    prompt_length = inputs["input_ids"].shape[1]
    completion_ids = outputs[0][prompt_length:]
    return mixtral_tokenizer.decode(completion_ids, skip_special_tokens=True)


if __name__ == "__main__":
    # gradio is only needed when running the demo, so import it lazily.
    import gradio as gr

    gr.Interface(
        make_inference,
        [
            gr.Textbox(lines=2, label="Product Name"),
            gr.Textbox(lines=5, label="Product Description"),
        ],
        gr.Textbox(label="Ad"),
        title="GenerAd-AI",
        description="GenerAd-AI is a generative model that generates ads for products.",
    ).launch()