import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model and tokenizer (cached so this runs only once per session)
@st.cache_resource
def load_model():
    model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        torch_dtype=torch.float16,
    )
    return model, tokenizer

model, tokenizer = load_model()

st.title("Chatbot with Meta-Llama 3.1")

# User input
user_input = st.text_input("You:", "")

if user_input:
    # Tokenize the user input and move it to the model's device
    # (works with device_map="auto", unlike hard-coding "cuda")
    inputs = tokenizer(user_input, return_tensors="pt").to(model.device)

    # Generate the response; max_new_tokens caps only the generated text,
    # and passing **inputs also forwards the attention mask
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=True,
            top_p=0.95,
            temperature=0.7,
        )

    # Decode only the newly generated tokens (skip the echoed prompt) and display them
    response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    st.text_area("Chatbot response:", value=response, height=200)
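
# Optional refinement (a sketch, not part of the original snippet): Llama 3.1
# Instruct checkpoints are trained on a chat format, so responses are usually
# better when the prompt is built with the tokenizer's chat template rather
# than passed as raw text. This assumes a transformers version that provides
# tokenizer.apply_chat_template; the system prompt text below is illustrative.
#
# messages = [
#     {"role": "system", "content": "You are a helpful assistant."},
#     {"role": "user", "content": user_input},
# ]
# input_ids = tokenizer.apply_chat_template(
#     messages,
#     add_generation_prompt=True,
#     return_tensors="pt",
# ).to(model.device)
# with torch.no_grad():
#     outputs = model.generate(
#         input_ids,
#         max_new_tokens=150,
#         do_sample=True,
#         top_p=0.95,
#         temperature=0.7,
#     )
# response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)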