# Llama3.1_mini / app.py
import os
from dotenv import load_dotenv
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Load environment variables from the .env file
load_dotenv()

# Read the Hugging Face access token from the environment
hf_token = os.getenv("HF_API_TOKEN")
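
# Note: the Meta-Llama 3.1 weights are gated on the Hugging Face Hub, so this
# token must belong to an account that has been granted access to the model.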
# Load the model and tokenizer (cached so they are only loaded once per session)
@st.cache_resource
def load_model():
    model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        torch_dtype=torch.float16,
        token=hf_token,
    )
    return model, tokenizer
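
# Note: device_map="auto" (used above) requires the `accelerate` package; it
# places the weights on the available GPU(s) and falls back to CPU otherwise.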
model, tokenizer = load_model()
st.title("Chatbot with Meta-Llama 3.1")
# User input
user_input = st.text_input("You:", "")

if user_input:
    # Tokenize the input and move it to the device the model was loaded on
    # (more robust than hard-coding "cuda", which fails on CPU-only machines)
    inputs = tokenizer(user_input, return_tensors="pt").to(model.device)

    # Generate the response; passing **inputs also forwards the attention mask
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=True,
            top_p=0.95,
            temperature=0.7,
        )

    # Decode and display the response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    st.text_area("Chatbot response:", value=response, height=200)
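
# Meta-Llama-3.1-8B-Instruct is a chat-tuned model: raw-text prompts work, but
# it responds better when the input is wrapped in its chat template. A minimal
# sketch of that variant, using the same variables as above (the system prompt
# text is an illustrative assumption):
#
#     messages = [
#         {"role": "system", "content": "You are a helpful assistant."},
#         {"role": "user", "content": user_input},
#     ]
#     input_ids = tokenizer.apply_chat_template(
#         messages, add_generation_prompt=True, return_tensors="pt"
#     ).to(model.device)
#     outputs = model.generate(input_ids, max_new_tokens=150, do_sample=True,
#                              top_p=0.95, temperature=0.7)
#     # Decode only the newly generated tokens, skipping the echoed prompt
#     response = tokenizer.decode(outputs[0][input_ids.shape[-1]:],
#                                 skip_special_tokens=True)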