import streamlit as st import pandas as pd import torch from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained("distilgpt2") model = AutoModelForCausalLM.from_pretrained("distilgpt2", torch_dtype=torch.float16) model = model.to('cuda') if torch.cuda.is_available() else model.to('cpu') # Set the padding token to the end-of-sequence token if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token df = pd.read_csv('anomalies.csv') # Função para gerar resposta def response(question): prompt = f"Considerando os dados: {df.to_string(index=False)}, onde 'ds' está em formato DateTime, 'real' é o valor da despesa e 'group' é o grupo da despesa. Pergunta: {question}" inputs = tokenizer(prompt, return_tensors='pt', padding='max_length', truncation=True, max_length=256) attention_mask = inputs['attention_mask'] input_ids = inputs['input_ids'] generated_ids = model.generate( input_ids, attention_mask=attention_mask, max_length=len(input_ids[0]) + 50, # Reduce max_length to speed up response temperature=0.7, top_p=0.9, no_repeat_ngram_size=2, num_beams=3, # Adding beams for more reliable generation ) generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True) final_response = generated_text.split("Resposta:")[-1].split(".")[0] + "." return final_response # Interface Streamlit st.markdown("""