File size: 3,146 Bytes
c6dc269
 
6c96c7d
5a206cd
acf8351
10379c1
e2a88a0
 
 
6fb9798
45968fa
d03396c
 
45968fa
 
e2a88a0
7f0298f
e2a88a0
 
7f0298f
e2a88a0
 
 
 
 
c6dc269
5a206cd
9bbf8c6
acf8351
6c96c7d
d8f6691
 
 
6c96c7d
d8f6691
41debeb
d8f6691
b13ea5d
d8f6691
 
12c48cb
41debeb
 
d8f6691
acf8351
 
d8f6691
acf8351
 
 
 
 
3b9304d
c6dc269
23171a7
00ac646
6fb9798
00ac646
4ade980
 
86088c2
 
9b8124b
86088c2
4ade980
 
c6dc269
23171a7
c6dc269
 
 
23171a7
c6dc269
 
 
5a206cd
f62bfee
5a206cd
c6dc269
23171a7
5a206cd
c6dc269
23171a7
5a206cd
 
c6dc269
23171a7
c6dc269
 
 
23171a7
c6dc269
f62bfee
5a206cd
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import streamlit as st
import pandas as pd
import torch
from transformers import TapexTokenizer, BartForConditionalGeneration
import datetime

# Load the anomaly dataset that the chatbot answers questions about.
df = pd.read_csv("anomalies.csv", quotechar='"')

# Rename raw CSV columns to the Portuguese display names the UI (and the
# model's table input) will use.
df.rename(columns={"ds": "Ano e mês", "real": "Valor Monetário", "Group": "Grupo"}, inplace=True)

# Sort while 'Valor Monetário' is still numeric — after the string
# conversion below, ordering would become lexicographic.
df.sort_values(by=['Ano e mês', 'Valor Monetário'], ascending=False, inplace=True)

# Debug output to the server console (not shown in the Streamlit UI).
print(df)

# Keep only rows with 'Valor Monetário' >= 1,000,000.
# NOTE(review): the original comment said "higher than 10 Million", but the
# threshold is 1e6 (one million) — confirm which value is intended.
df= df[df['Valor Monetário'] >= 1000000.]

# Format the monetary values as fixed two-decimal strings, since TAPEX
# expects a table of strings.
df['Valor Monetário'] = df['Valor Monetário'].apply(lambda x: f"{x:.2f}")

# Blank out NaNs and force every column to string for the tokenizer.
df = df.fillna('').astype(str)

# Alias used by the model call and the UI preview below.
table_data = df

# Module-level cache so the (large) TAPEX model and tokenizer are loaded
# only once per process instead of on every user question.
_MODEL_CACHE = {}


def _get_model_and_tokenizer(model_name):
    """Load and memoize the (model, tokenizer) pair for *model_name*."""
    if model_name not in _MODEL_CACHE:
        model = BartForConditionalGeneration.from_pretrained(model_name)
        tokenizer = TapexTokenizer.from_pretrained(model_name)
        _MODEL_CACHE[model_name] = (model, tokenizer)
    return _MODEL_CACHE[model_name]


# Function to generate a response using the TAPEX model
def response(user_question, table_data):
    """Answer *user_question* against *table_data* with TAPEX.

    Parameters:
        user_question: natural-language question (str).
        table_data: pandas DataFrame of strings (TAPEX requirement).

    Returns:
        dict with a single key "Resposta" holding the decoded answer string.
    """
    a = datetime.datetime.now()

    model_name = "microsoft/tapex-large-finetuned-wtq"
    # Cached after the first call — avoids re-instantiating the model on
    # every question, which dominated response latency.
    model, tokenizer = _get_model_and_tokenizer(model_name)

    queries = [user_question]

    # Truncation guards against tables longer than the model's max input.
    encoding = tokenizer(table=table_data, query=queries, padding=True, return_tensors="pt", truncation=True)

    # Greedy decoding with default generation parameters.
    outputs = model.generate(
        **encoding
    )

    ans = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    query_result = {
        "Resposta": ans[0]
    }

    # Log end-to-end latency to the server console.
    b = datetime.datetime.now()
    print(b - a)

    return query_result

# Streamlit interface
# Streamlit re-runs this whole script top-to-bottom on every interaction,
# so all rendering of chat messages is driven from st.session_state.

# Preview of the table the model answers questions about.
st.dataframe(table_data.head())

# App header: three colored dots plus the title.
st.markdown("""
<div style='display: flex; align-items: center;'>
    <div style='width: 40px; height: 40px; background-color: green; border-radius: 50%; margin-right: 5px;'></div>
    <div style='width: 40px; height: 40px; background-color: red; border-radius: 50%; margin-right: 5px;'></div>
    <div style='width: 40px; height: 40px; background-color: yellow; border-radius: 50%; margin-right: 5px;'></div>
    <span style='font-size: 40px; font-weight: bold;'>Chatbot do Tesouro RS</span>
</div>
""", unsafe_allow_html=True)

# Chat history survives re-runs via session_state.
if 'history' not in st.session_state:
    st.session_state['history'] = []

# Input box for user question
user_question = st.text_input("Escreva sua questão aqui:", "")

if user_question:
    # Record the question, then the model's answer. Rendering happens
    # exclusively in the history loop below — previously each exchange was
    # also rendered inline here, so it appeared twice on every run.
    st.session_state['history'].append(('👤', user_question))

    # Generate the response
    bot_response = response(user_question, table_data)["Resposta"]
    st.session_state['history'].append(('🤖', bot_response))

# Clear history button (must run before the display loop so the cleared
# state takes effect within the same re-run).
if st.button("Limpar"):
    st.session_state['history'] = []

# Display chat history: user messages left-aligned, bot messages right.
for sender, message in st.session_state['history']:
    if sender == '👤':
        st.markdown(f"**👤 {message}**")
    elif sender == '🤖':
        st.markdown(f"<div style='text-align: right'>**🤖 {message}**</div>", unsafe_allow_html=True)