Spaces:

fschwartzer
/

streamlit_chatbot

Running

App Files Files Community

streamlit_chatbot / app[tapas].py

fschwartzer

Rename app.py to app[tapas].py

13aa08a verified 8 days ago

raw

history blame

No virus

4.31 kB

	import streamlit as st
	import pandas as pd
	import torch
	from transformers import pipeline
	import datetime
	from rapidfuzz import process, fuzz
	from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

	# Load the anomalies table; fields may be wrapped in double quotes.
	df = pd.read_csv("anomalies.csv", quotechar='"')

	# Keep only rows whose 'real' value is at least 1,000,000.
	# NOTE(review): an earlier comment here said "10 Million", but the threshold
	# in the code is one million -- confirm which value is intended.
	# .copy() makes the filtered frame independent, so the column assignment
	# below does not write into a slice of the original (SettingWithCopyWarning).
	df = df[df['real'] >= 1000000.].copy()

	# Render 'real' as fixed-point text with two decimals
	df['real'] = df['real'].apply(lambda x: f"{x:.2f}")

	# Replace missing values with empty strings and make every column a string
	df = df.fillna('').astype(str)

	print(df)

	def remove_stopwords(text, stopwords=ENGLISH_STOP_WORDS):
	    """Return ``text`` with every word found in ``stopwords`` removed.

	    Words are obtained by splitting on whitespace and compared in lower
	    case; the surviving words keep their original casing and order and are
	    joined back with single spaces.
	    """
	    kept = []
	    for token in text.split():
	        if token.lower() in stopwords:
	            continue
	        kept.append(token)
	    return ' '.join(kept)

	def filter_dataframe(df, user_question, threshold=80):
	    """Return the rows of ``df`` that fuzzily match a word of the question.

	    Stopwords are stripped from ``user_question`` and each remaining word is
	    compared against every cell of every column using RapidFuzz's
	    ``token_sort_ratio``. A row is kept when at least one of its cells
	    scores ``threshold`` or higher for at least one word.

	    Args:
	        df: table to filter.
	        user_question: free-text question typed by the user.
	        threshold: minimum similarity score for a cell to count as a match.

	    Returns:
	        The matching subset of ``df``; empty when nothing matches or the
	        question contains only stopwords.
	    """
	    # dict.fromkeys drops repeated words (keeping order) so that no word is
	    # scored against the whole table more than once.
	    question_words = list(dict.fromkeys(remove_stopwords(user_question).split()))

	    # Explicit bool dtype keeps the mask a valid boolean indexer even when
	    # df has no rows.
	    mask = pd.Series(False, index=df.index, dtype=bool)

	    for column in df.columns:
	        choices = df[column]
	        for word in question_words:
	            # score_cutoff lets RapidFuzz discard weak matches itself instead
	            # of returning a score for every cell to be filtered afterwards.
	            matches = process.extract(word, choices, scorer=fuzz.token_sort_ratio,
	                                      score_cutoff=threshold, limit=None)
	            # Each result is (value, score, key); the key is used as the row
	            # label so the mask stays aligned with the DataFrame index.
	            matched_labels = [match[2] for match in matches]
	            if matched_labels:
	                mask.loc[matched_labels] = True

	    return df[mask]

	@st.cache_resource
	def _load_tqa_pipeline():
	    """Build the TAPAS table-question-answering pipeline.

	    Wrapped in ``st.cache_resource`` so the model is created once and reused
	    across Streamlit reruns instead of being rebuilt for every question.
	    """
	    return pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq",
	                    tokenizer_kwargs={"clean_up_tokenization_spaces": False})


	def response(user_question, df):
	    """Answer ``user_question`` from ``df`` using the TAPAS model.

	    The table is first narrowed to the rows related to the question with
	    ``filter_dataframe``; if nothing is left, an apology message is returned
	    without querying the model.

	    Args:
	        user_question: question text typed by the user.
	        df: table to answer from.

	    Returns:
	        A dict with the single key ``"Resposta"`` holding the answer text,
	        or an apology message when there is no data or the model raised a
	        ``ValueError``.
	    """
	    started = datetime.datetime.now()

	    # Filter the DataFrame dynamically by user question
	    subset_df = filter_dataframe(df, user_question)

	    # Nothing to ask the model about
	    if subset_df.empty:
	        return {"Resposta": "Desculpe, não há dados disponíveis para responder à sua pergunta."}

	    # Reuse the cached model rather than constructing it on every call
	    tqa = _load_tqa_pipeline()

	    # Debugging information
	    print("Filtered DataFrame shape:", subset_df.shape)
	    print("Filtered DataFrame head:\n", subset_df.head())
	    print("User question:", user_question)

	    # Query the TAPAS model
	    try:
	        answer = tqa(table=subset_df, query=user_question)['answer']
	    except ValueError as e:
	        print(f"Error: {e}")
	        answer = "Desculpe, ocorreu um erro ao processar sua pergunta."

	    print("Time taken:", datetime.datetime.now() - started)

	    return {"Resposta": answer}

	# Streamlit interface: header with three coloured dots and the app title
	st.markdown("""
	<div style='display: flex; align-items: center;'>
	<div style='width: 40px; height: 40px; background-color: green; border-radius: 50%; margin-right: 5px;'></div>
	<div style='width: 40px; height: 40px; background-color: red; border-radius: 50%; margin-right: 5px;'></div>
	<div style='width: 40px; height: 40px; background-color: yellow; border-radius: 50%; margin-right: 5px;'></div>
	<span style='font-size: 40px; font-weight: bold;'>Chatbot do Tesouro RS</span>
	</div>
	""", unsafe_allow_html=True)

	# Chat history is kept in session_state so it survives script reruns
	if 'history' not in st.session_state:
	    st.session_state['history'] = []

	# Last question that was already answered (see the rerun note below)
	if 'last_question' not in st.session_state:
	    st.session_state['last_question'] = None

	# Input box for user question
	user_question = st.text_input("Escreva sua questão aqui:", "")

	# Streamlit re-executes this script on every interaction and the text box
	# keeps its value, so a rerun caused by something else (e.g. the "Limpar"
	# button) would otherwise append the same question again and re-run the
	# model. Only handle a question that has not been answered yet.
	if user_question and user_question != st.session_state['last_question']:
	    st.session_state['last_question'] = user_question

	    # Human emoji marks the user's side of the conversation
	    st.session_state['history'].append(('👤', user_question))

	    # Generate the response
	    bot_response = response(user_question, df)["Resposta"]

	    # Robot emoji marks the bot's side of the conversation
	    st.session_state['history'].append(('🤖', bot_response))

	# Clear history button
	if st.button("Limpar"):
	    st.session_state['history'] = []

	# Display chat history. This is the only place messages are rendered, so the
	# latest exchange is not shown twice. Bot messages are aligned to the right.
	for sender, message in st.session_state['history']:
	    if sender == '👤':
	        st.markdown(f"👤 {message}")
	    elif sender == '🤖':
	        st.markdown(f"<div style='text-align: right'>🤖 {message}</div>", unsafe_allow_html=True)