fschwartzer committed on
Commit
91e5f2f
1 Parent(s): 596bf83

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -19
app.py CHANGED
@@ -14,31 +14,33 @@ df['real'] = df['real'].apply(lambda x: f"{x:.2f}")
14
  # Fill NaN values and convert all columns to strings
15
  df = df.fillna('').astype(str)
16
 
17
# Function to filter the DataFrame using RapidFuzz for dates
def filter_dataframe_by_date(df, date_str, threshold=80):
    """Return the rows of ``df`` whose 'ds' (date) value fuzzily matches ``date_str``.

    Args:
        df: DataFrame with a 'ds' column holding the dates as text.
        date_str: Date text to look for (e.g. "December 2022").
        threshold: Minimum ``fuzz.token_sort_ratio`` score for a row to be kept.

    Returns:
        A DataFrame with the matching rows, in the order RapidFuzz reports
        them (best score first). Empty when nothing reaches ``threshold``.
    """
    # Match against a plain list rather than the Series itself: for a list the
    # third element of each result is the row *position*, which is what
    # ``iloc`` needs. For a Series it is the index *label*, which selects the
    # wrong rows (or raises IndexError) once the index is not 0..n-1.
    matches = process.extract(
        date_str,
        df['ds'].tolist(),
        scorer=fuzz.token_sort_ratio,
        limit=None,
        score_cutoff=threshold,  # RapidFuzz drops matches scoring below this
    )
    positions = [position for _, _, position in matches]
    return df.iloc[positions]
23
-
24
# Function to filter the DataFrame using RapidFuzz for groups
def filter_dataframe_by_group(df, group_keyword, threshold=80):
    """Return the rows of ``df`` whose 'Group' value fuzzily matches ``group_keyword``.

    Args:
        df: DataFrame with a 'Group' column holding the group names as text.
            It may be an already-filtered frame whose index is not 0..n-1.
        group_keyword: Group text to look for (e.g. "IPVA").
        threshold: Minimum ``fuzz.token_sort_ratio`` score for a row to be kept.

    Returns:
        A DataFrame with the matching rows, in the order RapidFuzz reports
        them (best score first). Empty when nothing reaches ``threshold``.
    """
    # Match against a plain list so each result carries the row *position*.
    # Passing the Series would yield index *labels*, and feeding those to
    # ``iloc`` breaks as soon as ``df`` is a subset of a larger frame.
    matches = process.extract(
        group_keyword,
        df['Group'].tolist(),
        scorer=fuzz.token_sort_ratio,
        limit=None,
        score_cutoff=threshold,  # RapidFuzz drops matches scoring below this
    )
    positions = [position for _, _, position in matches]
    return df.iloc[positions]
30
 
31
  # Function to generate a response using the TAPAS model
32
  def response(user_question, df):
33
  a = datetime.datetime.now()
34
 
35
  # Extract date and group keywords from the user question
36
- date_str = "December 2022" # Example; you'd extract this from the user question
37
  group_keyword = "IPVA"
38
 
39
  # Filter the DataFrame by date and group
40
- subset_df = filter_dataframe_by_date(df, date_str)
41
- subset_df = filter_dataframe_by_group(subset_df, group_keyword)
 
 
 
42
 
43
  # Initialize the TAPAS model
44
  tqa = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq",
@@ -52,10 +54,10 @@ def response(user_question, df):
52
  # Query the TAPAS model
53
  try:
54
  answer = tqa(table=subset_df, query=user_question)['answer']
55
- except IndexError as e:
56
  print(f"Error: {e}")
57
- answer = "Error occurred: " + str(e)
58
-
59
  query_result = {
60
  "Resposta": answer
61
  }
 
14
  # Fill NaN values and convert all columns to strings
15
  df = df.fillna('').astype(str)
16
 
17
# Function to filter the DataFrame using RapidFuzz
def filter_dataframe(df, date_str, group_keyword, threshold=80):
    """Return the rows of ``df`` that fuzzily match both a date and a group.

    A row is kept only when its 'ds' value matches ``date_str`` AND its
    'Group' value matches ``group_keyword``, each with a
    ``fuzz.token_sort_ratio`` score of at least ``threshold``.

    Args:
        df: DataFrame with 'ds' (date) and 'Group' columns holding text.
        date_str: Date text to look for (e.g. "December 2022").
        group_keyword: Group text to look for (e.g. "IPVA").
        threshold: Minimum score for either match to count.

    Returns:
        A DataFrame with the rows satisfying both criteria, in their original
        row order. Empty when no row satisfies both.
    """

    def matching_positions(query, column):
        # Match against a plain list so the third element of each result is
        # the row *position* expected by ``iloc``; with a Series it would be
        # the index *label*, which is only the same thing for a 0..n-1 index.
        matches = process.extract(
            query,
            df[column].tolist(),
            scorer=fuzz.token_sort_ratio,
            limit=None,
            score_cutoff=threshold,  # RapidFuzz drops matches scoring below this
        )
        return {position for _, _, position in matches}

    date_positions = matching_positions(date_str, 'ds')
    group_positions = matching_positions(group_keyword, 'Group')

    # Set iteration order is arbitrary; sort so the result is deterministic
    # and the rows keep the order they have in ``df``.
    common_positions = sorted(date_positions & group_positions)

    return df.iloc[common_positions]
 
29
 
30
  # Function to generate a response using the TAPAS model
31
  def response(user_question, df):
32
  a = datetime.datetime.now()
33
 
34
  # Extract date and group keywords from the user question
35
+ date_str = "December 2022" # Example; you'd extract this from the user question dynamically
36
  group_keyword = "IPVA"
37
 
38
  # Filter the DataFrame by date and group
39
+ subset_df = filter_dataframe(df, date_str, group_keyword)
40
+
41
+ # Check if the DataFrame is empty
42
+ if subset_df.empty:
43
+ return {"Resposta": "Desculpe, não há dados disponíveis para responder à sua pergunta."}
44
 
45
  # Initialize the TAPAS model
46
  tqa = pipeline(task="table-question-answering", model="google/tapas-large-finetuned-wtq",
 
54
  # Query the TAPAS model
55
  try:
56
  answer = tqa(table=subset_df, query=user_question)['answer']
57
+ except ValueError as e:
58
  print(f"Error: {e}")
59
+ answer = "Desculpe, ocorreu um erro ao processar sua pergunta."
60
+
61
  query_result = {
62
  "Resposta": answer
63
  }