import streamlit as st
import tl_calamancy_lg
import os
import pandas as pd
import json
from sklearn.metrics.pairwise import cosine_similarity
# Cache the calamanCy/spaCy pipeline so it is loaded only once per session
@st.cache_resource()
def load_nlp_model():
    return tl_calamancy_lg.load()
# Load the spaCy model using the cached function
nlp = load_nlp_model()
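# Note: tl_calamancy_lg is the large calamanCy pipeline for Tagalog (built on spaCy);
# the document vectors it produces drive the cosine-similarity matching below.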
# Cache the parsed dataset so the JSON file is read only once per file path
@st.cache_data
def load_data(file_path):
    # Open and parse the JSON intents file
    with open(file_path, 'r') as file:
        data = json.load(file)
    # Extract patterns and responses into separate lists
    patterns_data = []
    responses_data = []
    for intent in data["intents"]:
        tag = intent["tag"]
        patterns = intent.get("patterns", [])
        responses = intent.get("responses", [])
        for pattern in patterns:
            patterns_data.append({"tag": tag, "pattern": pattern})
        for response in responses:
            responses_data.append({"tag": tag, "response": response})
    # Build one DataFrame of (tag, pattern) rows and one of (tag, response) rows
    patterns_df = pd.DataFrame(patterns_data)
    responses_df = pd.DataFrame(responses_data)
    return patterns_df, responses_df
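# Expected (illustrative) shape of dataset_v2.json, inferred from the keys read above:
# {
#   "intents": [
#     {"tag": "example_tag",
#      "patterns": ["sample user question", "..."],
#      "responses": ["sample reply", "..."]}
#   ]
# }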
# Build the path to the intents file relative to the current working directory
cwd = os.getcwd()
file_path = os.path.join(cwd, "dataset_v2.json")
# Load the DataFrames using the cached function
patterns_df, responses_df = load_data(file_path)
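# patterns_df: one row per (tag, pattern); responses_df: one row per (tag, response)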
# Cache the similarity lookup so repeated queries are not re-vectorised
@st.cache_data
def get_most_similar_tag(user_query, dataframe):
    # Run the user query and all stored patterns through the calamanCy pipeline
    all_queries = list(dataframe['pattern']) + [user_query]
    processed_queries = [nlp(query) for query in all_queries]
    # Get the document vector for each query
    vectors = [query.vector for query in processed_queries]
    # Pairwise cosine similarity; the last row corresponds to the user query
    similarity_matrix = cosine_similarity(vectors, vectors)
    # Similarity of the user query against every stored pattern
    user_similarity_scores = similarity_matrix[-1, :-1]
    # Index of the pattern with the highest similarity score
    most_similar_index = user_similarity_scores.argmax()
    # Tag attached to that best-matching pattern
    most_similar_tag = dataframe['tag'].iloc[most_similar_index]
    # Return the most similar tag and its similarity score
    return most_similar_tag, user_similarity_scores[most_similar_index]
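# Example usage (hypothetical query; the score is the cosine similarity of the best match):
#   tag, score = get_most_similar_tag("Masakit ang ulo ko", patterns_df)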
def main():
    # Streamlit page title
    st.title("TagaCare")
    # React to user input ("Magtanong ng lunas sa sakit" = "Ask for a remedy for an illness")
    if prompt := st.chat_input("Magtanong ng lunas sa sakit"):
        # Use the cached function to get the most similar tag and its score
        returned_tag, returned_score = get_most_similar_tag(prompt, patterns_df)
        st.success(f"{returned_tag} ({returned_score:.2f})")
        # Show every stored response for the matched tag
        for response in responses_df.loc[responses_df['tag'] == returned_tag, 'response']:
            st.success(response)
if __name__ == "__main__":
    main()