import streamlit as st
import tl_calamancy_lg
import os
import pandas as pd
import json
from sklearn.metrics.pairwise import cosine_similarity
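# Dependencies (inferred from the imports above, not pinned to exact versions):
# streamlit, pandas, scikit-learn, and the tl_calamancy_lg Tagalog spaCy
# pipeline from the calamanCy project.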

# Cache the spaCy pipeline so the model is loaded only once per session
@st.cache_resource
def load_nlp_model():
    return tl_calamancy_lg.load()

# Load the spaCy model using the cached function
nlp = load_nlp_model()

# Cache the dataset so the JSON file is parsed only once
@st.cache_data
def load_data(file_path):
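    # Expected layout of the intents file, inferred from the keys read below
    # (the example values are illustrative, not taken from dataset_v2.json):
    #
    # {
    #   "intents": [
    #     {
    #       "tag": "lagnat",
    #       "patterns": ["Ano ang gamot sa lagnat?"],
    #       "responses": ["Uminom ng maraming tubig at magpahinga."]
    #     }
    #   ]
    # }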
    # Open the JSON file (UTF-8, since the dataset contains Tagalog text)
    with open(file_path, 'r', encoding='utf-8') as file:
        # Load the JSON data
        data = json.load(file)

    # Extract patterns and responses into separate lists
    patterns_data = []
    responses_data = []

    for intent in data["intents"]:
        tag = intent["tag"]
        patterns = intent.get("patterns", [])
        responses = intent.get("responses", [])

        for pattern in patterns:
            patterns_data.append({"tag": tag, "pattern": pattern})

        for response in responses:
            responses_data.append({"tag": tag, "response": response})

    # Create and return DataFrames
    patterns_df = pd.DataFrame(patterns_data)
    responses_df = pd.DataFrame(responses_data)
    return patterns_df, responses_df

# Resolve the dataset path relative to the current working directory
cwd = os.getcwd()

# Path to the intents JSON file
file_path = os.path.join(cwd, "dataset_v2.json")

# Load the DataFrames using the cached function
patterns_df, responses_df = load_data(file_path)

# Cache similarity lookups per (query, dataset) pair
@st.cache_data
def get_most_similar_tag(user_query, dataframe):
    # Embed the stored patterns and the user query with the spaCy pipeline
    pattern_vectors = [nlp(pattern).vector for pattern in dataframe['pattern']]
    user_vector = nlp(user_query).vector.reshape(1, -1)

    # Cosine similarity between the user query and every pattern
    # (a single row instead of the full pairwise matrix)
    similarity_scores = cosine_similarity(user_vector, pattern_vectors)[0]

    # Index of the best-matching pattern
    most_similar_index = similarity_scores.argmax()

    # Return the tag of the best match and its similarity score
    most_similar_tag = dataframe['tag'].iloc[most_similar_index]
    return most_similar_tag, similarity_scores[most_similar_index]
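
# Illustrative call (query, tag, and score here are hypothetical, not taken
# from the real dataset):
#   get_most_similar_tag("Masakit ang ulo ko", patterns_df)
#   -> ("sakit_ng_ulo", 0.87)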

def main():
    # Streamlit page title
    st.title("TagaCare")

    # React to user input
    if prompt := st.chat_input("Magtanong ng lunas sa sakit"):

        # Use the cached function to get the most similar tag
        returned_tag, returned_score = get_most_similar_tag(prompt, patterns_df)

        # Show the matched tag with its score, then every response for that tag
        st.success(f"{returned_tag} ({returned_score:.2f})")
        for response in responses_df.loc[responses_df['tag'] == returned_tag, 'response']:
            st.success(response)

if __name__ == "__main__":
    main()
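
# To launch the app locally (script name assumed; use the actual filename):
#   streamlit run app.py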