"""Gradio app that returns the hotel most similar to a free-text query.

The query is embedded with a SentenceTransformer model and compared, by
cosine similarity, against three precomputed embeddings stored per hotel in
``last_df.csv``. When spaCy recognises a place name in the query that matches
a ``locality`` value, only hotels from that locality are considered.
"""

import ast
import importlib
import subprocess
import sys
from typing import Optional

import gradio as gr
import pandas as pd
import spacy
import torch
import torch.nn as nn
from sentence_transformers import SentenceTransformer

SPACY_MODEL_NAME = "en_core_web_trf"

# Columns of the CSV that hold embeddings serialised as Python list literals.
EMBEDDING_COLUMNS = (
    'rating_value_embedding',
    'hotel_combined_embedding',
    'review_embedding',
)

# The embeddings are 1-D vectors, hence dim=0. Built once instead of per row.
_cosine_similarity = nn.CosineSimilarity(dim=0)


def _load_spacy_model(name: str):
    """Load the spaCy pipeline *name*, downloading it only if it is missing.

    The download runs through ``sys.executable`` so the package is installed
    into the interpreter that is actually running this app, not whichever
    ``python`` happens to be first on PATH. If the download fails, the second
    ``spacy.load`` raises ``OSError``.
    """
    try:
        return spacy.load(name)
    except OSError:
        subprocess.run([sys.executable, "-m", "spacy", "download", name])
        # Make the freshly installed package visible to this process.
        importlib.invalidate_caches()
        return spacy.load(name)


nlp = _load_spacy_model(SPACY_MODEL_NAME)
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True)

df_new = pd.read_csv('last_df.csv')
# Normalise the country name so both spellings display as 'Turkey'.
df_new['country'] = df_new['country'].replace('Türkiye', 'Turkey')


def get_city_name(query: str) -> Optional[str]:
    """Return the first place name spaCy finds in *query*, lower-cased.

    Only entities labelled ``GPE`` are considered. That label is not limited
    to cities, so a country or region name may be returned as well.

    Returns:
        The lower-cased entity text, or ``None`` if no GPE entity was found.
    """
    doc = nlp(query)
    return next(
        (ent.text.lower() for ent in doc.ents if ent.label_ == "GPE"),
        None,
    )


def filter_by_loc(query: str) -> pd.DataFrame:
    """Restrict the hotel table to the locality mentioned in *query*.

    Returns:
        The rows of ``df_new`` whose ``locality`` equals (case-insensitively)
        the place detected in the query. If no place is detected, or it does
        not match any locality, ``df_new`` itself is returned unfiltered.
    """
    city_name = get_city_name(query)
    if city_name is None:
        return df_new

    # Lower-case the column once and reuse it for both the test and the mask.
    matches = df_new['locality'].str.lower() == city_name
    if not matches.any():
        return df_new
    return df_new[matches]


def _parse_embedding(value) -> torch.Tensor:
    """Convert a stored embedding to a float32 tensor.

    Embeddings are stored in the CSV as string representations of lists;
    ``ast.literal_eval`` parses them without executing arbitrary code.
    Values that are already sequences/arrays are converted directly.
    """
    if isinstance(value, str):
        value = ast.literal_eval(value)
    return torch.as_tensor(value, dtype=torch.float32)


def get_similarity_score(row, query_embedding) -> float:
    """Score one hotel row against the query embedding.

    Args:
        row: A mapping/Series containing the columns in ``EMBEDDING_COLUMNS``.
        query_embedding: 1-D embedding of the query (tensor or array-like).

    Returns:
        The sum of the cosine similarities between the query and each of the
        row's three embeddings.
    """
    query_tensor = torch.as_tensor(query_embedding, dtype=torch.float32)
    return sum(
        _cosine_similarity(_parse_embedding(row[column]), query_tensor).item()
        for column in EMBEDDING_COLUMNS
    )


def process_query(query: str) -> str:
    """Find the hotel most similar to *query* and describe it as text.

    Args:
        query: Free-text description of the desired hotel.

    Returns:
        A multi-line, human-readable summary of the best-matching hotel, or a
        short explanatory message if the query is blank or no hotels exist.
    """
    if not query or not query.strip():
        return "Please enter a query describing the hotel you are looking for."

    query_embedding = torch.as_tensor(model.encode(query))

    candidates = filter_by_loc(query)
    if candidates.empty:
        return "No hotels are available to compare against."

    scores = candidates.apply(
        lambda row: get_similarity_score(row, query_embedding),
        axis=1,
    )
    # assign() returns a new frame, so neither the filtered slice nor the
    # module-level df_new is mutated by a request.
    top_similar = (
        candidates.assign(similarity_score=scores)
        .sort_values('similarity_score', ascending=False)
        .head(1)
    )

    hotel_name = top_similar['hotel_name'].values[0]
    hotel_rate = top_similar['rate'].values[0]
    hotel_price_range = top_similar['price_range'].values[0]
    hotel_city = top_similar['locality'].values[0]
    hotel_country = top_similar['country'].values[0]

    lines = [
        "Here's the most similar hotel we found:",
        "-" * 30,
        f"Hotel Name: {hotel_name}",
        f"City: {hotel_city}",
        f"Country: {hotel_country}",
        f"Star Rating: {hotel_rate}",
        f"Price Range: {hotel_price_range}",
    ]
    # Every line, including the last, is newline-terminated.
    return "".join(f"{line}\n" for line in lines)


ui = gr.Interface(
    fn=process_query,
    inputs=gr.Textbox(label="Query", placeholder="Enter your query"),
    outputs="text",
    title="Hotel Similarity Finder",
    description="Enter a query to find similar hotels.",
)

ui.launch()