Spaces:
Runtime error
Runtime error
File size: 3,838 Bytes
e20d7fd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
# -*- coding: utf-8 -*-
"""Proyecto-buscar-pelicula.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1gfkDWGdNI04qm8HP1wp0dTGy40UmhOQG
"""
# Commented out IPython magic to ensure Python compatibility.
# %%capture
# !pip install -U sentence-transformers
# !pip install gradio chromadb
import pandas as pd
from sentence_transformers import SentenceTransformer, util
from ast import literal_eval
import chromadb
from chromadb.utils import embedding_functions
import gdown
# Google Drive location of the IMDb CSV and the local file name it is saved as.
url = 'https://drive.google.com/uc?id='
file_id = '1MgM3iObIAdqA-SvI-pXeUeXEiEAuMzXw'
output = '25k IMDb movie Dataset.csv'

from pathlib import Path

# Reuse a copy left by a previous run so a restart does not need the network
# (or spend Google Drive download quota) again.
if not Path(output).is_file():
    # gdown.download returns None when the file could not be retrieved; fail
    # here with a clear message instead of letting read_csv fail on a missing
    # file further down.
    if gdown.download(url + file_id, output, quiet=False) is None:
        raise RuntimeError(f"Could not download the dataset from {url + file_id}")

df = pd.read_csv(output)
def concatenar_lista(lista):
    """Parse a stringified Python list and join its items with single spaces.

    Args:
        lista: Text containing a Python list literal, e.g. "['a', 'b']".
            Blank text (empty or whitespace only, such as the ' ' placeholder
            this script writes into missing cells) is treated as an empty
            list.

    Returns:
        The list items, converted to ``str`` and joined with ' '. An empty
        string for blank input or an empty list.

    Raises:
        ValueError, SyntaxError: If non-blank text is not a valid Python
            literal.
    """
    # literal_eval raises SyntaxError on blank text, so short-circuit it.
    if isinstance(lista, str) and not lista.strip():
        return ''
    # str() keeps the join from failing on non-string items such as numbers.
    return ' '.join(map(str, literal_eval(lista)))
def string_to_list(lista):
    """Parse a stringified Python list literal into the list it describes.

    Args:
        lista: Text containing a Python list literal, e.g. "['Action']".
            Blank text (empty or whitespace only, such as the ' ' placeholder
            this script writes into missing cells) is treated as an empty
            list.

    Returns:
        The evaluated literal; ``[]`` for blank input.

    Raises:
        ValueError, SyntaxError: If non-blank text is not a valid Python
            literal.
    """
    # literal_eval raises SyntaxError on blank text, so short-circuit it.
    if isinstance(lista, str) and not lista.strip():
        return []
    return literal_eval(lista)
# --- Clean the raw table -----------------------------------------------------
# Missing cells become a single space so the string handling below never
# receives NaN.
df = df.fillna(' ')

# The keyword, cast and genre columns hold stringified Python lists.
df['Keywords'] = df['Plot Kyeword'].apply(concatenar_lista)
df['Stars'] = df['Top 5 Casts'].apply(concatenar_lista)
df['Generes'] = df['Generes'].apply(string_to_list)
# Unparseable ratings are coerced to NaN and then stored as 0.0.
df['Rating'] = pd.to_numeric(df['Rating'], errors="coerce").fillna(0).astype("float")

# Distinct genre names, collected while 'Generes' still holds lists.
# explode() yields NaN for rows whose list is empty; drop it so only real
# genre names remain.
unique_generes = df['Generes'].explode().dropna().unique()
df.drop(['Plot Kyeword', 'Top 5 Casts'], axis=1, inplace=True)

# --- Build the text that gets embedded ---------------------------------------
# Overview + keywords + cast, concatenated column-wise instead of row by row.
df['text'] = df['Overview'].astype(str) + ' ' + df['Keywords'] + ' ' + df['Stars']

model = SentenceTransformer('all-MiniLM-L6-v2')
# Pass a plain list of strings: encode() indexes its input by position, which
# a pandas Series only supports when its labels happen to be 0..n-1.
embeddings = model.encode(df['text'].tolist(), batch_size=64, show_progress_bar=True)
df['embeddings'] = embeddings.tolist()
# Record ids are the row labels rendered as strings.
df['ids'] = df.index.astype('str')

# --- Open the vector store ---------------------------------------------------
client_persistent = chromadb.PersistentClient(path='data_embeddings')
# The store is persisted on disk, so the collection already exists on every
# start after the first; create_collection would raise in that case.
db = client_persistent.get_or_create_collection(name='movies_db')

# Metadata values are stored as scalars, so flatten each genre list to text.
df['Generes'] = df['Generes'].apply(', '.join)
from torch import embedding
# Insert every movie into the collection: id, precomputed embedding, and all
# remaining columns as metadata.
_record_ids = df['ids'].tolist()
_record_embeddings = df['embeddings'].tolist()
_record_metadatas = df.drop(['ids', 'embeddings', 'text'], axis=1).to_dict('records')

# Chroma rejects a single add() holding more records than the client's maximum
# batch size (the limit depends on the local SQLite build), and the whole
# dataset in one call can exceed it. Insert in fixed-size chunks instead.
_ADD_BATCH_SIZE = 5000
for _start in range(0, len(_record_ids), _ADD_BATCH_SIZE):
    _stop = _start + _ADD_BATCH_SIZE
    db.add(
        ids=_record_ids[_start:_stop],
        embeddings=_record_embeddings[_start:_stop],
        metadatas=_record_metadatas[_start:_stop],
    )
from chromadb.api.types import Metadatas
def search(query, genre, rating, num):
    """Find the indexed movies that best match a free-text query.

    Args:
        query: Free-text description of the movie to look for.
        genre: A single genre name the movie must have, or a falsy value for
            no genre restriction.
        rating: Minimum rating a movie must have; a falsy value means 0.
        num: Maximum number of movies to return (converted to ``int``).

    Returns:
        A pandas DataFrame with the columns Title, Overview, Director, Stars,
        Genre, year and Rating, one row per match in the order the collection
        returned them. The frame is empty (but keeps its columns) when nothing
        matches or ``num`` is missing or below 1. With a genre selected, fewer
        than ``num`` rows may come back when the genre is rare among the
        closest candidates.
    """
    columns = ['Title', 'Overview', 'Director', 'Stars', 'Genre', 'year', 'Rating']

    # A cleared numeric input can arrive as None.
    num = int(num) if num else 0
    if num < 1:
        return pd.DataFrame(columns=columns)

    # Ratings are stored as floats; send a float bound so the comparison is
    # made against a value of the same type.
    min_rating = float(rating) if rating else 0.0
    conditions = {"Rating": {"$gte": min_rating}}

    # 'Generes' is stored as one comma-joined string ("Action, Drama"), so an
    # equality filter on it would only ever match single-genre movies. Fetch
    # extra candidates and apply the genre test here instead.
    overfetch_factor = 50
    n_candidates = num * overfetch_factor if genre else num

    responses = db.query(
        query_texts=[query],
        n_results=n_candidates,
        where=conditions,
        include=['metadatas']
    )

    rows = []
    # Exactly one query text was sent, so there is exactly one result list.
    for metadata in responses['metadatas'][0]:
        if genre and genre not in str(metadata['Generes']).split(', '):
            continue
        rows.append({
            'Title': metadata['movie title'],
            'Overview': metadata['Overview'],
            'Director': metadata['Director'],
            'Stars': metadata['Stars'],
            'Genre': metadata['Generes'],
            'year': metadata['year'],
            'Rating': metadata['Rating']
        })
        if len(rows) == num:
            break

    return pd.DataFrame(rows, columns=columns)
import gradio as gr
# Dropdown choices: the distinct genre names found in the dataset. Anything
# that is not a non-blank string (e.g. NaN produced for rows without genres)
# is left out, since it is not a selectable genre.
genres = [g for g in unique_generes.tolist() if isinstance(g, str) and g.strip()]

# Web form wired to search(): the inputs map positionally to
# (query, genre, rating, num) and the returned DataFrame is shown as a table.
iface = gr.Interface(
    fn=search,
    inputs=[
        gr.Textbox(lines=5, placeholder="Escribe aquí tu consulta...", label="Consulta"),
        gr.Dropdown(choices=genres, label="Género de la película"),
        gr.Slider(minimum=1, maximum=10, value=5, label="Puntuación mínima"),
        # precision=0 makes the component deliver an integer result count.
        gr.Number(minimum=1, maximum=10, value=3, precision=0, label="Número de resultados")
    ],
    outputs=gr.Dataframe(type="pandas", label="Resultados"),
    title="Buscador de películas",
    description="Introduce tu consulta, selecciona un género y define una puntuación mínima para buscar películas."
)

# Start the web server; no public share link is requested.
iface.launch(share=False)