rriverar75 committed on
Commit
e20d7fd
1 Parent(s): e656661

Upload proyecto_buscar_pelicula.py

Browse files
Files changed (1) hide show
  1. proyecto_buscar_pelicula.py +145 -0
proyecto_buscar_pelicula.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Proyecto-buscar-pelicula.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1gfkDWGdNI04qm8HP1wp0dTGy40UmhOQG
8
+ """
9
+
10
+ # Commented out IPython magic to ensure Python compatibility.
11
+ # %%capture
12
+ # !pip install -U sentence-transformers
13
+ # !pip install gradio chromadb
14
+
15
+ import pandas as pd
16
+ from sentence_transformers import SentenceTransformer, util
17
+ from ast import literal_eval
18
+ import chromadb
19
+ from chromadb.utils import embedding_functions
20
+
21
+ import gdown
22
+
23
# Google Drive location of the dataset and the local filename it is saved as.
url = 'https://drive.google.com/uc?id='
file_id = '1MgM3iObIAdqA-SvI-pXeUeXEiEAuMzXw'
output = '25k IMDb movie Dataset.csv'

# Download the CSV (with progress output) and load it into the working frame.
gdown.download(f'{url}{file_id}', output, quiet=False)
df = pd.read_csv(output)
30
+
31
def concatenar_lista(lista):
    """Join the items of a stringified list into one space-separated string.

    Args:
        lista: Text containing a Python list literal of strings,
            e.g. "['a', 'b']".

    Returns:
        The items joined by single spaces. Blank input (empty or
        whitespace-only text) yields ''.

    Raises:
        ValueError, SyntaxError: If non-blank text is not a valid literal.
    """
    # Missing cells are replaced with ' ' before this runs; literal_eval
    # cannot parse blank text, so treat it as "no items".
    if isinstance(lista, str) and not lista.strip():
        return ''
    return ' '.join(literal_eval(lista))
34
+
35
def string_to_list(lista):
    """Parse text containing a Python list literal into the list itself.

    Args:
        lista: Text such as "['Action', 'Drama']".

    Returns:
        The evaluated literal. Blank input (empty or whitespace-only text)
        yields an empty list.

    Raises:
        ValueError, SyntaxError: If non-blank text is not a valid literal.
    """
    # Missing cells are replaced with ' ' before this runs; literal_eval
    # cannot parse blank text, so treat it as an empty list.
    if isinstance(lista, str) and not lista.strip():
        return []
    return literal_eval(lista)
38
+
39
# Replace missing cells with a single space so every column holds a value.
df = df.fillna(' ')

# Flatten the stringified keyword / cast lists into plain text, and turn the
# stringified genre lists into real lists.
df['Keywords'] = df['Plot Kyeword'].map(concatenar_lista)
df['Stars'] = df['Top 5 Casts'].map(concatenar_lista)
df['Generes'] = df['Generes'].map(string_to_list)

# Non-numeric ratings are coerced to NaN and then stored as 0.0.
df['Rating'] = (
    pd.to_numeric(df['Rating'], errors="coerce")
    .fillna(0)
    .astype("float")
)

# Distinct genres across all movies (one entry per list element).
unique_generes = df['Generes'].explode().unique()

# The raw list columns are no longer needed once flattened.
df = df.drop(columns=['Plot Kyeword', 'Top 5 Casts'])

# Text that gets embedded: overview, keywords and cast, space-separated.
df['text'] = df['Overview'].astype(str) + ' ' + df['Keywords'] + ' ' + df['Stars']
54
+
55
# Sentence-embedding model used to encode each movie's combined text.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode every row's text in batches of 64, showing a progress bar.
embeddings = model.encode(
    df['text'],
    batch_size=64,
    show_progress_bar=True,
)

# Keep each vector as a plain Python list next to its row.
df['embeddings'] = embeddings.tolist()

# Record ids are the row index rendered as strings.
df['ids'] = df.index.astype('str')
64
+
65
# On-disk Chroma client; data lives under ./data_embeddings.
client_persistent = chromadb.PersistentClient(path='data_embeddings')

# The store is persistent, so on a second run the collection already exists
# and create_collection() would raise. get_or_create_collection() reuses it.
db = client_persistent.get_or_create_collection(name='movies_db')

# Collapse each genre list into one ', '-separated string before the frame is
# exported as collection metadata.
df['Generes'] = df['Generes'].apply(lambda x: ', '.join(x))
70
+
71
+ from torch import embedding
72
# Chroma limits how many records a single add() call may carry (the exact cap
# depends on the installed version / SQLite build), so insert the dataset in
# slices instead of all at once.
ADD_BATCH_SIZE = 5000

_ids = df['ids'].tolist()
_vectors = df['embeddings'].tolist()
# Everything except the bookkeeping columns is stored as per-movie metadata.
_metadatas = df.drop(['ids', 'embeddings', 'text'], axis=1).to_dict('records')

for _start in range(0, len(_ids), ADD_BATCH_SIZE):
    _stop = _start + ADD_BATCH_SIZE
    db.add(
        ids=_ids[_start:_stop],
        embeddings=_vectors[_start:_stop],
        metadatas=_metadatas[_start:_stop],
    )
77
+
78
+ from chromadb.api.types import Metadatas
79
+
80
def search(query, genre, rating, num):
    """Query the movie collection and return the matches as a DataFrame.

    Args:
        query: Free-text query passed to the collection as ``query_texts``.
        genre: Value the stored ``Generes`` metadata must equal; a falsy
            value disables the genre filter.
        rating: Minimum ``Rating`` (inclusive); a falsy value means 0.
        num: Number of results to request. Converted to ``int``; a missing
            or zero value falls back to 1.

    Returns:
        pandas.DataFrame with the columns Title, Overview, Director, Stars,
        Genre, year and Rating, one row per hit (empty, but with the same
        columns, when nothing matches).
    """
    columns = ['Title', 'Overview', 'Director', 'Stars', 'Genre', 'year', 'Rating']

    # The UI may deliver a float here (presumably None if the field is
    # cleared -- verify); always request at least one result.
    num = max(1, int(num or 1))

    rating_condition = {"Rating": {"$gte": rating or 0}}

    # NOTE(review): "Generes" is stored as a ', '-joined string, so this
    # equality only matches movies whose entire genre string equals `genre`
    # (e.g. "Action" does not match "Action, Drama"). Matching individual
    # genres would require changing how genres are stored.
    if genre:
        conditions = {"$and": [{"Generes": genre}, rating_condition]}
    else:
        conditions = rating_condition

    # NOTE(review): query_texts is embedded by the collection's own embedding
    # function, not by the module-level `model` -- confirm both produce
    # compatible vectors.
    responses = db.query(
        query_texts=[query],
        n_results=num,
        where=conditions,
        include=['metadatas'],
    )

    # One list of metadata dicts comes back per query text; flatten them.
    response_data = [
        {
            'Title': metadata['movie title'],
            'Overview': metadata['Overview'],
            'Director': metadata['Director'],
            'Stars': metadata['Stars'],
            'Genre': metadata['Generes'],
            'year': metadata['year'],
            'Rating': metadata['Rating'],
        }
        for hits in responses['metadatas']
        for metadata in hits
    ]

    # Explicit columns keep the table header stable when there are no hits.
    return pd.DataFrame(response_data, columns=columns)
127
+
128
+ import gradio as gr
129
+
130
# Genre choices offered in the dropdown.
genres = unique_generes.tolist()

# Web form wired to search(): query text, genre, minimum rating and number of
# results in; a table of matching movies out.
iface = gr.Interface(
    fn=search,
    inputs=[
        gr.Textbox(
            lines=5,
            placeholder="Escribe aquí tu consulta...",
            label="Consulta",
        ),
        gr.Dropdown(choices=genres, label="Género de la película"),
        gr.Slider(minimum=1, maximum=10, value=5, label="Puntuación mínima"),
        gr.Number(minimum=1, maximum=10, value=3, label="Número de resultados"),
    ],
    outputs=gr.Dataframe(type="pandas", label="Resultados"),
    title="Buscador de películas",
    description="Introduce tu consulta, selecciona un género y define una puntuación mínima para buscar películas.",
)

# Serve locally only; no public share link.
iface.launch(share=False)