Uhhy committed on
Commit 18000a9
1 Parent(s): 185b262

Update app.py

Files changed (1)
  1. app.py +18 -11
app.py CHANGED
@@ -5,6 +5,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 import uvicorn
 from dotenv import load_dotenv
 from difflib import SequenceMatcher
+from tqdm import tqdm  # import tqdm for the progress bar
 
 load_dotenv()
 
@@ -20,6 +21,7 @@ models = [
 
 # Load the models into memory only once
 llms = [Llama.from_pretrained(repo_id=model['repo_id'], filename=model['filename']) for model in models]
+print(f"Models loaded: {[model['repo_id'] for model in models]}")
 
 class ChatRequest(BaseModel):
     message: str
@@ -29,6 +31,7 @@ class ChatRequest(BaseModel):
 
 def generate_chat_response(request, llm):
     try:
+        # Normalize the message for robust handling
         user_input = normalize_input(request.message)
         response = llm.create_chat_completion(
             messages=[{"role": "user", "content": user_input}],
@@ -42,10 +45,11 @@ def generate_chat_response(request, llm):
         return {"response": f"Error: {str(e)}", "literal": user_input}
 
 def normalize_input(input_text):
+    # Implement any additional normalization logic here as needed
     return input_text.strip()
 
 def select_best_response(responses, request):
-    coherent_responses = filter_by_coherence(responses, request)
+    coherent_responses = filter_by_coherence([resp['response'] for resp in responses], request)
     best_response = filter_by_similarity(coherent_responses)
     return best_response
 
@@ -68,29 +72,32 @@ async def generate_chat(request: ChatRequest):
     if not request.message.strip():
         raise HTTPException(status_code=400, detail="The message cannot be empty.")
 
+    print(f"Processing request: {request.message}")
+
+    # Create a ThreadPoolExecutor to run the tasks in parallel
     with ThreadPoolExecutor(max_workers=None) as executor:
+        # Use tqdm to display a progress bar
         futures = [executor.submit(generate_chat_response, request, llm) for llm in llms]
         responses = []
-        for future in as_completed(futures):
+
+        for future in tqdm(as_completed(futures), total=len(futures), desc="Generating responses"):
             response = future.result()
             responses.append(response)
+            print(f"Model processed: {response['literal'][:30]}...")  # show the first 30 characters of the input
 
-        # Check whether any response contains an error and handle it if necessary
+        # Check the responses for errors
         if any("Error" in response['response'] for response in responses):
             error_response = next(response for response in responses if "Error" in response['response'])
            raise HTTPException(status_code=500, detail=error_response['response'])
 
-        # Extract the responses and the literal inputs
-        response_texts = [resp['response'] for resp in responses]
-        literal_inputs = [resp['literal'] for resp in responses]
-
-        # Select the best response
-        best_response = select_best_response(response_texts, request)
+        best_response = select_best_response(responses, request)
 
+        print(f"Best response selected: {best_response}")
+
         return {
             "best_response": best_response,
-            "all_responses": response_texts,
-            "literal_inputs": literal_inputs
+            "all_responses": [resp['response'] for resp in responses],
+            "literal_inputs": [resp['literal'] for resp in responses]
         }
 
 if __name__ == "__main__":
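Since the main change in this commit is wrapping as_completed(futures) in tqdm, the pattern can be sanity-checked in isolation. Below is a minimal, self-contained sketch of that progress-bar pattern; slow_task is a hypothetical stand-in for generate_chat_response and is not part of app.py:

# Minimal sketch of the tqdm + as_completed pattern introduced in this commit.
# slow_task is a hypothetical placeholder for generate_chat_response.
import random
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

def slow_task(i):
    time.sleep(random.uniform(0.1, 0.5))  # simulate a model call of varying latency
    return {"response": f"answer {i}", "literal": f"input {i}"}

with ThreadPoolExecutor(max_workers=None) as executor:
    futures = [executor.submit(slow_task, i) for i in range(4)]
    results = []
    # The bar advances as each future finishes, in completion order rather than submission order.
    for future in tqdm(as_completed(futures), total=len(futures), desc="Generating responses"):
        results.append(future.result())

print(results)

Passing total=len(futures) is needed because as_completed is a generator with no length, and desc only labels the bar; neither changes how the futures run.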