Spaces:
Sleeping
Sleeping
jordigonzm
committed on
Commit
•
fc439e1
1
Parent(s):
2728c6d
chatbot con streaming
Browse files
app.py
CHANGED
@@ -67,14 +67,14 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
|
|
67 |
print(f'Mensaje: {message}')
|
68 |
print(f'Historia: {history}')
|
69 |
|
70 |
-
# Limpieza
|
71 |
cleaned_history = [[prompt, answer if answer is not None else ""] for prompt, answer in history]
|
72 |
|
|
|
|
|
73 |
# Verificar y asignar pad_token_id si es None
|
74 |
if tokenizer.pad_token_id is None:
|
75 |
tokenizer.pad_token_id = tokenizer.eos_token_id
|
76 |
-
|
77 |
-
stop = StopOnTokens()
|
78 |
|
79 |
# Preparar los input_ids y manejar la máscara de atención
|
80 |
input_ids = tokenizer.encode(message, return_tensors='pt').to(next(model.parameters()).device)
|
@@ -83,7 +83,7 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
|
|
83 |
|
84 |
generate_kwargs = dict(
|
85 |
input_ids=input_ids,
|
86 |
-
attention_mask=attention_mask,
|
87 |
streamer=streamer,
|
88 |
max_new_tokens=max_new_tokens,
|
89 |
do_sample=True,
|
@@ -91,7 +91,7 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
|
|
91 |
temperature=temperature,
|
92 |
repetition_penalty=1.1,
|
93 |
stopping_criteria=StoppingCriteriaList([stop]),
|
94 |
-
pad_token_id=tokenizer.
|
95 |
)
|
96 |
|
97 |
# Ejecutar la generación de tokens en un hilo separado
|
@@ -103,7 +103,9 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
|
|
103 |
for new_token in streamer:
|
104 |
if new_token:
|
105 |
buffer += new_token
|
106 |
-
#
|
|
|
|
|
107 |
yield cleaned_history + [[message, buffer]]
|
108 |
|
109 |
|
|
|
67 |
print(f'Mensaje: {message}')
|
68 |
print(f'Historia: {history}')
|
69 |
|
70 |
+
# Limpieza del historial para evitar pares con 'None'
|
71 |
cleaned_history = [[prompt, answer if answer is not None else ""] for prompt, answer in history]
|
72 |
|
73 |
+
stop = StopOnTokens()
|
74 |
+
|
75 |
# Verificar y asignar pad_token_id si es None
|
76 |
if tokenizer.pad_token_id is None:
|
77 |
tokenizer.pad_token_id = tokenizer.eos_token_id
|
|
|
|
|
78 |
|
79 |
# Preparar los input_ids y manejar la máscara de atención
|
80 |
input_ids = tokenizer.encode(message, return_tensors='pt').to(next(model.parameters()).device)
|
|
|
83 |
|
84 |
generate_kwargs = dict(
|
85 |
input_ids=input_ids,
|
86 |
+
attention_mask=attention_mask,
|
87 |
streamer=streamer,
|
88 |
max_new_tokens=max_new_tokens,
|
89 |
do_sample=True,
|
|
|
91 |
temperature=temperature,
|
92 |
repetition_penalty=1.1,
|
93 |
stopping_criteria=StoppingCriteriaList([stop]),
|
94 |
+
pad_token_id=tokenizer.pad_token_id
|
95 |
)
|
96 |
|
97 |
# Ejecutar la generación de tokens en un hilo separado
|
|
|
103 |
for new_token in streamer:
|
104 |
if new_token:
|
105 |
buffer += new_token
|
106 |
+
# Asegúrate de que solo estás trabajando con texto puro
|
107 |
+
buffer = buffer.strip() # Eliminar espacios innecesarios
|
108 |
+
# Emitir el texto acumulado en un formato compatible con Gradio: [[Mensaje del usuario, Respuesta del bot]]
|
109 |
yield cleaned_history + [[message, buffer]]
|
110 |
|
111 |
|