vilarin committed
Commit 4d0e4e3
1 parent: 16b7c93

Update app.py

Files changed (1): app.py (+6 -4)
app.py CHANGED
@@ -3,6 +3,7 @@ import copy
 import gradio as gr
 import spaces
 from llama_cpp import Llama
+import llama_cpp.llama_tokenizer
 import os
 from huggingface_hub import hf_hub_download
 
@@ -21,8 +22,9 @@ llm = Llama(
         filename=MODEL_FILE,
     ),
     n_ctx=4096,
-    n_gpu_layers=-1,
-    chat_format="gemma",
+    n_gpu_layers=-1,
+    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(MODEL_ID),
+    verbose=False,
 )
 
 TITLE = "<h1><center>Chatbox</center></h1>"
@@ -61,14 +63,14 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
 
     print(f"Conversation is -\n{conversation}")
 
-    output = llm.create_chat_completion(
+    output = llm(
         messages=conversation,
         top_k=top_k,
         top_p=top_p,
         repeat_penalty=penalty,
         max_tokens=max_new_tokens,
         stream =True,
-        temperature=temperature,
+        temperature=temperature,
     )
 
     for out in output:
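
For readers following along, the net effect of the Llama(...) change is to keep full GPU offload, drop the hard-coded chat_format="gemma", and let llama-cpp-python drive chat templating through the model's own Hugging Face tokenizer. A minimal, self-contained sketch of the resulting setup; MODEL_ID and MODEL_FILE here are hypothetical stand-ins, since the Space defines its own values:

import llama_cpp.llama_tokenizer
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Hypothetical identifiers; the Space defines its own MODEL_ID / MODEL_FILE.
MODEL_ID = "google/gemma-1.1-7b-it"
MODEL_FILE = "model-q4_k_m.gguf"

llm = Llama(
    model_path=hf_hub_download(
        repo_id=MODEL_ID,
        filename=MODEL_FILE,
    ),
    n_ctx=4096,
    n_gpu_layers=-1,  # offload every layer to the GPU when one is present
    # LlamaHFTokenizer loads the tokenizer (and its chat template) from the
    # HF repo, replacing the previously hard-coded chat_format="gemma".
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(MODEL_ID),
    verbose=False,
)

Note that LlamaHFTokenizer requires the transformers package to be installed alongside llama-cpp-python.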
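
One note on the call-site change: in llama-cpp-python, Llama.__call__ is an alias for create_completion(), which expects a prompt string rather than a messages list, so chat-style streaming normally goes through create_chat_completion(). A hedged sketch of that loop, assuming conversation is the usual list of {"role": ..., "content": ...} dicts built by stream_chat():

def stream_reply(llm, conversation, temperature, max_new_tokens, top_p, top_k, penalty):
    # create_chat_completion() accepts messages=...; with stream=True it
    # yields chunks whose choices[0]["delta"] may carry a "content" piece.
    output = llm.create_chat_completion(
        messages=conversation,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        repeat_penalty=penalty,
        max_tokens=max_new_tokens,
        stream=True,
    )
    buffer = ""
    for out in output:
        delta = out["choices"][0]["delta"]
        if "content" in delta:
            buffer += delta["content"]
            yield buffer  # Gradio re-renders the growing reply on each yield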