aixsatoshi committed
Commit 9e9c8af
1 Parent(s): bcc0940

Update app.py

Files changed (1)
  1. app.py +24 -38
app.py CHANGED
@@ -1,26 +1,24 @@
-import torch
-from PIL import Image
-import gradio as gr
 import spaces
+import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-import os
+import gradio as gr
 from threading import Thread
 
-HF_TOKEN = os.environ.get("HF_TOKEN", None)
-MODEL_ID = "aixsatoshi/Llama-3-Elyza-Youko-moe-2x8B"
-MODELS = os.environ.get("MODELS")
-MODEL_NAME = MODELS.split("/")[-1]
+model_id = "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.float16,
+    low_cpu_mem_usage=True,
+    device_map="auto",
+)
 
-TITLE = "<h1><center>Llama-3-Elyza-Youko-moe-2x8B Chat webui</center></h1>"
+TITLE = "<h1><center>Meta-Llama-3.1-70B-Instruct-AWQ-INT4 Chat webui</center></h1>"
 
-DESCRIPTION = f"""
-<h3>MODEL: <a href="https://hf.co/{MODELS}">{MODEL_NAME}</a></h3>
+DESCRIPTION = """
+<h3>MODEL: <a href="https://hf.co/hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4">Meta-Llama-3.1-70B-Instruct-AWQ-INT4</a></h3>
 <center>
-<p>Llama-3-Elyza-JA-8B is the large language model built by Elyza.
-<p>Llama-3-youko-8B is the large language model built by rinna.
-<br>
-Feel free to test without log.
-</p>
+<p>This model is designed for conversational interactions.</p>
 </center>
 """
 
@@ -42,24 +40,15 @@ h3 {
 }
 """
 
-
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    torch_dtype=torch.float16,
-    device_map="auto",
-)
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-
-@spaces.GPU
+@gr.GPU
 def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
-    print(f'message is - {message}')
-    print(f'history is - {history}')
+    print(f'Message: {message}')
+    print(f'History: {history}')
+
     conversation = []
     for prompt, answer in history:
         conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
     conversation.append({"role": "user", "content": message})
-
-    #print(f"Conversation is -\n{conversation}")
 
     input_ids = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer(input_ids, return_tensors="pt").to(0)
@@ -75,7 +64,7 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
         max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=temperature,
-        eos_token_id = [128001, 128009],
+        eos_token_id=[128001, 128009],
     )
 
     thread = Thread(target=model.generate, kwargs=generate_kwargs)
@@ -86,8 +75,6 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
         buffer += new_text
         yield buffer
 
-
-
 chatbot = gr.Chatbot(height=500)
 
 with gr.Blocks(css=CSS) as demo:
@@ -145,15 +132,14 @@ with gr.Blocks(css=CSS) as demo:
             ),
         ],
         examples=[
-            ["超能力を持つ主人公のSF物語のシナリオを考えてください。伏線の設定、テーマやログラインを理論的に使用してください"],
-            ["子供の夏休みの自由研究のための、5つのアイデアと、その手法を簡潔に教えてください。"],
-            ["パズルゲームのスクリプト作成のためにアドバイスお願いします"],
-            ["マークダウン記法にて、ブロック崩しのゲーム作成の教科書作成してください"],
+            ["Explain Deep Learning as a pirate."],
+            ["Give me five ideas for a child's summer science project."],
+            ["Provide advice for writing a script for a puzzle game."],
+            ["Create a tutorial for building a breakout game using markdown."],
         ],
         cache_examples=False,
     )
 
-
-
 if __name__ == "__main__":
     demo.launch()
+
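
For reference, the token-streaming pattern that stream_chat builds on is TextIteratorStreamer feeding a Python generator while model.generate runs in a background thread. Below is a minimal, self-contained sketch of that pattern, not the Space's code: the model id is a small placeholder chosen only for illustration, and the generation parameters are arbitrary defaults.

import torch
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Placeholder model for illustration only; the commit above uses a different, much larger model.
model_id = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")

def stream_reply(message, history, max_new_tokens=256, temperature=0.8):
    # Rebuild the conversation in chat-template format, then append the new user turn.
    conversation = []
    for user_msg, assistant_msg in history:
        conversation.extend([
            {"role": "user", "content": user_msg},
            {"role": "assistant", "content": assistant_msg},
        ])
    conversation.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # The streamer receives tokens as generate() produces them in a worker thread.
    streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
    )
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    # Yield the accumulated reply so a chat UI (e.g. gr.ChatInterface) can update incrementally.
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer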