aixsatoshi committed on
Commit
da8a347
1 Parent(s): d65d6d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -9,8 +9,9 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
9
  model = AutoModelForCausalLM.from_pretrained(
10
  model_id,
11
  torch_dtype=torch.float16,
12
- low_cpu_mem_usage=True,
13
- device_map="auto",
 
14
  )
15
 
16
  TITLE = "<h1><center>Meta-Llama-3.1-70B-Instruct-AWQ-INT4 Chat webui</center></h1>"
@@ -40,7 +41,7 @@ h3 {
40
  }
41
  """
42
 
43
- @spaces.GPU(duration=120)
44
  def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
45
  print(f'Message: {message}')
46
  print(f'History: {history}')
@@ -142,4 +143,3 @@ with gr.Blocks(css=CSS) as demo:
142
 
143
  if __name__ == "__main__":
144
  demo.launch()
145
-
 
9
  model = AutoModelForCausalLM.from_pretrained(
10
  model_id,
11
  torch_dtype=torch.float16,
12
+ device_map="sequential",
13
+ offload_folder="offload", # オフロードフォルダの指定
14
+ offload_state_dict=True # 必要に応じてstate_dictをオフロード
15
  )
16
 
17
  TITLE = "<h1><center>Meta-Llama-3.1-70B-Instruct-AWQ-INT4 Chat webui</center></h1>"
 
41
  }
42
  """
43
 
44
+ @gr.GPU
45
  def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
46
  print(f'Message: {message}')
47
  print(f'History: {history}')
 
143
 
144
  if __name__ == "__main__":
145
  demo.launch()