Med Tiouti committed on
Commit
94bff1a
1 Parent(s): 2009b2e

Test Successful deployment with 13b

Files changed (2):
  1. README.md +1 -1
  2. app.py +4 -4
README.md CHANGED
@@ -9,7 +9,7 @@ app_file: app.py
 pinned: false
 models:
 - sentence-transformers/all-MiniLM-L6-v2
-- daryl149/llama-2-7b-chat-hf
+- daryl149/llama-2-13b-chat-hf
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
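
The models: list in a Space's README front matter declares which Hub model repositories the Space depends on, so it has to track the repo id used in app.py. A minimal sketch (assuming the huggingface_hub client is installed; not part of this commit) to confirm the declared repos resolve on the Hub before deploying:

# Sketch (assumes the huggingface_hub package): check that every repo id
# declared in the README front matter actually resolves on the Hub.
from huggingface_hub import model_info

for repo in ('sentence-transformers/all-MiniLM-L6-v2',
             'daryl149/llama-2-13b-chat-hf'):
    print(repo, '->', model_info(repo).sha)  # raises if the repo id does not resolve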
app.py CHANGED
@@ -22,19 +22,19 @@ from langchain.vectorstores import FAISS
 
 
 def get_model():
-    model_repo = 'daryl149/llama-2-7b-chat-hf'
+    model_repo = 'daryl149/llama-2-13b-chat-hf'
 
     tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True)
 
     model = AutoModelForCausalLM.from_pretrained(
         model_repo,
         device_map='auto',
-        offload_folder="offload",
+        load_in_4bit=True,
         torch_dtype=torch.float16,
         low_cpu_mem_usage=True,
         trust_remote_code=True
     )
-    max_len = 2048
+    max_len = 8192
 
     return tokenizer,model,max_len
 
@@ -159,4 +159,4 @@ demo = gr.ChatInterface(
 )
 
 demo.queue()
-demo.launch(debug=True)
+demo.launch(debug=True,share=True)
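
A note on the app.py change: swapping offload_folder for load_in_4bit=True trades CPU/disk offload for bitsandbytes 4-bit quantization, which is what lets the larger 13B checkpoint fit in GPU memory. A minimal sketch of an equivalent load written with an explicit BitsAndBytesConfig, the form newer transformers releases prefer over the bare kwarg (repo id and dtype taken from this commit; an illustration, not the Space's actual code):

# Sketch: equivalent 4-bit load via an explicit BitsAndBytesConfig
# (assumes transformers >= 4.30 with bitsandbytes installed and a CUDA GPU).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_repo = 'daryl149/llama-2-13b-chat-hf'

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # matches the commit's torch_dtype
)

tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_repo,
    device_map='auto',                 # spread layers across available devices
    quantization_config=quant_config,  # replaces the bare load_in_4bit kwarg
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)

Two hedged caveats on the remaining edits: Llama 2 checkpoints were trained with a 4096-token context window, so max_len = 8192 may exceed what the model handles reliably; and share=True requests a public *.gradio.live tunnel, which Gradio typically ignores with a warning when running inside a hosted Space.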