Neura committed
Commit: 4c629b9
Parent: 83175c0

Update README.md

Files changed (1): README.md (+43 -0)
README.md CHANGED
@@ -1,3 +1,46 @@
  ---
  license: apache-2.0
+ language:
+ - fa
  ---
+ ```python
+ !pip install -q sentencepiece
+ !pip install -q accelerate
+ !pip install --upgrade -q bitsandbytes
+
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ # Load the tokenizer and the model, sharding weights across available devices.
+ model_path = "Neurai/llama7b"
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_path,
+     # load_in_8bit=True,  # optional: load weights in 8-bit via bitsandbytes
+     torch_dtype=torch.bfloat16,
+     low_cpu_mem_usage=True,
+     device_map="auto",
+ )
+ model.eval()
+ print("model loaded")
+
+ # Persian for: "How many years does a giraffe live?"
+ SYS_PROMPT = "زرافه چند سال عمر میکند؟"
+
+ def response_generate(input_prompt):
+     input_ids = tokenizer(input_prompt, return_tensors="pt")
+     outputs = model.generate(
+         inputs=input_ids["input_ids"].to("cuda"),
+         attention_mask=input_ids["attention_mask"].to("cuda"),
+         do_sample=True,
+         temperature=0.3,
+         top_k=50,
+         top_p=0.9,
+         max_new_tokens=512,
+         eos_token_id=tokenizer.eos_token_id,
+         pad_token_id=tokenizer.pad_token_id,
+     )
+     response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+     return response
+
+ print(response_generate(SYS_PROMPT))
+ ```
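
Note: the snippet installs bitsandbytes but leaves `load_in_8bit=True` commented out, suggesting an optional 8-bit loading path. Below is a minimal sketch of that variant, assuming a recent transformers release where quantization options are passed via `BitsAndBytesConfig`; this is an illustration, not part of the commit.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_path = "Neurai/llama7b"
tokenizer = AutoTokenizer.from_pretrained(model_path)

# BitsAndBytesConfig is the current way to request 8-bit weights;
# it replaces the bare load_in_8bit=True flag commented out above.
quant_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=quant_config,
    device_map="auto",  # shard layers across available GPUs/CPU
)
model.eval()
```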