File size: 1,334 Bytes
71eda90
 
4c629b9
 
71eda90
4c629b9
650152c
 
 
 
 
 
 
 
 
4c629b9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
650152c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
---
license: apache-2.0
language:
- fa
---

### Installing Libraries
Make sure these libraries are installed correctly.
* `pip install -q sentencepiece`
* `pip install -q transformers`
* `pip install -q accelerate`
* `pip install --upgrade -q bitsandbytes`


```python

import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

# Single source of truth for the checkpoint id (the literal was previously
# duplicated in the from_pretrained call below).
model_path = "Neurai/llama7b"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    # load_in_8bit=True,  # optional: bitsandbytes 8-bit quantization
    torch_dtype=torch.bfloat16,  # half the memory of fp32; needs bf16-capable hardware
    low_cpu_mem_usage=True,      # avoid materializing a full fp32 copy while loading
    device_map="auto",           # let accelerate place layers on available devices
)
model.eval()  # inference mode: disables dropout and similar training-only layers
print('model loaded')

# Example prompt (Persian): "How many years does a giraffe live?"
SYS_PROMPT = "زرافه چند سال عمر میکند؟"

def response_generate(input_prompt: str) -> str:
    """Generate a sampled completion for *input_prompt* using the module-level
    ``model`` and ``tokenizer``.

    Returns the decoded text (prompt included), with special tokens stripped.
    """
    # Use the model's actual device instead of hard-coding "cuda":
    # with device_map="auto" the weights may be on CPU or another device,
    # and .to("cuda") would crash on a CUDA-less host.
    device = model.device
    encoded = tokenizer(input_prompt, return_tensors="pt")
    with torch.no_grad():  # inference only — skip autograd bookkeeping
        outputs = model.generate(
            inputs=encoded["input_ids"].to(device),
            attention_mask=encoded["attention_mask"].to(device),
            do_sample=True,   # stochastic decoding, shaped by the knobs below
            temperature=0.3,
            top_k=50,
            top_p=0.9,
            max_new_tokens=512,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )
    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    return response

# Ask the sample question and print the model's generated answer.
print(response_generate(f"{SYS_PROMPT}"))
```