Update README.md
---
license: apache-2.0
datasets:
- OpenAssistant/oasst_top1_2023-08-25
pipeline_tag: text-generation
---

**this is a test, not a useful SOTA bot**

* state-spaces/mamba-1.4b finetuned on Open Assistant conversations, 3 epochs
* talks ChatML (w/o system message); see the prompt sketch below
* training code: https://github.com/geronimi73/mamba
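
For reference, a filled-in single-turn prompt in this format looks as follows (a sketch derived from the template string in the inference snippet below; the assistant turn is left open for the model to complete):

```
<|im_start|>user
Why am I so tired?
<|im_end|>
<|im_start|>assistant
```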

inference:
```python
import torch
from transformers import AutoTokenizer
from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel

modelpath = "g-ronimo/mamba-1.4b-OA"

# load the checkpoint in bfloat16 directly onto the GPU
model = MambaLMHeadModel.from_pretrained(
    modelpath,
    dtype=torch.bfloat16,
    device="cuda",
)
tokenizer = AutoTokenizer.from_pretrained(modelpath)

question = "Why am I so tired?"

# wrap the question in the ChatML template used during finetuning (no system message)
template = "<|im_start|>user\n{q}\n<|im_end|>\n<|im_start|>assistant"
prompt = template.format(q=question)
prompt_tokenized = tokenizer(prompt, return_tensors="pt").to("cuda")["input_ids"]

# sample up to 100 new tokens; cg=True caches CUDA graphs for faster decoding
output_tokenized = model.generate(
    input_ids=prompt_tokenized,
    max_length=len(prompt_tokenized[0]) + 100,
    cg=True,
    output_scores=True,
    enable_timing=False,
    temperature=0.7,
    top_k=40,
    top_p=0.1,
)
answer = tokenizer.decode(output_tokenized[0])

print(answer)
```
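
The decoded output echoes the prompt followed by the model's turn. A minimal post-processing sketch (not part of the original card, and assuming the model closes its turn with `<|im_end|>`) to keep only the answer:

```python
# illustrative helper, continuing from the snippet above:
# keep only the assistant's turn, assuming it ends with <|im_end|>
reply = answer.split("<|im_start|>assistant")[-1]
reply = reply.split("<|im_end|>")[0]
print(reply.strip())
```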

example conversation: