RanchiZhao committed on
Commit 6d442c6
1 Parent(s): 3c4a745

Update README.md

Files changed (1)
  1. README.md +16 -4
README.md CHANGED
@@ -29,14 +29,18 @@ MiniCPM3-4B has a 32k context window. Equipped with LLMxMapReduce, MiniCPM3-4B c
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
+
path = "openbmb/MiniCPM3-4B-GPTQ-Int4"
device = "cuda"
+
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch.bfloat16, device_map=device, trust_remote_code=True)
+
messages = [
    {"role": "user", "content": "推荐5个北京的景点。"},
]
model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
+
model_outputs = model.generate(
    model_inputs,
    max_new_tokens=1024,
@@ -44,9 +48,11 @@ model_outputs = model.generate(
    temperature=0.7,
    repetition_penalty=1.02
)
+
output_token_ids = [
    model_outputs[i][len(model_inputs[i]):] for i in range(len(model_inputs))
]
+
responses = tokenizer.batch_decode(output_token_ids, skip_special_tokens=True)[0]
print(responses)
```
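
A note on the Transformers snippet above: in `transformers`, `temperature` and `top_p` only take effect when sampling is enabled, and `generate` defaults to greedy decoding. A minimal self-contained sketch of the same flow with that switch made explicit (`do_sample=True` is an addition here, not part of the README code; the English prompt is likewise illustrative):

```python
# Sketch only: same pipeline as the README snippet, with the sampling switch
# made explicit. do_sample=True is an assumption, not part of the original code.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

path = "openbmb/MiniCPM3-4B-GPTQ-Int4"
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    path, torch_dtype=torch.bfloat16, device_map="cuda", trust_remote_code=True
)

messages = [{"role": "user", "content": "Recommend 5 scenic spots in Beijing."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

output_ids = model.generate(
    input_ids,
    do_sample=True,            # required for temperature/top_p to take effect
    top_p=0.7,
    temperature=0.7,
    repetition_penalty=1.02,
    max_new_tokens=1024,
)
# Keep only the newly generated tokens, then decode.
response = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
print(response)
```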
@@ -55,18 +61,23 @@ print(responses)
```python
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
+
model_name = "openbmb/MiniCPM3-4B-GPTQ-Int4"
prompt = [{"role": "user", "content": "推荐5个北京的景点。"}]
+
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
input_text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
+
llm = LLM(
    model=model_name,
    trust_remote_code=True,
    tensor_parallel_size=1,
-    quantization='gptq',
+    quantization='gptq'
)
sampling_params = SamplingParams(top_p=0.7, temperature=0.7, max_tokens=1024, repetition_penalty=1.02)
+
outputs = llm.generate(prompts=input_text, sampling_params=sampling_params)
+
print(outputs[0].outputs[0].text)
```
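
The vLLM snippet sends a single templated prompt; `LLM.generate` also accepts a list of prompt strings and schedules them as one batch. A small sketch of that under the same assumptions as the README example (checkpoint name, GPTQ quantization; the question texts are illustrative placeholders):

```python
# Sketch: batched generation with vLLM. Question texts are illustrative.
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

model_name = "openbmb/MiniCPM3-4B-GPTQ-Int4"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

questions = [
    "Recommend 5 scenic spots in Beijing.",
    "Recommend 5 scenic spots in Shanghai.",
]
# Apply the chat template to each question before handing the strings to vLLM.
prompts = [
    tokenizer.apply_chat_template(
        [{"role": "user", "content": q}], tokenize=False, add_generation_prompt=True
    )
    for q in questions
]

llm = LLM(model=model_name, trust_remote_code=True, tensor_parallel_size=1, quantization="gptq")
sampling_params = SamplingParams(top_p=0.7, temperature=0.7, max_tokens=1024, repetition_penalty=1.02)

# One call; vLLM schedules the requests together and returns them in input order.
for output in llm.generate(prompts=prompts, sampling_params=sampling_params):
    print(output.outputs[0].text)
```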
@@ -226,7 +237,7 @@ print(outputs[0].outputs[0].text)
    <td>63.2 </td>
  </tr>
  <tr>
-    <td>LiveCodeBench</td>
+    <td>LiveCodeBench v3</td>
    <td>22.2</td>
    <td>20.2</td>
    <td>19.2</td>
@@ -239,7 +250,7 @@ print(outputs[0].outputs[0].text)
    <td colspan="15" align="left"><strong>Function Call</strong></td>
  </tr>
  <tr>
-    <td>BFCL</td>
+    <td>BFCL v2</td>
    <td>71.6</td>
    <td>70.1</td>
    <td>19.2</td>
@@ -263,6 +274,7 @@ print(outputs[0].outputs[0].text)
  </tr>
</table>

+
## Statement
* As a language model, MiniCPM3-4B generates content by learning from a vast amount of text.
* However, it does not possess the ability to comprehend or express personal opinions or value judgments.
@@ -283,4 +295,4 @@ print(outputs[0].outputs[0].text)
  journal={arXiv preprint arXiv:2404.06395},
  year={2024}
}
-```
+```
 