vdaita committed on
Commit
6021596
1 Parent(s): bd07c3f

Training in progress, epoch 1

Browse files
adapter_config.json CHANGED
@@ -3,15 +3,15 @@
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "deepseek-ai/deepseek-coder-6.7b-instruct",
5
  "bias": "none",
6
- "fan_in_fan_out": false,
7
  "inference_mode": true,
8
  "init_lora_weights": true,
9
  "layer_replication": null,
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
- "lora_alpha": 32,
14
- "lora_dropout": 0,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
17
  "modules_to_save": [
@@ -19,17 +19,17 @@
19
  "lm_head"
20
  ],
21
  "peft_type": "LORA",
22
- "r": 128,
23
  "rank_pattern": {},
24
- "revision": "unsloth",
25
  "target_modules": [
 
 
 
 
26
  "down_proj",
27
  "o_proj",
28
- "v_proj",
29
- "q_proj",
30
- "k_proj",
31
- "up_proj",
32
- "gate_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "deepseek-ai/deepseek-coder-6.7b-instruct",
5
  "bias": "none",
6
+ "fan_in_fan_out": null,
7
  "inference_mode": true,
8
  "init_lora_weights": true,
9
  "layer_replication": null,
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.05,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
17
  "modules_to_save": [
 
19
  "lm_head"
20
  ],
21
  "peft_type": "LORA",
22
+ "r": 32,
23
  "rank_pattern": {},
24
+ "revision": null,
25
  "target_modules": [
26
+ "up_proj",
27
+ "q_proj",
28
+ "v_proj",
29
+ "gate_proj",
30
  "down_proj",
31
  "o_proj",
32
+ "k_proj"
 
 
 
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7d2ea2c53bae73716e36ef6ef01ee5cad72301210222229e23ef62077ab397f
3
- size 2336289560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:588591505247958507363d98bfb2535733c13dde0ff41a2d5c05267a8ab37eb1
3
+ size 848358568
config.json CHANGED
@@ -5,8 +5,8 @@
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
8
- "bos_token_id": 32013,
9
- "eos_token_id": 32021,
10
  "hidden_act": "silu",
11
  "hidden_size": 4096,
12
  "initializer_range": 0.02,
@@ -17,6 +17,21 @@
17
  "num_hidden_layers": 32,
18
  "num_key_value_heads": 32,
19
  "pretraining_tp": 1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  "rms_norm_eps": 1e-06,
21
  "rope_scaling": {
22
  "factor": 4.0,
@@ -26,7 +41,6 @@
26
  "tie_word_embeddings": false,
27
  "torch_dtype": "bfloat16",
28
  "transformers_version": "4.40.0.dev0",
29
- "unsloth_version": "2024.6",
30
- "use_cache": true,
31
  "vocab_size": 32256
32
  }
 
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
8
+ "bos_token_id": 32022,
9
+ "eos_token_id": 32023,
10
  "hidden_act": "silu",
11
  "hidden_size": 4096,
12
  "initializer_range": 0.02,
 
17
  "num_hidden_layers": 32,
18
  "num_key_value_heads": 32,
19
  "pretraining_tp": 1,
20
+ "quantization_config": {
21
+ "_load_in_4bit": false,
22
+ "_load_in_8bit": true,
23
+ "bnb_4bit_compute_dtype": "float32",
24
+ "bnb_4bit_quant_storage": "uint8",
25
+ "bnb_4bit_quant_type": "fp4",
26
+ "bnb_4bit_use_double_quant": false,
27
+ "llm_int8_enable_fp32_cpu_offload": false,
28
+ "llm_int8_has_fp16_weight": false,
29
+ "llm_int8_skip_modules": null,
30
+ "llm_int8_threshold": 6.0,
31
+ "load_in_4bit": false,
32
+ "load_in_8bit": true,
33
+ "quant_method": "bitsandbytes"
34
+ },
35
  "rms_norm_eps": 1e-06,
36
  "rope_scaling": {
37
  "factor": 4.0,
 
41
  "tie_word_embeddings": false,
42
  "torch_dtype": "bfloat16",
43
  "transformers_version": "4.40.0.dev0",
44
+ "use_cache": false,
 
45
  "vocab_size": 32256
46
  }
special_tokens_map.json CHANGED
@@ -1,22 +1,22 @@
1
  {
2
  "bos_token": {
3
- "content": "<|begin▁of▁sentence|>",
4
  "lstrip": false,
5
- "normalized": true,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
- "content": "<|EOT|>",
11
  "lstrip": false,
12
- "normalized": true,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
- "content": "<|end▁of▁sentence|>",
18
  "lstrip": false,
19
- "normalized": true,
20
  "rstrip": false,
21
  "single_word": false
22
  }
 
1
  {
2
  "bos_token": {
3
+ "content": "<|begin_of_sentence|>",
4
  "lstrip": false,
5
+ "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<|end_of_sentence|>",
11
  "lstrip": false,
12
+ "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "pad_token": {
17
+ "content": "<|end_of_sentence|>",
18
  "lstrip": false,
19
+ "normalized": false,
20
  "rstrip": false,
21
  "single_word": false
22
  }
tokenizer.json CHANGED
@@ -200,6 +200,24 @@
200
  "rstrip": false,
201
  "normalized": true,
202
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  }
204
  ],
205
  "normalizer": {
@@ -258,7 +276,7 @@
258
  "single": [
259
  {
260
  "SpecialToken": {
261
- "id": "<|begin▁of▁sentence|>",
262
  "type_id": 0
263
  }
264
  },
@@ -272,7 +290,7 @@
272
  "pair": [
273
  {
274
  "SpecialToken": {
275
- "id": "<|begin▁of▁sentence|>",
276
  "type_id": 0
277
  }
278
  },
@@ -284,7 +302,7 @@
284
  },
285
  {
286
  "SpecialToken": {
287
- "id": "<|begin▁of▁sentence|>",
288
  "type_id": 1
289
  }
290
  },
@@ -296,13 +314,13 @@
296
  }
297
  ],
298
  "special_tokens": {
299
- "<|begin▁of▁sentence|>": {
300
- "id": "<|begin▁of▁sentence|>",
301
  "ids": [
302
- 32013
303
  ],
304
  "tokens": [
305
- "<|begin▁of▁sentence|>"
306
  ]
307
  }
308
  }
 
200
  "rstrip": false,
201
  "normalized": true,
202
  "special": true
203
+ },
204
+ {
205
+ "id": 32022,
206
+ "content": "<|begin_of_sentence|>",
207
+ "single_word": false,
208
+ "lstrip": false,
209
+ "rstrip": false,
210
+ "normalized": false,
211
+ "special": true
212
+ },
213
+ {
214
+ "id": 32023,
215
+ "content": "<|end_of_sentence|>",
216
+ "single_word": false,
217
+ "lstrip": false,
218
+ "rstrip": false,
219
+ "normalized": false,
220
+ "special": true
221
  }
222
  ],
223
  "normalizer": {
 
276
  "single": [
277
  {
278
  "SpecialToken": {
279
+ "id": "<|begin_of_sentence|>",
280
  "type_id": 0
281
  }
282
  },
 
290
  "pair": [
291
  {
292
  "SpecialToken": {
293
+ "id": "<|begin_of_sentence|>",
294
  "type_id": 0
295
  }
296
  },
 
302
  },
303
  {
304
  "SpecialToken": {
305
+ "id": "<|begin_of_sentence|>",
306
  "type_id": 1
307
  }
308
  },
 
314
  }
315
  ],
316
  "special_tokens": {
317
+ "<|begin_of_sentence|>": {
318
+ "id": "<|begin_of_sentence|>",
319
  "ids": [
320
+ 32022
321
  ],
322
  "tokens": [
323
+ "<|begin_of_sentence|>"
324
  ]
325
  }
326
  }
tokenizer_config.json CHANGED
@@ -177,18 +177,34 @@
177
  "rstrip": false,
178
  "single_word": false,
179
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  }
181
  },
182
- "bos_token": "<|begin▁of▁sentence|>",
183
  "chat_template": "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
184
  "clean_up_tokenization_spaces": false,
185
- "eos_token": "<|EOT|>",
186
  "legacy": true,
187
  "model_max_length": 16384,
188
- "pad_token": "<|end▁of▁sentence|>",
189
- "padding_side": "left",
190
  "sp_model_kwargs": {},
191
  "tokenizer_class": "LlamaTokenizer",
192
  "unk_token": null,
193
- "use_default_system_prompt": false
 
194
  }
 
177
  "rstrip": false,
178
  "single_word": false,
179
  "special": true
180
+ },
181
+ "32022": {
182
+ "content": "<|begin_of_sentence|>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": true
188
+ },
189
+ "32023": {
190
+ "content": "<|end_of_sentence|>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": true
196
  }
197
  },
198
+ "bos_token": "<|begin_of_sentence|>",
199
  "chat_template": "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
200
  "clean_up_tokenization_spaces": false,
201
+ "eos_token": "<|end_of_sentence|>",
202
  "legacy": true,
203
  "model_max_length": 16384,
204
+ "pad_token": "<|end_of_sentence|>",
 
205
  "sp_model_kwargs": {},
206
  "tokenizer_class": "LlamaTokenizer",
207
  "unk_token": null,
208
+ "use_default_system_prompt": false,
209
+ "use_fast": true
210
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bb5dadad399f56bab17c0969fd231762f07aae4fc36f59d67ab4a339e567cfc
3
  size 5880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0eaa08222fb80db93edb290a2d6190a3b510a1f8d4226cb8cefe6ee779fbded5
3
  size 5880