Training in progress, epoch 1

Browse files

Files changed (7) hide show

adapter_config.json +10 -10
adapter_model.safetensors +2 -2
config.json +18 -4
special_tokens_map.json +6 -6
tokenizer.json +25 -7
tokenizer_config.json +21 -5
training_args.bin +1 -1

adapter_config.json CHANGED Viewed

@@ -3,15 +3,15 @@
   "auto_mapping": null,
   "base_model_name_or_path": "deepseek-ai/deepseek-coder-6.7b-instruct",
   "bias": "none",
-  "fan_in_fan_out": false,
   "inference_mode": true,
   "init_lora_weights": true,
   "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 32,
-  "lora_dropout": 0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": [
@@ -19,17 +19,17 @@
     "lm_head"
   ],
   "peft_type": "LORA",
-  "r": 128,
   "rank_pattern": {},
-  "revision": "unsloth",
   "target_modules": [
     "down_proj",
     "o_proj",
-    "v_proj",
-    "q_proj",
-    "k_proj",
-    "up_proj",
-    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "auto_mapping": null,
   "base_model_name_or_path": "deepseek-ai/deepseek-coder-6.7b-instruct",
   "bias": "none",
+  "fan_in_fan_out": null,
   "inference_mode": true,
   "init_lora_weights": true,
   "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": [
     "lm_head"
   ],
   "peft_type": "LORA",
+  "r": 32,
   "rank_pattern": {},
+  "revision": null,
   "target_modules": [
+    "up_proj",
+    "q_proj",
+    "v_proj",
+    "gate_proj",
     "down_proj",
     "o_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7d2ea2c53bae73716e36ef6ef01ee5cad72301210222229e23ef62077ab397f
-size 2336289560

 version https://git-lfs.github.com/spec/v1
+oid sha256:588591505247958507363d98bfb2535733c13dde0ff41a2d5c05267a8ab37eb1
+size 848358568

config.json CHANGED Viewed

@@ -5,8 +5,8 @@
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "bos_token_id": 32013,
-  "eos_token_id": 32021,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
@@ -17,6 +17,21 @@
   "num_hidden_layers": 32,
   "num_key_value_heads": 32,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-06,
   "rope_scaling": {
     "factor": 4.0,
@@ -26,7 +41,6 @@
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.40.0.dev0",
-  "unsloth_version": "2024.6",
-  "use_cache": true,
   "vocab_size": 32256
 }

   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
+  "bos_token_id": 32022,
+  "eos_token_id": 32023,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
   "num_hidden_layers": 32,
   "num_key_value_heads": 32,
   "pretraining_tp": 1,
+  "quantization_config": {
+    "_load_in_4bit": false,
+    "_load_in_8bit": true,
+    "bnb_4bit_compute_dtype": "float32",
+    "bnb_4bit_quant_storage": "uint8",
+    "bnb_4bit_quant_type": "fp4",
+    "bnb_4bit_use_double_quant": false,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": false,
+    "load_in_8bit": true,
+    "quant_method": "bitsandbytes"
+  },
   "rms_norm_eps": 1e-06,
   "rope_scaling": {
     "factor": 4.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.40.0.dev0",
+  "use_cache": false,
   "vocab_size": 32256
 }

special_tokens_map.json CHANGED Viewed

@@ -1,22 +1,22 @@
 {
   "bos_token": {
-    "content": "<｜begin▁of▁sentence｜>",
     "lstrip": false,
-    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
-    "content": "<|EOT|>",
     "lstrip": false,
-    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
-    "content": "<｜end▁of▁sentence｜>",
     "lstrip": false,
-    "normalized": true,
     "rstrip": false,
     "single_word": false
   }

 {
   "bos_token": {
+    "content": "<|begin_of_sentence|>",
     "lstrip": false,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
+    "content": "<|end_of_sentence|>",
     "lstrip": false,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
+    "content": "<|end_of_sentence|>",
     "lstrip": false,
+    "normalized": false,
     "rstrip": false,
     "single_word": false
   }

tokenizer.json CHANGED Viewed

@@ -200,6 +200,24 @@
       "rstrip": false,
       "normalized": true,
       "special": true
     }
   ],
   "normalizer": {
@@ -258,7 +276,7 @@
     "single": [
       {
         "SpecialToken": {
-          "id": "<｜begin▁of▁sentence｜>",
           "type_id": 0
         }
       },
@@ -272,7 +290,7 @@
     "pair": [
       {
         "SpecialToken": {
-          "id": "<｜begin▁of▁sentence｜>",
           "type_id": 0
         }
       },
@@ -284,7 +302,7 @@
       },
       {
         "SpecialToken": {
-          "id": "<｜begin▁of▁sentence｜>",
           "type_id": 1
         }
       },
@@ -296,13 +314,13 @@
       }
     ],
     "special_tokens": {
-      "<｜begin▁of▁sentence｜>": {
-        "id": "<｜begin▁of▁sentence｜>",
         "ids": [
-          32013
         ],
         "tokens": [
-          "<｜begin▁of▁sentence｜>"
         ]
       }
     }

       "rstrip": false,
       "normalized": true,
       "special": true
+    },
+    {
+      "id": 32022,
+      "content": "<|begin_of_sentence|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 32023,
+      "content": "<|end_of_sentence|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": {
     "single": [
       {
         "SpecialToken": {
+          "id": "<|begin_of_sentence|>",
           "type_id": 0
         }
       },
     "pair": [
       {
         "SpecialToken": {
+          "id": "<|begin_of_sentence|>",
           "type_id": 0
         }
       },
       },
       {
         "SpecialToken": {
+          "id": "<|begin_of_sentence|>",
           "type_id": 1
         }
       },
       }
     ],
     "special_tokens": {
+      "<|begin_of_sentence|>": {
+        "id": "<|begin_of_sentence|>",
         "ids": [
+          32022
         ],
         "tokens": [
+          "<|begin_of_sentence|>"
         ]
       }
     }

tokenizer_config.json CHANGED Viewed

@@ -177,18 +177,34 @@
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
-  "bos_token": "<｜begin▁of▁sentence｜>",
   "chat_template": "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set ns.found = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n    {%- else %}\n        {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n        {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|EOT|>",
   "legacy": true,
   "model_max_length": 16384,
-  "pad_token": "<｜end▁of▁sentence｜>",
-  "padding_side": "left",
   "sp_model_kwargs": {},
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": null,
-  "use_default_system_prompt": false
 }

       "rstrip": false,
       "single_word": false,
       "special": true
+    },
+    "32022": {
+      "content": "<|begin_of_sentence|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32023": {
+      "content": "<|end_of_sentence|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
     }
   },
+  "bos_token": "<|begin_of_sentence|>",
   "chat_template": "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set ns.found = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n    {%- else %}\n        {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n        {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
   "clean_up_tokenization_spaces": false,
+  "eos_token": "<|end_of_sentence|>",
   "legacy": true,
   "model_max_length": 16384,
+  "pad_token": "<|end_of_sentence|>",
   "sp_model_kwargs": {},
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": null,
+  "use_default_system_prompt": false,
+  "use_fast": true
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9bb5dadad399f56bab17c0969fd231762f07aae4fc36f59d67ab4a339e567cfc
 size 5880

 version https://git-lfs.github.com/spec/v1
+oid sha256:0eaa08222fb80db93edb290a2d6190a3b510a1f8d4226cb8cefe6ee779fbded5
 size 5880