liuylhf committed 5cc1ca2 (parent: 7c01285)

Model save
README.md CHANGED
@@ -2,7 +2,6 @@
 license: apache-2.0
 library_name: peft
 tags:
-- axolotl
 - generated_from_trainer
 base_model: mistralai/Mixtral-8x7B-Instruct-v0.1
 model-index:
@@ -27,7 +26,6 @@ load_in_4bit: true
 strict: false
 chat_template: inst
 
-datasets:
 datasets:
 - path: ./data/raw_format/tool_used_training.jsonl
   type: sharegpt
@@ -41,7 +39,7 @@ datasets:
 
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.01
-output_dir: ./mixtral-qlora-1-epochs-r64
+output_dir: ./mixtral-lora-2-epochs-r64
 
 adapter: qlora
 lora_model_dir:
@@ -51,7 +49,7 @@ sample_packing: true
 pad_to_sequence_len: true
 
 lora_r: 64
-lora_alpha: 16
+lora_alpha: 64
 lora_dropout: 0.05
 lora_fan_in_fan_out:
 hub_model_id: liuylhf/mixtral-lora-less-modules
@@ -69,7 +67,7 @@ wandb_log_model: end
 
 gradient_accumulation_steps: 4
 micro_batch_size: 2
-num_epochs: 1
+num_epochs: 4
 optimizer: paged_adamw_8bit
 lr_scheduler: cosine
 learning_rate: 0.001
@@ -112,9 +110,7 @@ fsdp_config:
 
 # mixtral-lora-less-modules
 
-This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on the None dataset.
-It achieves the following results on the evaluation set:
-- Loss: 0.1911
+This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on an unknown dataset.
 
 ## Model description
 
@@ -145,23 +141,7 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.95) and epsilon=1e-05
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
-- num_epochs: 1
-
-### Training results
-
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 3.2966        | 0.0   | 1    | 3.2222          |
-| 0.261         | 0.1   | 31   | 0.2720          |
-| 0.1428        | 0.2   | 62   | 0.2252          |
-| 0.2674        | 0.3   | 93   | 0.2108          |
-| 0.1767        | 0.4   | 124  | 0.2043          |
-| 0.105         | 0.5   | 155  | 0.2003          |
-| 0.1799        | 0.6   | 186  | 0.1958          |
-| 0.1528        | 0.7   | 217  | 0.1942          |
-| 0.1954        | 0.8   | 248  | 0.1917          |
-| 0.1821        | 0.9   | 279  | 0.1911          |
-
+- num_epochs: 4
 
 ### Framework versions
 
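The config above trains a QLoRA adapter on top of mistralai/Mixtral-8x7B-Instruct-v0.1 and publishes it as liuylhf/mixtral-lora-less-modules. A minimal sketch of loading the resulting adapter for inference follows; it assumes the transformers, peft, and bitsandbytes packages, mirrors load_in_4bit: true from the config, and is illustrative rather than the repository's own script.

```python
# Minimal sketch: load the base Mixtral model in 4-bit and attach the LoRA adapter.
# Assumes transformers, peft, and bitsandbytes are installed; not the exact training/inference script.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
adapter_id = "liuylhf/mixtral-lora-less-modules"  # hub_model_id from the config above

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # mirrors load_in_4bit: true
    bnb_4bit_compute_dtype=torch.bfloat16,  # assumption; pick a dtype your hardware supports
)

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(
    base_id, quantization_config=bnb_config, device_map="auto"
)
model = PeftModel.from_pretrained(base, adapter_id)

prompt = "[INST] Which tool would you call to look up the weather? [/INST]"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```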
adapter_config.json CHANGED
@@ -9,7 +9,7 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 16,
+  "lora_alpha": 64,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
@@ -19,10 +19,10 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
-    "q_proj",
     "v_proj",
-    "k_proj"
+    "k_proj",
+    "q_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd223b7576019fbc18fdb6df07a26b9b662da56d34b65c55bfe245668d413a3f
+oid sha256:1bd14376b2f68b0c33a669ad954dd4a0f21b6ff2c5861b9ffc7865fdf6749503
 size 218138576
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:007877440649249ac071b2c5c00afa3c113d2399d566bb3f2dab4235d750f45e
+oid sha256:0e7d1c82c017295bf67e72f2f9c3276671b5c7c5e56727f36146b9af4b7bc72e
 size 5624
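The adapter_model.safetensors and training_args.bin entries above are Git LFS pointer files: only the sha256 oid changes, since the binary payloads live in LFS storage. A small standard-library sketch (hypothetical helper, not part of this repo) for checking a downloaded artifact against such a pointer:

```python
# Verify a downloaded artifact against a Git LFS pointer (oid sha256 + size), standard library only.
import hashlib
from pathlib import Path

def verify_lfs_pointer(file_path: str, expected_sha256: str, expected_size: int) -> bool:
    """Return True if the file's byte size and SHA-256 digest match the LFS pointer fields."""
    path = Path(file_path)
    if path.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_sha256

# Example with the new adapter weights pointer from this commit:
ok = verify_lfs_pointer(
    "adapter_model.safetensors",
    "1bd14376b2f68b0c33a669ad954dd4a0f21b6ff2c5861b9ffc7865fdf6749503",
    218138576,
)
print("checksum matches" if ok else "checksum mismatch")
```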