chandlerTSLabs committed on
Commit
fe4d315
0 Parent(s):

Initial commit

.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,190 @@
+ ---
+ license: mit
+ library_name: peft
+ base_model: mistralai/Mistral-7B-v0.1
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ [<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
+ <details><summary>See axolotl config</summary>
+
+ axolotl version: `0.4.0`
+
+ ```yaml
+ base_model: mistralai/Mistral-7B-v0.1
+ model_type: MistralForCausalLM
+ tokenizer_type: LlamaTokenizer
+ is_mistral_derived_model: true
+
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+
+ datasets:
+   # This will be the path used for the data when it is saved to the Volume in the cloud.
+   - path: data.jsonl
+     ds_type: json
+     type:
+       # JSONL file contains question, context, answer fields per line.
+       # This gets mapped to instruction, input, output axolotl tags.
+       field_instruction: instruction
+       field_input: input
+       field_output: output
+       # Format is used by axolotl to generate the prompt.
+       format: |-
+         [INST]{input}
+         {instruction} [/INST]
+
+ dataset_prepared_path:
+ val_set_size: 0.05
+ output_dir: ./lora-out
+
+ sequence_len: 4096
+ sample_packing: false
+ eval_sample_packing: false
+ pad_to_sequence_len: false
+
+ adapter: lora
+ lora_model_dir:
+ lora_r: 16
+ lora_alpha: 32
+ lora_dropout: 0.05
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+
+ wandb_project:
+ wandb_entity:
+ wandb_watch:
+ wandb_run_id:
+
+ gradient_accumulation_steps: 1
+ micro_batch_size: 32
+ num_epochs: 4
+ optimizer: adamw_torch
+ lr_scheduler: cosine
+ learning_rate: 0.0001
+
+ bf16: auto
+ fp16: false
+ tf32: false
+ train_on_inputs: false
+ group_by_length: false
+
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_steps: 10
+ save_steps:
+ debug:
+ deepspeed: /root/axolotl/deepspeed_configs/zero3_bf16.json
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
+
+ ```
+
+ </details><br>
+
+ # Mistral Sentiment Analysis
+
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the [FinGPT Sentiment](https://huggingface.co/datasets/FinGPT/fingpt-sentiment-train) dataset. It is intended for sentiment analysis of financial text. See the [FinGPT Project](https://github.com/AI4Finance-Foundation/FinGPT) for more information.
+ It achieves the following results on the evaluation set:
+ * Loss: 0.1598
+
+ ## Ollama Example
+
+ ```bash
+ ollama run chand1012/mistral_sentiment
+ >>> Apple (NASDAQ:AAPL) Up Fractionally despite Rising Vision Pro Returns Please choose an answer from {negative/neutral/positive}
+ positive
+ ```
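+
+ The same model can also be queried programmatically through Ollama's local HTTP API. The snippet below is a minimal sketch: it assumes an Ollama server is running on its default port (11434) and that the `chand1012/mistral_sentiment` model has already been pulled.
+
+ ```python
+ import requests
+
+ # Ask the locally running Ollama server for a completion (non-streaming).
+ resp = requests.post(
+     "http://localhost:11434/api/generate",
+     json={
+         "model": "chand1012/mistral_sentiment",
+         "prompt": "Apple (NASDAQ:AAPL) Up Fractionally despite Rising Vision Pro Returns "
+                   "Please choose an answer from {negative/neutral/positive}",
+         "stream": False,
+     },
+     timeout=120,
+ )
+ print(resp.json()["response"])  # e.g. "positive"
+ ```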
+
+ ## Python Example
+
+ ```python
+ from transformers import LlamaForCausalLM, LlamaTokenizerFast
+ from peft import PeftModel  # 0.8.2
+
+ # Load models
+ base_model = "mistralai/Mistral-7B-v0.1"
+ peft_model = "TimeSurgeLabs/mistral_sentiment_lora"
+ tokenizer = LlamaTokenizerFast.from_pretrained(base_model, trust_remote_code=True)
+ tokenizer.pad_token = tokenizer.eos_token
+ model = LlamaForCausalLM.from_pretrained(base_model, trust_remote_code=True, device_map="cuda:0", load_in_8bit=True)
+ model = PeftModel.from_pretrained(model, peft_model)
+ model = model.eval()
+
+ # Make prompts
+ prompt = [
+     '''Instruction: What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}
+ Input: FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is aggressively pursuing its growth strategy by increasingly focusing on technologically more demanding HDI printed circuit boards PCBs .
+ Answer: ''',
+     '''Instruction: What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}
+ Input: According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing .
+ Answer: ''',
+     '''Instruction: What is the sentiment of this news? Please choose an answer from {negative/neutral/positive}
+ Input: A tinyurl link takes users to a scamming site promising that users can earn thousands of dollars by becoming a Google ( NASDAQ : GOOG ) Cash advertiser .
+ Answer: ''',
+ ]
+
+ # Generate results (inputs must be on the same device as the model)
+ tokens = tokenizer(prompt, return_tensors='pt', padding=True, max_length=512).to(model.device)
+ res = model.generate(**tokens, max_length=512)
+ res_sentences = [tokenizer.decode(i) for i in res]
+ out_text = [o.split("Answer: ")[1] for o in res_sentences]
+
+ # Show results
+ for sentiment in out_text:
+     print(sentiment)
+
+ # Output:
+ # positive
+ # neutral
+ # negative
+ ```
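+
+ If you prefer to deploy without PEFT at inference time, the LoRA adapter can be merged into the base weights. This is a minimal sketch, assuming enough memory to load the base model in full precision; the output directory name is only illustrative.
+
+ ```python
+ from transformers import LlamaForCausalLM, LlamaTokenizerFast
+ from peft import PeftModel
+
+ base_model = "mistralai/Mistral-7B-v0.1"
+ peft_model = "TimeSurgeLabs/mistral_sentiment_lora"
+
+ # Load the base model and attach the LoRA adapter.
+ model = LlamaForCausalLM.from_pretrained(base_model, torch_dtype="auto")
+ model = PeftModel.from_pretrained(model, peft_model)
+
+ # Fold the adapter deltas into the base weights and drop the PEFT wrappers.
+ merged = model.merge_and_unload()
+
+ # Save a standalone checkpoint and the matching tokenizer (path is illustrative).
+ merged.save_pretrained("./mistral_sentiment_merged")
+ LlamaTokenizerFast.from_pretrained(base_model).save_pretrained("./mistral_sentiment_merged")
+ ```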
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ * learning_rate: 0.0001
+ * train_batch_size: 32
+ * eval_batch_size: 32
+ * seed: 42
+ * distributed_type: multi-GPU
+ * num_devices: 2
+ * total_train_batch_size: 64
+ * total_eval_batch_size: 64
+ * optimizer: Adam with betas=(0.9, 0.999) and epsilon=1e-08
+ * lr_scheduler_type: cosine
+ * lr_scheduler_warmup_steps: 10
+ * num_epochs: 4
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:-----:|:----:|:---------------:|
+ | 0.0678        | 1.0   | 1140 | 0.1124          |
+ | 0.1339        | 2.0   | 2280 | 0.1008          |
+ | 0.0497        | 3.0   | 3420 | 0.1146          |
+ | 0.0016        | 4.0   | 4560 | 0.1598          |
+
+ ### Framework versions
+
+ * PEFT 0.8.2
+ * Transformers 4.38.0.dev0
+ * Pytorch 2.1.2+cu121
+ * Datasets 2.17.0
+ * Tokenizers 0.15.0
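+
+ ### Adapter configuration (reference)
+
+ The LoRA settings above (r=16, alpha=32, dropout=0.05, all linear projections targeted) correspond roughly to the following PEFT `LoraConfig`. This is a sketch reconstructed from the shipped `adapter_config.json` for readers who want to reproduce the setup outside axolotl, not the exact code the trainer ran.
+
+ ```python
+ from peft import LoraConfig
+
+ # Approximate equivalent of the adapter_config.json shipped with this repo.
+ lora_config = LoraConfig(
+     r=16,
+     lora_alpha=32,
+     lora_dropout=0.05,
+     bias="none",
+     task_type="CAUSAL_LM",
+     target_modules=[
+         "q_proj", "k_proj", "v_proj", "o_proj",
+         "gate_proj", "up_proj", "down_proj",
+     ],
+ )
+ ```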
adapter_config.json ADDED
@@ -0,0 +1,32 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+   "bias": "none",
+   "fan_in_fan_out": null,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 32,
+   "lora_dropout": 0.05,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 16,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "gate_proj",
+     "v_proj",
+     "o_proj",
+     "k_proj",
+     "up_proj",
+     "down_proj",
+     "q_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "use_rslora": false
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7fee993242168c43115a0b719d530b6355a11e3756b044504c4c837f1ff9f44a
+ size 83946192
config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "_name_or_path": "mistralai/Mistral-7B-v0.1",
+   "architectures": [
+     "MistralForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 32768,
+   "model_type": "mistral",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 10000.0,
+   "sliding_window": 4096,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.38.0.dev0",
+   "use_cache": false,
+   "vocab_size": 32000
+ }
ggml-adapter-model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5460b3f6415dd6264e7b288ea5251e6451abcfe9f57011a1e96dd4ba992ab28a
+ size 167800832
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "</s>",
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "</s>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "trust_remote_code": false,
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false,
+   "use_fast": true
+ }