peruginia committed
Commit
865fbd3
1 Parent(s): 447d953

update model to step 124484

.ipynb_checkpoints/README-checkpoint.md CHANGED
@@ -2,6 +2,7 @@
  language:
  - it
  pipeline_tag: text-generation

  widget:
  - text: Alessandro è un ragazzo che progetta Infissi
  - text: Melissa è una ragazza che adora
@@ -10,31 +11,27 @@ tags:
  - italiano
  - llama
  ---
- This is a train starting from an empty model based exclusively on Italian language datasets (currently redpajama 2023-14 it)
-
- the train is ongoing and will extend to new datasets.
-
- More precise versions will be published shortly.
-
- Train on my server, i have studied and adapted the model starting from the repository https://github.com/karpathy/llama2.c
-
- # max_seq_len: 7b = 2048: It represents the maximum sequence length for input data.
- max_seq_len = 1024 #7b=2048
-
- # dim 7b= 4096: This attribute represents the dimensionality of the model
- dim = 768
-
- # n_layers: 7b = 32: It specifies the number of layers in the model
- n_layers = 32
-
- # n_heads: 7b = 32: This attribute determines the number of attention heads in the model
- n_heads = 32
-
- # n_kv_heads: 7b = 32: It represents the number of key and value heads,
- n_kv_heads = 32
-
- # multiple_of: 7b = 256: It specifies a value used to make the SwiGLU hidden layer size a multiple of a large power of 2
- multiple_of = 32
-
- num decayed parameter tensors: 225, with 251,068,416 parameters
- num non-decayed parameter tensors: 65, with 49,920 parameters
 
  language:
  - it
  pipeline_tag: text-generation
+ max_length: 100
  widget:
  - text: Alessandro è un ragazzo che progetta Infissi
  - text: Melissa è una ragazza che adora
 
  - italiano
  - llama
  ---
+ This is a training run starting from an empty model, based exclusively on Italian-language datasets (currently RedPajama 2023-14 it).<br/>
+ <br/>
+ Training is ongoing and will be extended to new datasets.<br/>
+ <br/>
+ More accurate versions will be published shortly.<br/>
+ <br/>
+ Trained on my own server; I studied and adapted the model starting from the repository https://github.com/karpathy/llama2.c<br/>
+ <br/>
+ # max_seq_len: (7b = 2048) The maximum sequence length for input data.<br/>
+ # dim: (7b = 4096) The dimensionality of the model.<br/>
+ # n_layers: (7b = 32) The number of layers in the model.<br/>
+ # n_heads: (7b = 32) The number of attention heads in the model.<br/>
+ # n_kv_heads: (7b = 32) The number of key and value heads.<br/>
+ # multiple_of: (7b = 256) A value used to make the SwiGLU hidden layer size a multiple of a large power of 2.<br/>
+ <br/>
+ max_seq_len = 1024<br/>
+ dim = 768<br/>
+ n_layers = 32<br/>
+ n_heads = 32<br/>
+ n_kv_heads = 32<br/>
+ multiple_of = 32<br/>
+ <br/>
+ num decayed parameter tensors: 225, with 251,068,416 parameters<br/>
+ num non-decayed parameter tensors: 65, with 49,920 parameters<br/>
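The decayed/non-decayed counts above follow directly from these hyperparameters. A minimal sketch of the arithmetic, assuming the standard Llama layout (tied input/output embeddings, per-layer wq/wk/wv/wo, and a three-matrix SwiGLU feed-forward with intermediate size 2048 as declared in config.json):

```python
# Hyperparameters from the README above; vocab_size and intermediate_size
# are taken from config.json (32000 and 2048).
dim, n_layers = 768, 32
vocab_size, hidden_dim = 32000, 2048

embedding   = vocab_size * dim             # tied with the output head, counted once
attention   = 4 * dim * dim                # wq, wk, wv, wo in each layer
feedforward = 3 * dim * hidden_dim         # w1, w2, w3 of the SwiGLU block in each layer

decayed     = embedding + n_layers * (attention + feedforward)
non_decayed = (2 * n_layers + 1) * dim     # two RMSNorm weights per layer plus the final norm

print(decayed, non_decayed)  # 251068416 49920, matching the counts reported above
```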
 
 
 
 
.ipynb_checkpoints/config-checkpoint.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 2048,
+   "max_position_embeddings": 1024,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 32,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": true,
+   "transformers_version": "4.37.1",
+   "use_cache": true,
+   "vocab_size": 32000
+ }
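This config declares a standard LlamaForCausalLM, so the checkpoint can be loaded with the usual transformers API. A minimal sketch, assuming a published repository id (the id below is a placeholder, not the actual one) and reusing the widget prompt and max_length: 100 from the README:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "peruginia/<repo-name>"  # placeholder: substitute the actual model id

# config.json tells transformers to build a LlamaForCausalLM with hidden_size=768,
# 32 layers, 32 heads and a 1024-token context window.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
print(generator("Alessandro è un ragazzo che progetta Infissi", max_length=100)[0]["generated_text"])
```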
README.md CHANGED
@@ -11,27 +11,27 @@ tags:
  - italiano
  - llama
  ---
- This is a train starting from an empty model based exclusively on Italian language datasets (currently redpajama 2023-14 it)
-
- the train is ongoing and will extend to new datasets.
-
- More precise versions will be published shortly.
-
- Train on my server, i have studied and adapted the model starting from the repository https://github.com/karpathy/llama2.c
-
- # max_seq_len: 7b = 2048: It represents the maximum sequence length for input data.
- # dim 7b= 4096: This attribute represents the dimensionality of the model
- # n_layers: 7b = 32: It specifies the number of layers in the model
- # n_heads: 7b = 32: This attribute determines the number of attention heads in the model
- # n_kv_heads: 7b = 32: It represents the number of key and value heads,
- # multiple_of: 7b = 256: It specifies a value used to make the SwiGLU hidden layer size a multiple of a large power of 2
-
- max_seq_len = 1024
- dim = 768
- n_layers = 32
- n_heads = 32
- n_kv_heads = 32
- multiple_of = 32
-
- num decayed parameter tensors: 225, with 251,068,416 parameters
- num non-decayed parameter tensors: 65, with 49,920 parameters
 
  - italiano
  - llama
  ---
+ This is a training run starting from an empty model, based exclusively on Italian-language datasets (currently RedPajama 2023-14 it).<br/>
+ <br/>
+ Training is ongoing and will be extended to new datasets.<br/>
+ <br/>
+ More accurate versions will be published shortly.<br/>
+ <br/>
+ Trained on my own server; I studied and adapted the model starting from the repository https://github.com/karpathy/llama2.c<br/>
+ <br/>
+ # max_seq_len: (7b = 2048) The maximum sequence length for input data.<br/>
+ # dim: (7b = 4096) The dimensionality of the model.<br/>
+ # n_layers: (7b = 32) The number of layers in the model.<br/>
+ # n_heads: (7b = 32) The number of attention heads in the model.<br/>
+ # n_kv_heads: (7b = 32) The number of key and value heads.<br/>
+ # multiple_of: (7b = 256) A value used to make the SwiGLU hidden layer size a multiple of a large power of 2.<br/>
+ <br/>
+ max_seq_len = 1024<br/>
+ dim = 768<br/>
+ n_layers = 32<br/>
+ n_heads = 32<br/>
+ n_kv_heads = 32<br/>
+ multiple_of = 32<br/>
+ <br/>
+ num decayed parameter tensors: 225, with 251,068,416 parameters<br/>
+ num non-decayed parameter tensors: 65, with 49,920 parameters<br/>
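For reference, multiple_of only affects how the SwiGLU hidden size is rounded. A small sketch of the rounding rule used in llama2.c-style feed-forward code (the formula is taken from the upstream llama2.c reference, so treat it as an assumption about this repository):

```python
def swiglu_hidden_dim(dim: int, multiple_of: int) -> int:
    # Start from 4*dim, scale by 2/3 for the gated (SwiGLU) variant,
    # then round up to the nearest multiple of `multiple_of`.
    hidden = int(2 * (4 * dim) / 3)
    return multiple_of * ((hidden + multiple_of - 1) // multiple_of)

print(swiglu_hidden_dim(768, 32))  # 2048, the intermediate_size seen in config.json
```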
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0235d17ba1f3feecb09f3975964ced834af07144e40ac997e2a88efa30271a97
  size 1004567442
 
  version https://git-lfs.github.com/spec/v1
+ oid sha256:ab62b69b46b7f795f22d07447f33fa985864f7fdd281df9a3d26834a1750744f
  size 1004567442
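This commit only swaps the Git LFS pointer's content hash (the file size is unchanged). A minimal sketch for checking that a locally downloaded pytorch_model.bin matches the new pointer, assuming the file sits in the current directory:

```python
import hashlib

# oid from the updated LFS pointer above
EXPECTED_SHA256 = "ab62b69b46b7f795f22d07447f33fa985864f7fdd281df9a3d26834a1750744f"

sha = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:            # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

print("OK" if sha.hexdigest() == EXPECTED_SHA256 else "hash mismatch")
```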