femiari committed on
Commit
66a2e59
1 Parent(s): 5b44560

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -24
README.md CHANGED
@@ -21,50 +21,28 @@ QwenMoEAriel is a Mixture of Experts (MoE) made with the following models using
21
 
22
  ## 🧩 Configuration
23
  base_model : Qwen/Qwen2-1.5B
24
-
25
  architecture: qwen
26
-
27
  experts:
28
-
29
  - source_model: Qwen/Qwen2-1.5B
30
-
31
  positive_prompts:
32
-
33
  - "chat"
34
-
35
  - "assistant"
36
-
37
  - "tell me"
38
-
39
  - "explain"
40
-
41
  - "I want"
42
-
43
  - source_model: Replete-AI/Replete-Coder-Qwen2-1.5b
44
-
45
  positive_prompts:
46
-
47
  - "code"
48
-
49
  - "python"
50
-
51
  - "javascript"
52
-
53
  - "programming"
54
-
55
  - "algorithm"
56
-
57
  shared_experts:
58
-
59
  - source_model: Qwen/Qwen2-1.5B
60
-
61
  positive_prompts: # required by Qwen MoE for "hidden" gate mode, otherwise not allowed
62
-
63
- - "chat"
64
-
65
  # (optional, but recommended:)
66
-
67
- residual_scale: 0.1 # downweight output from shared expert to prevent overcooking the model
68
 
69
  ## 💻 Usage
70
 
 
21
 
22
  ## 🧩 Configuration
23
  base_model : Qwen/Qwen2-1.5B
 
24
  architecture: qwen
 
25
  experts:
 
26
  - source_model: Qwen/Qwen2-1.5B
 
27
  positive_prompts:
 
28
  - "chat"
 
29
  - "assistant"
 
30
  - "tell me"
 
31
  - "explain"
 
32
  - "I want"
 
33
  - source_model: Replete-AI/Replete-Coder-Qwen2-1.5b
 
34
  positive_prompts:
 
35
  - "code"
 
36
  - "python"
 
37
  - "javascript"
 
38
  - "programming"
 
39
  - "algorithm"
 
40
  shared_experts:
 
41
  - source_model: Qwen/Qwen2-1.5B
 
42
  positive_prompts: # required by Qwen MoE for "hidden" gate mode, otherwise not allowed
43
+ - "chat"
 
 
44
  # (optional, but recommended:)
45
+ residual_scale: 0.1 # downweight output from shared expert to prevent overcooking the model
 
46
 
47
  ## 💻 Usage
48