Pclanglais
/

Brahe-AWQ

Text Generation

text-generation-inference

Inference Endpoints

4-bit precision

Model card | Files and versions | Community

Add transformers + AWQ inference support

#1

by ybelkada - opened Nov 4, 2023

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

Files changed (1) hide show

config.json +8 -1

config.json CHANGED Viewed

@@ -24,5 +24,12 @@
   "torch_dtype": "float16",
   "transformers_version": "4.35.0",
   "use_cache": true,
-  "vocab_size": 32000
 }

   "torch_dtype": "float16",
   "transformers_version": "4.35.0",
   "use_cache": true,
+  "vocab_size": 32000,
+  "quantization_config": {
+    "quant_method": "awq",
+    "zero_point": true,
+    "group_size": 128,
+    "bits": 4,
+    "version": "gemm"
+  }
 }