piotr25691
committed on
Commit
•
050948f
1
Parent(s):
0af877a
conversion with imatrix
Browse files
conv.sh
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
|
3 |
# convert PyTorch model
|
4 |
# LLaMA 3
|
5 |
-
convert --vocab-type bpe --outtype f16 --outfile conv.bin .
|
6 |
|
7 |
# LLaMA 2
|
8 |
# convert --outtype f16 --outfile conv.bin .
|
@@ -10,8 +10,12 @@ convert --vocab-type bpe --outtype f16 --outfile conv.bin .
|
|
10 |
# define TheBloke quants and create them
|
11 |
declare -a quants=(Q2_K Q3_K_S Q3_K_M Q3_K_L Q4_0 Q4_K_S Q4_K_M Q5_0 Q5_K_S Q5_K_M Q6_K Q8_0)
|
12 |
currpath=${PWD##*/}
|
|
|
13 |
|
14 |
for quant in "${quants[@]}";
|
15 |
do
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
2 |
|
3 |
# convert PyTorch model
|
4 |
# LLaMA 3
|
5 |
+
# convert --vocab-type bpe --outtype f16 --outfile conv.bin .
|
6 |
|
7 |
# LLaMA 2
|
8 |
# convert --outtype f16 --outfile conv.bin .
|
|
|
10 |
# define TheBloke quants and create them
declare -a quants=(Q2_K Q3_K_S Q3_K_M Q3_K_L Q4_0 Q4_K_S Q4_K_M Q5_0 Q5_K_S Q5_K_M Q6_K Q8_0)
currpath=${PWD##*/}

# Select the most recently modified *.F16.gguf as the quantization source.
# (Replaces `basefile=$(ls -Art *.F16.gguf)`: parsing `ls` word-splits on
# spaces and silently concatenates every match into one string when more
# than one F16 file exists. A glob plus `-nt` picks exactly one file.)
basefile=
for candidate in *.F16.gguf; do
  [[ -e "$candidate" ]] || continue          # glob did not match anything
  if [[ -z "$basefile" || "$candidate" -nt "$basefile" ]]; then
    basefile=$candidate
  fi
done
if [[ -z "$basefile" ]]; then
  printf 'error: no *.F16.gguf found in %s\n' "$PWD" >&2
  exit 1
fi

for quant in "${quants[@]}"; do
  # basic GGUF
  # ${currpath::-5} drops the directory name's last 5 chars — presumably a
  # "-GGUF" suffix; TODO confirm against the repo naming convention.
  quantize "$basefile" "${currpath::-5}.$quant.gguf" "$quant"
  # imatrix GGUF
  # quantize --imatrix model.imatrix "$basefile" "${currpath::-5}.$quant.gguf" "$quant"
done