tangzhy committed on
Commit
b447e6f
1 Parent(s): 2f72adf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -27,12 +27,12 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
27
 
28
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
29
 
30
- quantization_config = BitsAndBytesConfig(
31
- load_in_4bit=True,
32
- bnb_4bit_compute_dtype=torch.bfloat16,
33
- bnb_4bit_use_double_quant=True,
34
- bnb_4bit_quant_type= "nf4")
35
- # quantization_config = BitsAndBytesConfig(load_in_8bit=True)
36
 
37
  model_id = "CardinalOperations/ORLM-LLaMA-3-8B"
38
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
 
27
 
28
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
29
 
30
+ # quantization_config = BitsAndBytesConfig(
31
+ # load_in_4bit=True,
32
+ # bnb_4bit_compute_dtype=torch.bfloat16,
33
+ # bnb_4bit_use_double_quant=True,
34
+ # bnb_4bit_quant_type= "nf4")
35
+ quantization_config = BitsAndBytesConfig(load_in_8bit=True)
36
 
37
  model_id = "CardinalOperations/ORLM-LLaMA-3-8B"
38
  tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)