AnishHF committed
Commit 0b7787a
1 Parent(s): 8b8d45c

Update app.py

Files changed (1)
  1. app.py +14 -4
app.py CHANGED
@@ -2,14 +2,24 @@ import os
 import bitsandbytes as bnb
 import torch
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
 access_token = os.environ["GATED_ACCESS_TOKEN"]
 
+# Specify how to quantize the model (4-bit NF4 with float16 compute)
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.float16,
+)
+
+model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-v0.1", quantization_config=quantization_config, device_map="auto", token=access_token)
+tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-v0.1", token=access_token)
+
 # Load the tokenizer and model
-model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
-tokenizer = AutoTokenizer.from_pretrained(model_id, token=access_token)
-model = AutoModelForCausalLM.from_pretrained(model_id, token=access_token, load_in_4bit=True)
+#model_id = "mistralai/Mixtral-8x7B-v0.1"
+#tokenizer = AutoTokenizer.from_pretrained(model_id, token=access_token)
+#model = AutoModelForCausalLM.from_pretrained(model_id, token=access_token, load_in_4bit=True)
 #model = AutoModelForCausalLM.from_pretrained(model_id, token=access_token)
 # Initialize the quantizer
 #quantizer = bnb.GemmQuantizer(act_bits=8, weight_bits=8)
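For reference, a minimal sketch of how the quantized model and tokenizer loaded above could be wired into a Gradio interface; the generate_text helper, the max_new_tokens setting, and the Interface layout are illustrative assumptions, not part of this commit:

# Hypothetical usage sketch (not part of the commit): serve the
# 4-bit quantized model through a simple Gradio text-generation UI.
def generate_text(prompt):
    # Tokenize the prompt and move the tensors to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # max_new_tokens=256 is an assumed setting, not taken from app.py
    outputs = model.generate(**inputs, max_new_tokens=256)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

demo = gr.Interface(fn=generate_text, inputs="text", outputs="text")
demo.launch()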