tangzhy committed on
Commit: b7bc525
Parent: b57680f

Update app.py

Files changed (1): app.py (+2 -2)
app.py CHANGED
@@ -8,7 +8,7 @@ import torch
 from transformers import (
     AutoModelForCausalLM,
     BitsAndBytesConfig,
-    GemmaTokenizerFast,
+    AutoTokenizer,
     TextIteratorStreamer,
 )
 
@@ -29,7 +29,7 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 model_id = "google/gemma-2-9b-it"
-tokenizer = GemmaTokenizerFast.from_pretrained(model_id)
+tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
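
For context (not part of the commit), a minimal sketch of what the change relies on, assuming transformers is installed and you have access to the gated google/gemma-2-9b-it checkpoint: AutoTokenizer.from_pretrained with use_fast=True resolves the tokenizer class from the checkpoint config, which for this model is the fast Gemma tokenizer, so the rest of app.py keeps working unchanged.

from transformers import AutoTokenizer

model_id = "google/gemma-2-9b-it"
# AutoTokenizer picks the concrete tokenizer class from the checkpoint config;
# for this model it is expected to resolve to the fast Gemma tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
print(type(tokenizer).__name__)  # expected: GemmaTokenizerFast

# Tokenizing a prompt works exactly as it did with the explicit class.
input_ids = tokenizer("Hello, Gemma!", return_tensors="pt").input_ids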