mobicham committed on
Commit
2608dc1
1 Parent(s): f894411

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -1
README.md CHANGED
@@ -63,7 +63,7 @@ prepare_for_inference(model, backend="marlin", allow_merge=True) #use float16
63
 
64
  #Generate
65
  from hqq.utils.generation_hf import HFGenerator
66
-
67
  gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial")
68
 
69
  gen.generate("Write an essay about large language models", print_tokens=True)
 
63
 
64
  #Generate
65
  from hqq.utils.generation_hf import HFGenerator
66
+ #For longer context, make sure to allocate enough cache via the cache_size= parameter
67
  gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial")
68
 
69
  gen.generate("Write an essay about large language models", print_tokens=True)