azrai99 commited on
Commit
3339a48
1 Parent(s): f8bc93a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -23,13 +23,14 @@ def configure_quantization():
23
  # Initialize the LLM
24
  @st.cache_resource
25
  def initialize_llm(hf_token):
26
- quantization_config = configure_quantization()
27
  model_name = 'HuggingFaceH4/zephyr-7b-beta'
28
  return HuggingFaceLLM(
29
  model_name = model_name, #meta-llama/Meta-Llama-3-8B-Instruct meta-llama/Llama-2-7b-chat-hf #google/gemma-7b-it #HuggingFaceH4/zephyr-7b-beta
30
  tokenizer_name = model_name,
31
  context_window=3900,
32
- model_kwargs={"token": hf_token, "quantization_config": quantization_config},
 
33
  tokenizer_kwargs={"token": hf_token},
34
  max_new_tokens=400,
35
  device_map="auto",
 
23
  # Initialize the LLM
24
  @st.cache_resource
25
  def initialize_llm(hf_token):
26
+ # quantization_config = configure_quantization()
27
  model_name = 'HuggingFaceH4/zephyr-7b-beta'
28
  return HuggingFaceLLM(
29
  model_name = model_name, #meta-llama/Meta-Llama-3-8B-Instruct meta-llama/Llama-2-7b-chat-hf #google/gemma-7b-it #HuggingFaceH4/zephyr-7b-beta
30
  tokenizer_name = model_name,
31
  context_window=3900,
32
+ # model_kwargs={"token": hf_token, "quantization_config": quantization_config},
33
+ model_kwargs={"token": hf_token},
34
  tokenizer_kwargs={"token": hf_token},
35
  max_new_tokens=400,
36
  device_map="auto",