Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -23,13 +23,14 @@ def configure_quantization():
|
|
23 |
# Initialize the LLM
|
24 |
@st.cache_resource
|
25 |
def initialize_llm(hf_token):
|
26 |
-
quantization_config = configure_quantization()
|
27 |
model_name = 'HuggingFaceH4/zephyr-7b-beta'
|
28 |
return HuggingFaceLLM(
|
29 |
model_name = model_name, #meta-llama/Meta-Llama-3-8B-Instruct meta-llama/Llama-2-7b-chat-hf #google/gemma-7b-it #HuggingFaceH4/zephyr-7b-beta
|
30 |
tokenizer_name = model_name,
|
31 |
context_window=3900,
|
32 |
-
model_kwargs={"token": hf_token, "quantization_config": quantization_config},
|
|
|
33 |
tokenizer_kwargs={"token": hf_token},
|
34 |
max_new_tokens=400,
|
35 |
device_map="auto",
|
|
|
23 |
# Initialize the LLM
|
24 |
@st.cache_resource
|
25 |
def initialize_llm(hf_token):
|
26 |
+
# quantization_config = configure_quantization()
|
27 |
model_name = 'HuggingFaceH4/zephyr-7b-beta'
|
28 |
return HuggingFaceLLM(
|
29 |
model_name = model_name, #meta-llama/Meta-Llama-3-8B-Instruct meta-llama/Llama-2-7b-chat-hf #google/gemma-7b-it #HuggingFaceH4/zephyr-7b-beta
|
30 |
tokenizer_name = model_name,
|
31 |
context_window=3900,
|
32 |
+
# model_kwargs={"token": hf_token, "quantization_config": quantization_config},
|
33 |
+
model_kwargs={"token": hf_token},
|
34 |
tokenizer_kwargs={"token": hf_token},
|
35 |
max_new_tokens=400,
|
36 |
device_map="auto",
|