Yhhxhfh committed on
Commit
a7c9214
1 Parent(s): 2ed9d6c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -10
app.py CHANGED
@@ -10,7 +10,6 @@ import asyncio
10
  import random
11
  from io import BytesIO
12
  import requests
13
- import tempfile
14
 
15
  app = FastAPI()
16
  load_dotenv()
@@ -34,21 +33,24 @@ class ModelManager:
34
  def load_unified_model(self):
35
  model_configs = [
36
  {
37
- "repo_id": "unsloth/Llama-3.2-3B-Instruct-GGUF",
38
- "filename": "Llama-3.2-3B-Instruct-Q4_K_M.gguf",
39
- "name": "Llama-3.2-3B-Instruct-GGUF"
40
  },
41
  ]
42
 
43
  models = []
44
  for config in model_configs:
45
- with BytesIO() as model_data:
46
  download_url = f"https://huggingface.co/{config['repo_id']}/resolve/main/{config['filename']}"
47
- response = requests.get(download_url, headers={"Authorization": f"Bearer {HUGGINGFACE_TOKEN}"})
48
- model_data.write(response.content)
49
- model_data.seek(0)
50
-
51
- model = Llama(model_path=" ", model_data=model_data.read(), **self.params)
 
 
 
 
52
  models.append(model)
53
 
54
  self.params["tokens"] = models[0].tokenize(b"Hello")
 
10
  import random
11
  from io import BytesIO
12
  import requests
 
13
 
14
  app = FastAPI()
15
  load_dotenv()
 
33
  def load_unified_model(self):
34
  model_configs = [
35
  {
36
+ "repo_id": "TheBloke/Llama-2-7B-Chat-GGUF",
37
+ "filename": "llama-2-7b-chat.Q4_K_M.gguf",
 
38
  },
39
  ]
40
 
41
  models = []
42
  for config in model_configs:
43
+ with BytesIO() as model_data:
44
  download_url = f"https://huggingface.co/{config['repo_id']}/resolve/main/{config['filename']}"
45
+ response = requests.get(download_url, headers={"Authorization": f"Bearer {HUGGINGFACE_TOKEN}", "stream": True})
46
+
47
+ for chunk in response.iter_content(chunk_size=1024*1024):
48
+ if chunk:
49
+ model_data.write(chunk)
50
+
51
+ model_data.seek(0)
52
+
53
+ model = Llama(model_path="", model_data=model_data.read(), **self.params)
54
  models.append(model)
55
 
56
  self.params["tokens"] = models[0].tokenize(b"Hello")