seawolf2357 committed on
Commit
6ab04f4
1 Parent(s): 728fb17

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -7
app.py CHANGED
@@ -18,7 +18,7 @@ def respond(
18
  message,
19
  history: list[tuple[str, str]],
20
  system_message="AI Assistant Role",
21
- max_tokens=2048,
22
  temperature=0.7,
23
  top_p=0.95,
24
  ):
@@ -30,7 +30,6 @@ def respond(
30
  memory.append((message, None))
31
 
32
  messages = [{"role": "system", "content": full_system_message}]
33
-
34
  # 메모리에서 대화 기록을 가져와 메시지 목록에 추가
35
  for val in memory:
36
  if val[0]:
@@ -42,7 +41,6 @@ def respond(
42
  "Authorization": f"Bearer {TOKEN}",
43
  "Content-Type": "application/json"
44
  }
45
-
46
  payload = {
47
  "model": "meta-llama/Meta-Llama-3.1-405B-Instruct",
48
  "max_tokens": max_tokens,
@@ -50,7 +48,6 @@ def respond(
50
  "top_p": top_p,
51
  "messages": messages
52
  }
53
-
54
  response = requests.post("https://api-inference.huggingface.co/v1/chat/completions", headers=headers, json=payload, stream=True)
55
 
56
  # Stream 방식으로 데이터를 출력
@@ -79,9 +76,8 @@ demo = gr.ChatInterface(
79
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
80
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
81
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
82
- ],
83
- streaming=True # 스트리밍 모드 활성화
84
  )
85
 
86
  if __name__ == "__main__":
87
- demo.queue().launch(max_threads=20)
 
18
  message,
19
  history: list[tuple[str, str]],
20
  system_message="AI Assistant Role",
21
+ max_tokens=512,
22
  temperature=0.7,
23
  top_p=0.95,
24
  ):
 
30
  memory.append((message, None))
31
 
32
  messages = [{"role": "system", "content": full_system_message}]
 
33
  # 메모리에서 대화 기록을 가져와 메시지 목록에 추가
34
  for val in memory:
35
  if val[0]:
 
41
  "Authorization": f"Bearer {TOKEN}",
42
  "Content-Type": "application/json"
43
  }
 
44
  payload = {
45
  "model": "meta-llama/Meta-Llama-3.1-405B-Instruct",
46
  "max_tokens": max_tokens,
 
48
  "top_p": top_p,
49
  "messages": messages
50
  }
 
51
  response = requests.post("https://api-inference.huggingface.co/v1/chat/completions", headers=headers, json=payload, stream=True)
52
 
53
  # Stream 방식으로 데이터를 출력
 
76
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
77
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
78
  gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
79
+ ]
 
80
  )
81
 
82
  if __name__ == "__main__":
83
+ demo.queue().launch(max_threads=20)