Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*-
"""
This module is a mediator: a Gradio server that relays requests to the OpenAI APIs.
"""
import os | |
import json | |
import argparse | |
import gradio as gr | |
import requests | |
import openai | |
from openai import OpenAI, Stream,AzureOpenAI | |
def http_bot(messages, argsbox):
    """Relay one chat-completion request and return the generated text.

    Args:
        messages: JSON string encoding the ``messages`` list that is passed
            to the chat-completions API.
        argsbox: JSON string encoding an object of settings. Required keys:
            ``api_key``, ``model``, ``temperature``, ``max_tokens``.
            Optional keys: ``base_url`` (``api_base`` is accepted as an
            alias) and ``api_version``. When ``api_version`` is present the
            Azure client is used; otherwise the plain OpenAI client is used.

    Returns:
        The message content of the first choice in the completion.

    Raises:
        json.JSONDecodeError: If either argument is not valid JSON.
        TypeError: If ``argsbox`` does not decode to a JSON object.
        KeyError: If a required setting is missing from ``argsbox``.
    """
    args = json.loads(argsbox)
    messages = json.loads(messages)

    if not isinstance(args, dict):
        raise TypeError("argsbox must decode to a JSON object")

    # Report every missing setting at once, before any client is built.
    required = ("api_key", "model", "temperature", "max_tokens")
    missing = [key for key in required if key not in args]
    if missing:
        raise KeyError("missing required args: " + ", ".join(missing))

    print(messages)
    # Never write the API key to the log; show everything else for debugging.
    print({**args, "api_key": "***"})

    # The UI advertises "base_url"; "api_base" is kept as an alias.
    endpoint = args.get("api_base") or args.get("base_url")
    client_options = {
        "api_key": args["api_key"],
        "timeout": 60,
        "max_retries": 3,
    }
    if args.get("api_version"):
        client = AzureOpenAI(
            api_version=args["api_version"],
            azure_endpoint=endpoint,
            **client_options,
        )
    else:
        client = OpenAI(base_url=endpoint, **client_options)

    chat_completion = client.chat.completions.create(
        messages=messages,
        model=args["model"],
        temperature=float(args["temperature"]),
        max_tokens=int(args["max_tokens"]),
    )
    print(chat_completion)
    return chat_completion.choices[0].message.content
# Build the single-page UI: two input boxes, one output box and a button.
with gr.Blocks() as demo:
    gr.Markdown("# vLLM text completion demo\n")

    # Passed to http_bot as its first argument (`messages`).
    inputbox = gr.Textbox(
        placeholder="Enter text and press ENTER",
        label="Input",
    )
    # Passed to http_bot as its second argument (`argsbox`).
    argsbox = gr.Textbox(
        placeholder="a dict of {api_key, base_url, model, temperature, max_tokens}",
        label="Args",
    )
    # Receives the value returned by http_bot.
    outputbox = gr.Textbox(
        placeholder="Generated result from the model",
        label="Output",
    )

    submit = gr.Button("Submit")
    submit.click(
        fn=http_bot,
        inputs=[inputbox, argsbox],
        outputs=[outputbox],
        api_name="submit",
    )

# Start the server and request a public share link.
demo.launch(share=True)