# encoding = "utf-8"
"""
This is a mediator: a gradio server for OpenAI APIs
"""

import os
import json
import argparse

import gradio as gr
import requests
import openai
from openai import OpenAI, Stream, AzureOpenAI


def http_bot(messages, argsbox):
    """Send a chat-completion request to Azure OpenAI and return the reply text.

    Args:
        messages: JSON string that decodes to the ``messages`` value passed
            to ``client.chat.completions.create``.
        argsbox: JSON string that decodes to an object with the keys
            ``api_key``, ``api_version``, ``api_base`` (``base_url`` is
            accepted as a fallback name for the endpoint), ``model``,
            ``temperature`` and ``max_tokens``.

    Returns:
        The ``content`` of the first choice's message in the completion.

    Raises:
        json.JSONDecodeError: If either argument is not valid JSON.
        ValueError: If ``argsbox`` does not decode to a JSON object.
        KeyError: If a required key is missing from ``argsbox``.
    """
    args = json.loads(argsbox)
    messages = json.loads(messages)
    if not isinstance(args, dict):
        raise ValueError("argsbox must be a JSON object")

    print(messages)
    # Log the arguments with the API key masked so the secret never lands in
    # the server's stdout.
    print({key: ("***" if key == "api_key" else value) for key, value in args.items()})

    # The decoded args are a plain dict, so values must be read by key
    # (attribute access such as ``args.api_key`` raises AttributeError).
    endpoint = args.get("api_base") or args.get("base_url")
    if not endpoint:
        raise KeyError("api_base")

    # NOTE(review): the previous revision also built, and immediately
    # discarded, an ``OpenAI(timeout=60, max_retries=3, ...)`` client. If those
    # limits were intended, pass ``timeout``/``max_retries`` here -- confirm.
    client = AzureOpenAI(
        api_version=args["api_version"],
        azure_endpoint=endpoint,
        api_key=args["api_key"],
    )

    chat_completion = client.chat.completions.create(
        messages=messages,
        model=args["model"],
        temperature=float(args["temperature"]),
        max_tokens=int(args["max_tokens"]),
    )
    print(chat_completion)
    return chat_completion.choices[0].message.content


# UI wiring: two input textboxes (messages JSON, args JSON) feed http_bot and
# its return value is shown in the output textbox.
# NOTE(review): the Args placeholder does not mention ``api_version``, which
# http_bot requires -- consider updating the hint text.
with gr.Blocks() as demo:
    gr.Markdown("# vLLM text completion demo\n")
    inputbox = gr.Textbox(label="Input", placeholder="Enter text and press ENTER")
    argsbox = gr.Textbox(
        label="Args",
        placeholder="a dict of {api_key, base_url, model, temperature, max_tokens}",
    )
    outputbox = gr.Textbox(label="Output", placeholder="Generated result from the model")
    submit = gr.Button("Submit")
    submit.click(http_bot, [inputbox, argsbox], [outputbox], api_name="submit")

demo.launch(share=True)