import gradio as gr
from gradio_client import Client

# Client for the hosted Llama 2 Space (served via text-generation-inference)
client = Client("https://ysharma-explore-llamav2-with-tgi.hf.space/")

title = "Llama2 70B Chatbot"
description = """
This Space demonstrates model [Llama-2-70b-chat-hf](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) by Meta, a Llama 2 model with 70B parameters fine-tuned for chat instructions.
"""
css = """.toast-wrap { display: none !important } """

examples = [
    ['Hello there! How are you doing?'],
    ['Can you explain to me briefly what is Python programming language?'],
    ['Explain the plot of Cinderella in a sentence.'],
    ['How many hours does it take a man to eat a Helicopter?'],
    ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
]


# Forward the chat turn to the hosted Space; returns the complete response text
def predict(message, chatbot, system_prompt="", temperature=0.9, max_new_tokens=4096):
    return client.predict(
        message,         # str in 'Message' Textbox component
        system_prompt,   # str in 'Optional system prompt' Textbox component
        temperature,     # int | float (numeric value between 0.0 and 1.0)
        max_new_tokens,  # int | float (numeric value between 0 and 4096)
        0.3,             # int | float (between 0.0 and 1.0), presumably Top-p
        1,               # int | float (between 1.0 and 2.0), presumably repetition penalty
        api_name="/chat",
    )


additional_inputs = [
    gr.Textbox("", label="Optional system prompt"),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=4096,
        minimum=0,
        maximum=4096,
        step=64,
        interactive=True,
        info="The maximum number of new tokens",
    ),
]

# Gradio demo
with gr.Blocks(theme=gr.themes.Base()) as demo:
    gr.ChatInterface(
        predict,
        title=title,
        description=description,
        css=css,
        examples=examples,
        additional_inputs=additional_inputs,
    )

demo.queue().launch(debug=True)
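
# --- Optional streaming variant (a minimal sketch, not part of the original demo) ---
# `predict` above returns only the final text, because `client.predict` blocks
# until the remote "/chat" endpoint finishes. If that endpoint streams partial
# outputs (it appears to, being TGI-backed), gradio_client's Job API can relay
# them: `client.submit` returns a Job, and iterating a Job from a generator
# endpoint yields intermediate outputs as they arrive. gr.ChatInterface accepts
# generator functions, so a function like this could be defined above the demo
# and passed to gr.ChatInterface in place of `predict`. The Top-p and
# repetition-penalty values mirror the assumptions noted above.
def predict_streaming(message, chatbot, system_prompt="", temperature=0.9, max_new_tokens=4096):
    job = client.submit(
        message,
        system_prompt,
        temperature,
        max_new_tokens,
        0.3,             # presumably Top-p
        1,               # presumably repetition penalty
        api_name="/chat",
    )
    for partial_text in job:  # each item is the response generated so far
        yield partial_text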