"""Gradio chat UI ("HistoryBot") backed by an OpenAI-compatible vLLM endpoint on RunPod."""

import os

import gradio as gr
from openai import OpenAI

# API key must be present in the environment; OpenAI() raises on a missing key
# only at request time, so a None here surfaces as an auth error per call.
api_key = os.environ.get("RUNPOD_API_KEY")

client = OpenAI(
    api_key=api_key,
    base_url="https://api.runpod.ai/v2/vllm-k0g4c60zor9xuu/openai/v1",
)


def predict(message, history):
    """Answer one chat turn via the remote model.

    Args:
        message: The user's latest message (str).
        history: Prior turns as (user, assistant) string pairs, as supplied
            by ``gr.ChatInterface``.

    Returns:
        The assistant's reply text, or a fallback string when the API
        returns no usable content.
    """
    # Flatten Gradio's pair-based history into the OpenAI messages schema.
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model='ambrosfitz/llama-3-history',
        messages=history_openai_format,
        temperature=0,
        max_tokens=150,
        stream=False  # non-streaming keeps the response handling simple
    )

    # BUGFIX: the v1 client returns pydantic objects, so `message` is not
    # subscriptable — `message['content']` raised TypeError. Use attribute
    # access instead.
    if response.choices and response.choices[0].message.content:
        response_text = response.choices[0].message.content.strip()
        # NOTE(review): ChatInterface manages its own history; this in-place
        # append appears redundant — confirm before relying on it.
        history.append((message, response_text))
        return response_text
    else:
        return "No response generated."


demo = gr.ChatInterface(
    fn=predict,
    title="HistoryBot Chat",
    description="Interact with HistoryBot, a specialized assistant for American History. Ask any historical questions to get detailed and nuanced answers."
)

if __name__ == "__main__":
    demo.launch()