File size: 1,585 Bytes
23a2abf
aad58a2
 
 
192371d
 
aad58a2
192371d
aad58a2
 
 
23a2abf
 
192371d
 
 
 
 
23a2abf
 
 
192371d
aad58a2
ba1ca3f
23a2abf
aad58a2
23c61db
65d8d39
 
 
23a2abf
 
 
 
62193ce
23a2abf
192371d
4723a07
23a2abf
aad58a2
 
23a2abf
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from openai import OpenAI
import gradio as gr
import os

# Build an OpenAI-compatible client that talks to the RunPod-hosted vLLM
# endpoint. The key is read from the RUNPOD_API_KEY environment variable
# (None if unset — the client will then fail at request time, not here).
api_key = os.environ.get("RUNPOD_API_KEY")
client = OpenAI(
    base_url="https://api.runpod.ai/v2/vllm-k0g4c60zor9xuu/openai/v1",
    api_key=api_key,
)

def predict(message, history):
    """Generate one assistant reply for the Gradio chat interface.

    Args:
        message: The latest user message (str).
        history: Prior turns as a list of (user, assistant) string pairs,
            supplied by gr.ChatInterface.

    Returns:
        The model's reply text, or a fallback string when the API returns
        no usable content.
    """
    # Convert Gradio's pair-based history into the OpenAI chat message format.
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model='ambrosfitz/llama-3-history',
        messages=history_openai_format,
        temperature=0,
        max_tokens=150,
        stream=False,  # non-streaming keeps response handling simple
    )

    # openai>=1.0 (the `OpenAI()` client used here) returns pydantic objects:
    # message content is an attribute, not a mapping key — the original
    # message['content'] raised "TypeError: ... not subscriptable".
    # NOTE: we no longer append to `history`; gr.ChatInterface maintains the
    # chat history itself from this function's return value, so mutating the
    # input list was redundant.
    if response.choices and response.choices[0].message.content:
        return response.choices[0].message.content.strip()
    # Empty/missing completion content — return a fallback message.
    return "No response generated."

# Wire the predict callback into Gradio's ready-made chat UI component.
demo = gr.ChatInterface(
    predict,
    title="HistoryBot Chat",
    description="Interact with HistoryBot, a specialized assistant for American History. Ask any historical questions to get detailed and nuanced answers.",
)

# Launch the web app only when executed as a script (not when imported).
if __name__ == "__main__":
    demo.launch()