"""Gradio chat UI for Qwen2-VL-72B-Instruct served through the Hyperbolic API."""

import base64
from io import BytesIO

import gradio as gr
import requests
from PIL import Image

API_ENDPOINT = "https://api.hyperbolic.xyz/v1/chat/completions"
MODEL_NAME = "Qwen/Qwen2-VL-72B-Instruct"
# (connect, read) timeouts in seconds; without them a stalled request would
# block the UI callback forever.
REQUEST_TIMEOUT_SECONDS = (10, 120)
# Pillow modes that can be written to PNG without conversion.
_PNG_MODES = frozenset({"1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"})


def _encode_pil_image(img):
    """Serialize a PIL image to a ``data:image/png;base64,...`` URL."""
    if img.mode not in _PNG_MODES:
        # e.g. CMYK JPEGs cannot be saved as PNG directly.
        img = img.convert("RGBA" if "A" in img.getbands() else "RGB")
    buffered = BytesIO()
    img.save(buffered, format="PNG")
    encoded_string = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return f"data:image/png;base64,{encoded_string}"


def encode_image(img):
    """Encode an image as a base64 PNG data URL.

    Args:
        img: A PIL ``Image`` or a filesystem path to an image file.

    Returns:
        A string of the form ``data:image/png;base64,<payload>``.

    Raises:
        OSError: If ``img`` is a path that cannot be opened or decoded.
    """
    if isinstance(img, Image.Image):
        return _encode_pil_image(img)
    # Uploaded files arrive from the UI as paths; close the handle when done.
    with Image.open(img) as opened:
        return _encode_pil_image(opened)


def _extract_inputs(user_inputs):
    """Split a multimodal textbox value into ``(text, images)``.

    ``gr.MultimodalTextbox`` passes ``{"text": str, "files": [...]}``. Each
    file entry is handled whether it is a path string, a dict with a
    ``"path"`` key, or an object with a ``path`` attribute. A single image
    under the ``"image"`` key is still honoured for backward compatibility.
    """
    if not user_inputs:
        return "", []

    text = user_inputs.get("text") or ""
    images = []

    legacy_image = user_inputs.get("image")
    if legacy_image is not None:
        images.append(legacy_image)

    for item in user_inputs.get("files") or []:
        if isinstance(item, dict):
            path = item.get("path")
        else:
            path = getattr(item, "path", item)
        if path:
            images.append(path)

    return text, images


def _extract_reply(api_response):
    """Pull the assistant text out of a chat-completions style response."""
    choices = api_response.get("choices") or [{}]
    message = choices[0].get("message") or {}
    content = message.get("content")
    if not content:
        return "No response content."
    # The chat widget needs a plain string; anything else would be
    # interpreted as a file reference.
    return content if isinstance(content, str) else str(content)


def get_api_response(api_key, user_inputs):
    """Send the user's text and image(s) to the Hyperbolic API.

    Args:
        api_key: Bearer token for the Hyperbolic API.
        user_inputs: Value of the multimodal textbox (``"text"`` and
            ``"files"`` keys; a legacy ``"image"`` key is also accepted).

    Returns:
        ``{"response": <reply text>}`` on success, otherwise
        ``{"error": <message>}``. This function never raises: it sits at the
        UI boundary, so every failure is reported as an error message.
    """
    if not api_key:
        return {"error": "API key is required."}

    text, images = _extract_inputs(user_inputs)
    if not text and not images:
        return {"error": "Please provide a text message, an image, or both."}

    try:
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }

        content = []
        if text:
            content.append({"type": "text", "text": text})
        # Every uploaded image is sent, not just the first one.
        content.extend(
            {"type": "image_url", "image_url": {"url": encode_image(image)}}
            for image in images
        )

        payload = {
            "messages": [{"role": "user", "content": content}],
            "model": MODEL_NAME,
            "max_tokens": 2048,
            "temperature": 0.7,
            "top_p": 0.9,
        }

        response = requests.post(
            API_ENDPOINT,
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )

        if response.status_code != 200:
            return {
                "error": f"API Error: {response.status_code} - {response.text}"
            }
        return {"response": _extract_reply(response.json())}
    except Exception as e:  # UI boundary: report the failure, do not crash.
        return {"error": str(e)}


def chatbot_response(api_key, user_inputs, history):
    """Run one chat turn and return the updated conversation history.

    History rows use the tuple format of ``gr.Chatbot``: each row is
    ``(user_message, bot_message)``, where a message is a string, a
    ``(filepath,)`` tuple for a file, or ``None`` for "nothing to show".

    Args:
        api_key: Bearer token for the Hyperbolic API.
        user_inputs: Value of the multimodal textbox.
        history: Conversation so far (list of rows), or ``None``.

    Returns:
        ``(history, history)`` - the same updated list for the chatbot
        display and for the session state.
    """
    # Work on a copy so the stored state is only replaced via the return value.
    history = list(history or [])
    text, images = _extract_inputs(user_inputs)

    for image in images:
        if isinstance(image, Image.Image):
            # An in-memory image has no path the chat widget could display.
            history.append(("🖼️ (image attached)", None))
        else:
            history.append(((str(image),), None))
    if text:
        history.append((text, None))
    if not text and not images:
        # Still add a row so the validation error below has somewhere to go
        # and does not overwrite the previous turn's reply.
        history.append((None, None))

    api_result = get_api_response(api_key, user_inputs)
    if "error" in api_result:
        ai_message = f"Error: {api_result['error']}"
    else:
        ai_message = api_result["response"]

    # Attach the reply to the last row added for this turn.
    history[-1] = (history[-1][0], ai_message)
    return history, history


# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # 🖼️ Qwen2-VL-72B-Instruct with Hyperbolic API

        Engage in a conversation with the AI by sending text messages and/or uploading images.
        Enter your Hyperbolic API key to get started.
        """
    )

    with gr.Row():
        api_key_input = gr.Textbox(
            label="🔑 Hyperbolic API Key",
            type="password",
            placeholder="Enter your API key here",
            interactive=True,
        )

    chatbot = gr.Chatbot(label="💬 Chatbot")

    with gr.Row():
        chat_input = gr.MultimodalTextbox(
            label="Your Input",
            placeholder="Type your message and/or upload an image...",
            file_count="multiple",  # Allows multiple files if needed
            file_types=["image"],  # Only images are forwarded to the model
            interactive=True,
        )
        send_button = gr.Button("📤 Send")

    # Hidden state to keep track of the conversation history
    state = gr.State([])

    send_button.click(
        fn=chatbot_response,
        inputs=[api_key_input, chat_input, state],
        outputs=[chatbot, state],
    )
    # The textbox's own submit action (Enter / built-in button) runs the same
    # handler as the Send button.
    chat_input.submit(
        fn=chatbot_response,
        inputs=[api_key_input, chat_input, state],
        outputs=[chatbot, state],
    )

    gr.Markdown(
        """
        ---
        **Note:** Your API key is used only for this session and is not stored.
        Ensure you trust the environment in which you're running this application.
        """
    )

# Launch the Gradio app
if __name__ == "__main__":
    demo.launch()