Spaces:
Running
Running
File size: 872 Bytes
1bb02a4 da67385 1bb02a4 da67385 9d164d3 da67385 9d164d3 da67385 9d164d3 da67385 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import os
import gradio as gr
from huggingface_hub import InferenceClient
# Pull the Hugging Face access token from the environment so it never
# lives in source control; may be None, in which case the client makes
# unauthenticated requests.
hf_token = os.getenv("HF_TOKEN")

# Hosted-inference client pinned to the Llama 3.2 3B Instruct model.
client = InferenceClient(
    "meta-llama/Llama-3.2-3B-Instruct",
    token=hf_token,
)
def chat_with_llama(user_input: str) -> str:
    """Send *user_input* to the model and return the full generated reply.

    Streams the completion chunk by chunk and concatenates the pieces,
    so the caller receives one complete string.

    Args:
        user_input: The user's message/prompt.

    Returns:
        The assistant's complete response text ("" if nothing was generated).
    """
    parts = []
    for chunk in client.chat_completion(
        messages=[{"role": "user", "content": user_input}],
        max_tokens=500,
        stream=True,
    ):
        delta = chunk.choices[0].delta.content
        # The final streamed chunk (and role-only chunks) can carry
        # content=None; skip those instead of crashing on `str + None`.
        if delta:
            parts.append(delta)
    # Join once instead of repeated string concatenation.
    return "".join(parts)
# Wire the chat function into a simple single-input Gradio UI.
demo = gr.Interface(
    fn=chat_with_llama,
    inputs=gr.Textbox(label="Input Text", placeholder="Ask something..."),
    outputs="text",
    title="Chat with Llama 3",
    description="Enter your message to chat with Llama 3. Type your question or prompt below.",
)

# Launch the web app only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|