File size: 872 Bytes
1bb02a4
da67385
 
1bb02a4
da67385
 
9d164d3
da67385
 
 
9d164d3
 
da67385
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9d164d3
da67385
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import os
import gradio as gr
from huggingface_hub import InferenceClient

# The Hugging Face access token is read from the HF_TOKEN environment
# variable; it is None when the variable is not set.
token = os.environ.get("HF_TOKEN")

# Shared inference client bound to the Llama 3.2 3B Instruct model.
client = InferenceClient(model="meta-llama/Llama-3.2-3B-Instruct", token=token)

def chat_with_llama(user_input):
    """Send one user message to the model and return the full reply text.

    The request is issued through the module-level ``client`` with
    ``stream=True`` and ``max_tokens=500``; the text fragments carried by
    the streamed chunks are concatenated into a single string.

    Args:
        user_input: Text sent as the content of a single ``"user"`` message.

    Returns:
        The concatenated reply text, or an empty string if no chunk
        carried any text.
    """
    pieces = []
    for chunk in client.chat_completion(
        messages=[{"role": "user", "content": user_input}],
        max_tokens=500,
        stream=True,
    ):
        # A chunk may arrive without any choices; nothing to read from it.
        if not chunk.choices:
            continue
        fragment = chunk.choices[0].delta.content
        # ``delta.content`` is optional and can be None (e.g. on a chunk that
        # only signals the end of the stream); concatenating None to a str
        # would raise TypeError, so such chunks are skipped.
        if fragment:
            pieces.append(fragment)
    return "".join(pieces)

# Single-textbox Gradio UI: the submitted text is passed to chat_with_llama
# and the returned string is shown as plain text.
interface = gr.Interface(
    title="Chat with Llama 3",
    description="Enter your message to chat with Llama 3. Type your question or prompt below.",
    fn=chat_with_llama,
    inputs=gr.Textbox(placeholder="Ask something...", label="Input Text"),
    outputs="text",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()