import os
import gradio as gr
from huggingface_hub import InferenceClient
# Retrieve the Hugging Face access token from the environment
token = os.getenv("HF_TOKEN")
client = InferenceClient(
    "meta-llama/Llama-3.2-3B-Instruct",
    token=token,
)
def chat_with_llama(user_input):
    response = ""
    # Stream the chat completion and accumulate the generated tokens
    for message in client.chat_completion(
        messages=[{"role": "user", "content": user_input}],
        max_tokens=500,
        stream=True,
    ):
        # Some streamed chunks carry no text (e.g. the final chunk), so guard against None
        response += message.choices[0].delta.content or ""
    return response
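
# Example call (hypothetical input, for illustration only):
#   chat_with_llama("What is the capital of France?")
# would return the model's reply as a single string once the stream finishes.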
# Create a Gradio interface
interface = gr.Interface(
    fn=chat_with_llama,
    inputs=gr.Textbox(label="Input Text", placeholder="Ask something..."),
    outputs="text",
    title="Chat with Llama 3",
    description="Enter your message to chat with Llama 3. Type your question or prompt below.",
)
if __name__ == "__main__":
    interface.launch()
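
# Usage note (assumption, not part of the original app): on a Hugging Face Space,
# HF_TOKEN is typically added under the Space's Settings -> Variables and secrets.
# To try the app locally, something like `HF_TOKEN=<your token> python app.py`
# should serve the Gradio UI on http://localhost:7860 (Gradio's default port).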