Jikkata committed
Commit da67385
1 Parent(s): 3024992

Update app.py

Files changed (1)
app.py +27 -45
app.py CHANGED
@@ -1,51 +1,33 @@
-import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
-import tempfile
 import os
+import gradio as gr
+from huggingface_hub import InferenceClient
 
-# Create the offload folder
-offload_dir = './offload'
-os.makedirs(offload_dir, exist_ok=True)
-
-#"tiiuae/falcon-7b-instruct",
-model = AutoModelForCausalLM.from_pretrained(
-    "meta-llama/Meta-Llama-3-8B",
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
-    low_cpu_mem_usage=True,
-    offload_folder=offload_dir
+# Retrieve the token from environment variable
+token = os.getenv("HF_TOKEN")
+
+client = InferenceClient(
+    "meta-llama/Llama-3.2-3B-Instruct",
+    token=token,
 )
-tokenizer = AutoTokenizer.from_pretrained("tiiuae/meta-llama/Meta-Llama-3-8B")
-
-
-def generate_text(input_text):
-    input_ids = tokenizer.encode(input_text, return_tensors="pt")
-    attention_mask = torch.ones(input_ids.shape)
-
-    output = model.generate(
-        input_ids,
-        attention_mask=attention_mask,
-        max_length=200,
-        do_sample=True,
-        top_k=10,
-        num_return_sequences=1,
-        eos_token_id=tokenizer.eos_token_id,
-    )
-
-    output_text = tokenizer.decode(output[0], skip_special_tokens=True)
-    print(output_text)
-
-    # Remove Prompt Echo from Generated Text
-    cleaned_output_text = output_text.replace(input_text, "")
-    return cleaned_output_text
 
-text_generation_interface = gr.Interface(
-    fn=generate_text,
-    inputs=[
-        gr.inputs.Textbox(label="Input Text"),
-    ],
-    outputs=gr.inputs.Textbox(label="Generated Text"),
-    title="---LLM---",
-).launch()
+def chat_with_llama(user_input):
+    response = ""
+    for message in client.chat_completion(
+        messages=[{"role": "user", "content": user_input}],
+        max_tokens=500,
+        stream=True,
+    ):
+        response += message.choices[0].delta.content
+    return response
+
+# Create a Gradio interface
+interface = gr.Interface(
+    fn=chat_with_llama,
+    inputs=gr.Textbox(label="Input Text", placeholder="Ask something..."),
+    outputs="text",
+    title="Chat with Llama 3",
+    description="Enter your message to chat with Llama 3. Type your question or prompt below.",
+)
+
+if __name__ == "__main__":
+    interface.launch()
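
One note on the new streaming loop, outside the diff itself: with stream=True, huggingface_hub's chat_completion can yield chunks whose delta.content is None (typically a final chunk carrying only a finish_reason), in which case `response += message.choices[0].delta.content` would raise a TypeError. A minimal guarded variant of the committed function, as a sketch rather than part of the commit:

import os
from huggingface_hub import InferenceClient

client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct", token=os.getenv("HF_TOKEN"))

def chat_with_llama(user_input):
    # Accumulate streamed chunks, skipping None deltas to avoid a TypeError
    # on the final chunk of the stream.
    response = ""
    for message in client.chat_completion(
        messages=[{"role": "user", "content": user_input}],
        max_tokens=500,
        stream=True,
    ):
        chunk = message.choices[0].delta.content
        if chunk:
            response += chunk
    return response

To try the app locally, set a valid HF_TOKEN in the environment before running `python app.py`, since the token is read via os.getenv("HF_TOKEN") at import time.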