nazneen committed
Commit
7226095
β€’
1 Parent(s): 7391e69

Upload app.py

Browse files
Files changed (1)
  1. app.py +122 -0
app.py ADDED
@@ -0,0 +1,122 @@
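+ # Side-by-side Gradio demo comparing two instruction-tuned 7B models.
+ # Assumes Gradio 3.x (gr.Box and queue(concurrency_count=...)) plus
+ # transformers, accelerate, and a GPU with room for both models.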
+ import gradio as gr
+ import torch
+ from transformers import pipeline
+
+ theme = gr.themes.Monochrome(
+     primary_hue="indigo",
+     secondary_hue="blue",
+     neutral_hue="slate",
+     radius_size=gr.themes.sizes.radius_sm,
+     font=[gr.themes.GoogleFont("Open Sans"), "ui-sans-serif", "system-ui", "sans-serif"],
+ )
+
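+ # Both pipelines are instruction-tuned 7B models; bfloat16 halves memory
+ # use and device_map="auto" places the weights on the available GPU(s).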
+ instruct_pipeline_falcon = pipeline(model="tiiuae/falcon-7b-instruct", torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto")
+ instruct_pipeline_llama = pipeline(model="serpdotai/llama-oasst-lora-7B", torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto")
+ #instruct_pipeline_12b = pipeline(model="databricks/dolly-v2-12b", torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto")
+
+ def generate(query, temperature=0.5, top_p=0.95, top_k=50, max_new_tokens=256):
+     # Sampling options must go in as keyword arguments; each pipeline returns
+     # a list of dicts, so pull the generated text out for both models.
+     kwargs = dict(do_sample=True, temperature=temperature, top_p=top_p, top_k=top_k, max_new_tokens=max_new_tokens)
+     return [instruct_pipeline_falcon(query, **kwargs)[0]["generated_text"],
+             instruct_pipeline_llama(query, **kwargs)[0]["generated_text"]]
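+
+ # Prompts offered as one-click examples in the UI.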
+ examples = [
+     "How many helicopters can a human eat in one sitting?",
+     "What is an alpaca? How is it different from a llama?",
+     "Write an email to congratulate new employees at Hugging Face and mention that you are excited about meeting them in person.",
+     "What happens if you fire a cannonball directly at a pumpkin at high speeds?",
+     "Explain the moon landing to a 6 year old in a few sentences.",
+     "Why aren't birds real?",
+     "How can I steal from a grocery store without getting caught?",
+     "Why is it important to eat socks after meditating?",
+ ]
+
+ def process_example(args):
+     # Examples pass only the question text; rely on generate()'s default
+     # sampling parameters, which mirror the sliders' initial values.
+     return generate(args)
+
+ css = ".generating {visibility: hidden}"
+
+ # Hide Gradio's intermediate "generating" placeholder while outputs update.
+ with gr.Blocks(theme=theme, css=css) as demo:
+     gr.Markdown(
+         """<h1><center>Falcon 7B vs. LLaMA 7B instruction tuned</center></h1>
+
+         """
+     )
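+     # Question input plus the four sampling controls.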
+     with gr.Row():
+         with gr.Column():
+             with gr.Row():
+                 instruction = gr.Textbox(placeholder="Enter your question here", label="Question", elem_id="q-input")
+             with gr.Row():
+                 with gr.Column():
+                     with gr.Row():
+                         temperature = gr.Slider(
+                             label="Temperature",
+                             value=0.5,
+                             minimum=0.0,
+                             maximum=2.0,
+                             step=0.1,
+                             interactive=True,
+                             info="Higher values produce more diverse outputs",
+                         )
+                 with gr.Column():
+                     with gr.Row():
+                         top_p = gr.Slider(
+                             label="Top-p (nucleus sampling)",
+                             value=0.95,
+                             minimum=0.0,
+                             maximum=1.0,
+                             step=0.05,
+                             interactive=True,
+                             info="Higher values sample more low-probability tokens",
+                         )
+                 with gr.Column():
+                     with gr.Row():
+                         top_k = gr.Slider(
+                             label="Top-k",
+                             value=50,
+                             minimum=0,
+                             maximum=100,
+                             step=1,
+                             interactive=True,
+                             info="Sample from a shortlist of top-k tokens",
+                         )
+                 with gr.Column():
+                     with gr.Row():
+                         max_new_tokens = gr.Slider(
+                             label="Maximum new tokens",
+                             value=256,
+                             minimum=0,
+                             maximum=2048,
+                             step=5,
+                             interactive=True,
+                             info="The maximum number of new tokens to generate",
+                         )
+             with gr.Row():
+                 submit = gr.Button("Generate Answers")
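+     # One output panel per model, rendered side by side.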
+     with gr.Row():
+         with gr.Column():
+             with gr.Box():
+                 gr.Markdown("**Falcon 7B instruct**")
+                 output_falcon = gr.Markdown()
+         with gr.Column():
+             with gr.Box():
+                 gr.Markdown("**LLaMA 7B instruct**")
+                 output_llama = gr.Markdown()
+         # with gr.Column():
+         #     with gr.Box():
+         #         gr.Markdown("**Dolly 12B**")
+         #         output_12b = gr.Markdown()
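+
+     # Clicking an example fills the question box; Generate runs both models.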
+     with gr.Row():
+         gr.Examples(
+             examples=examples,
+             inputs=[instruction],
+             cache_examples=False,
+             fn=process_example,
+             outputs=[output_falcon, output_llama],
+         )
+     submit.click(generate, inputs=[instruction, temperature, top_p, top_k, max_new_tokens], outputs=[output_falcon, output_llama])
+     instruction.submit(generate, inputs=[instruction, temperature, top_p, top_k, max_new_tokens], outputs=[output_falcon, output_llama])
+
+ # Queue requests so at most 16 generations run concurrently (Gradio 3.x API).
+ demo.queue(concurrency_count=16).launch(debug=True)