ohtaman committed
Commit 419b7c9
1 Parent(s): 46398af

Create app.py

Files changed (1)
  1. app.py +86 -0
app.py ADDED
@@ -0,0 +1,86 @@
+ from peft import PeftModel
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
+ import torch
+ import gradio as gr
+
+
+ BASE_MODEL_NAME = "tiiuae/falcon-7b"
+ MODEL_NAME = "ohtaman/falcon-7b-kokkai2022-lora"
+
+ # Load the Falcon-7B base model in bfloat16 and attach the LoRA adapter on top of it.
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME, trust_remote_code=True)
+ base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_NAME, device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True)
+ model = PeftModel.from_pretrained(base_model, MODEL_NAME)
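+ # PeftModel.from_pretrained downloads only the adapter weights from MODEL_NAME and
+ # injects them into the frozen base model, so the adapter checkpoint stays small
+ # compared with the full 7B base weights.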
+
+
+ def generate_prompt(question: str, questioner: str = "", answerer: str = ""):
+     # Build the "# question" / "# answer" prompt in the format the fine-tuned model expects.
+     return f"""# question
+ {questioner}
+
+ {question}
+
+ # answer
+ {answerer}
+
+ """
+
+ def evaluate(
+     question: str,
+     questioner: str = "",
+     answerer: str = "",
+     temperature: float = 0.1,
+     top_p: float = 0.75,
+     top_k: int = 40,
+     num_beams: int = 4,
+     repetition_penalty: float = 1.05,
+     max_new_tokens: int = 256,
+     **kwargs
+ ):
+     prompt = generate_prompt(question, questioner, answerer)
+     inputs = tokenizer(prompt, return_tensors="pt")
+     input_ids = inputs["input_ids"].to(model.device)
+     n_input_tokens = input_ids.shape[1]
+
+     generation_config = GenerationConfig(
+         temperature=temperature,
+         top_p=top_p,
+         top_k=top_k,
+         num_beams=num_beams,
+         repetition_penalty=repetition_penalty,
+         **kwargs,
+     )
+     with torch.no_grad():
+         generation_output = model.generate(
+             input_ids=input_ids,
+             generation_config=generation_config,
+             return_dict_in_generate=True,
+             output_scores=True,
+             max_new_tokens=max_new_tokens,
+         )
+     # Keep only the generated continuation: drop the prompt tokens and the final token.
+     s = generation_output.sequences[0, n_input_tokens:-1]
+     return tokenizer.decode(s)
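+
+ # GenerationConfig defaults to do_sample=False, so the temperature/top_p/top_k values
+ # above only take effect if do_sample=True is passed through **kwargs; otherwise
+ # decoding is deterministic beam search with num_beams beams.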
+
+
+ g = gr.Interface(
+     fn=evaluate,
+     inputs=[
+         gr.components.Textbox(lines=5, label="Question", placeholder="Question"),
+         gr.components.Textbox(lines=1, label="Questioner", placeholder="Questioner"),
+         gr.components.Textbox(lines=1, label="Answerer", placeholder="Answerer"),
+         gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
+         gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
+         gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
+         gr.components.Slider(minimum=1, maximum=4, step=1, value=4, label="Beams"),
+         gr.components.Slider(minimum=0, maximum=2, step=0.01, value=1.05, label="Repetition Penalty"),
+         gr.components.Slider(minimum=1, maximum=512, step=1, value=128, label="Max new tokens"),
+     ],
+     outputs=[
+         gr.components.Textbox(lines=5, label="Output"),
+     ],
+     title="🏛️ Kokkai 2022",
+     description="falcon-7b-kokkai2022 is a 7B-parameter model fine-tuned with LoRA on Japan's 2022 Diet proceedings, based on [tiiuae/falcon-7b](https://huggingface.co/tiiuae/falcon-7b).",
+ )
+ g.queue(concurrency_count=1)
+ g.launch()
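+ # To run the app locally (assuming peft, transformers, torch, and gradio are installed
+ # and a GPU with enough memory to hold Falcon-7B in bfloat16 is available):
+ #
+ #     python app.py
+ #
+ # then open the local URL Gradio prints (http://127.0.0.1:7860 by default).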