ttnts committed
Commit f6e2cd9
1 Parent(s): 12180f2

adding app

Files changed (1)
  1. app.py +135 -0
app.py ADDED
@@ -0,0 +1,135 @@
+ import os
+ import json
+ import subprocess
+ import torch
+ import spaces
+ import gradio as gr
+ from threading import Thread
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+ from huggingface_hub import HfApi
+ from datetime import datetime
+
+ # Space configuration is injected through environment variables.
+ MODEL_ID = os.environ.get("MODEL_ID")
+ DATASET_REPO = os.environ.get("DATASET_REPO")
+ DESCRIPTION = os.environ.get("DESCRIPTION")
+ PROMPT = os.environ.get("PROMPT")
+
+ # JavaScript run on page load: shows the terms-of-use notice.
+ ON_LOAD = """
+ async()=>{
+     alert("Before using the service, users must agree to the following terms:\\n\\nPlease note that the model presented here is an experimental tool that is still being developed and improved.\\n\\nMeasures have been taken during the model creation process to minimize the risk of generating vulgar, prohibited or inappropriate content. However, in rare cases, unwanted content may be generated. If you encounter any content that is deemed inappropriate or violates our policies, please contact us to report it. Your information will enable us to take further steps to improve and develop the model to make it safe and user-friendly.\\n\\nYou must not use the model for illegal, harmful, violent, racist or sexual purposes. Please do not send any private information. The website collects user dialogue data and reserves the right to distribute it under the Creative Commons Attribution (CC-BY) or similar license.");
+ }
+ """
+
+ api = HfApi()
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+
+ @spaces.GPU()
+ def generate(instruction, stop_tokens, repetition_penalty, max_new_tokens):
+     # Stream tokens as they are produced instead of waiting for the full completion.
+     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+     input_ids = tokenizer.apply_chat_template(instruction, return_tensors="pt", add_generation_prompt=True).to(device)
+
+     # Keep only the most recent 4096 tokens so the prompt fits the context window.
+     if input_ids.shape[1] > 4096:
+         input_ids = input_ids[:, -4096:]
+
+     generate_kwargs = dict(
+         input_ids=input_ids,
+         streamer=streamer,
+         do_sample=False,
+         max_new_tokens=max_new_tokens,
+         repetition_penalty=repetition_penalty,
+     )
+     # model.generate blocks, so it runs in a background thread while this
+     # thread consumes tokens from the streamer.
+     t = Thread(target=model.generate, kwargs=generate_kwargs)
+     t.start()
+     outputs = []
+     for new_token in streamer:
+         if new_token in stop_tokens:
+             break
+         outputs.append(new_token.replace("<|im_end|>", ""))
+         yield "".join(outputs)
+
+
+ def predict(message, history):
+     system_prompt = PROMPT
+     repetition_penalty = 1.1
+     max_new_tokens = 1024
+     stop_tokens = ["<|endoftext|>", "<|im_end|>"]
+     # Rebuild the whole dialogue in the chat-template message format.
+     conversation = []
+     conversation.append(
+         {
+             "role": "system",
+             "content": system_prompt,
+         }
+     )
+     for user, assistant in history:
+         conversation.extend([{"role": "user", "content": user},
+                              {"role": "assistant", "content": assistant}])
+     conversation.append({"role": "user", "content": message})
+     print(conversation)
+
+     for output_text in generate(conversation, stop_tokens, repetition_penalty, max_new_tokens):
+         if output_text in stop_tokens:
+             break
+         yield output_text
+
+     # Log the finished exchange to the dataset repo; run_as_future keeps the
+     # upload from blocking the response.
+     day = datetime.now().strftime("%Y-%m-%d")
+     timestamp = datetime.now().timestamp()
+     dd = {
+         'message': message,
+         'history': history,
+         'system_prompt': system_prompt,
+         'max_new_tokens': max_new_tokens,
+         'repetition_penalty': repetition_penalty,
+         'instruction': conversation,
+         'output': output_text,
+         'precision': 'auto ' + str(model.dtype),
+     }
+     api.upload_file(
+         path_or_fileobj=json.dumps(dd, indent=2, ensure_ascii=False).encode('utf-8'),
+         path_in_repo=f"{day}/{timestamp}.json",
+         repo_id=DATASET_REPO,
+         repo_type="dataset",
+         commit_message="X",
+         run_as_future=True
+     )
+
+ def vote(chatbot, data: gr.LikeData):
+     # Store like/dislike feedback alongside the conversation it refers to.
+     day = datetime.now().strftime("%Y-%m-%d")
+     timestamp = datetime.now().timestamp()
+     api.upload_file(
+         path_or_fileobj=json.dumps({"history": chatbot, 'index': data.index, 'liked': data.liked}, indent=2, ensure_ascii=False).encode('utf-8'),
+         path_in_repo=f"liked/{day}/{timestamp}.json",
+         repo_id=DATASET_REPO,
+         repo_type="dataset",
+         commit_message="L",
+         run_as_future=True
+     )
+
+ with gr.Blocks(js=ON_LOAD) as demo:
+     chatbot = gr.Chatbot(label="Chatbot", likeable=True, render=False)
+     chatbot.like(vote, [chatbot], None)
+     gr.ChatInterface(
+         predict,
+         chatbot=chatbot,
+         title="MKLLM-7B-Instruct",
+         description=DESCRIPTION,
+         examples=[
+             ["Кој си ти?"],  # "Who are you?"
+             ["Колку е 3+6/3-1?"],  # "What is 3+6/3-1?"
+             ["Како би го населиле Марс? Биди краток."],  # "How would we populate Mars? Be brief."
+             ["Напиши ми python функција која прима низа броеви и проверува кои броеви се деливи со 7, оние што се деливи ги сместуваме во нова листа која ја враќаме назад."]  # "Write me a python function that takes a list of numbers, checks which are divisible by 7, and returns the divisible ones in a new list."
+         ]
+     )
+
+ if __name__ == "__main__":
+     # tokenizer and model are module-level names used by generate() above.
+     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+     # The model uses ChatML-style turns, so <|im_end|> acts as the EOS token.
+     tokenizer.eos_token = "<|im_end|>"
+     tokenizer.pad_token = tokenizer.eos_token
+     model = AutoModelForCausalLM.from_pretrained(
+         MODEL_ID,
+         device_map=device,
+         torch_dtype='auto',
+     )
+
+     demo.queue(max_size=50).launch()
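
The app reads all of its configuration from the four environment variables at the top of the file, so a local smoke test only needs those set before the script runs. A minimal sketch, assuming placeholder values — every name below is hypothetical; the real Space supplies these through its settings:

import os

# All values are illustrative assumptions, not the Space's real configuration.
os.environ["MODEL_ID"] = "your-org/MKLLM-7B-Instruct"    # hypothetical model repo
os.environ["DATASET_REPO"] = "your-org/mkllm-chat-logs"  # hypothetical logging dataset
os.environ["DESCRIPTION"] = "Chat demo for MKLLM-7B-Instruct"
os.environ["PROMPT"] = "You are MKLLM, a helpful assistant."  # assumed system prompt

# With these in place, run: python app.py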