asv7j committed on
Commit
0fc7538
1 Parent(s): 871ef09

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ import torch
3
+
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
# Device name constant (CPU).
device = "cpu"

# Hugging Face Hub repositories of the two checkpoints loaded at import time.
_SMALL_CHECKPOINT = "Qwen/Qwen2-0.5B-Instruct"
_LARGE_CHECKPOINT = "Qwen/Qwen2-1.5B-Instruct"

# Keyword arguments shared by both model loads.
_LOAD_OPTIONS = {"device_map": "auto"}

# Only one tokenizer is created; it comes from the 0.5B repository.
tokenizer = AutoTokenizer.from_pretrained(_SMALL_CHECKPOINT)

# 0.5B instruct model.
model = AutoModelForCausalLM.from_pretrained(_SMALL_CHECKPOINT, **_LOAD_OPTIONS)

# 1.5B instruct model.
model1 = AutoModelForCausalLM.from_pretrained(_LARGE_CHECKPOINT, **_LOAD_OPTIONS)

# ASGI application object that the route decorators register on.
app = FastAPI()
20
+
21
@app.get("/")
async def read_root():
    """Return the fixed greeting payload served at the root path."""
    greeting = {"Hello": "World!"}
    return greeting
24
+
25
+
26
# The route handler below is also called `model`, so its `def` rebinds the
# module-level name. Keep a private reference to the loaded 0.5B network first;
# otherwise `model.generate` inside the handler would resolve to the handler
# function itself and fail with AttributeError.
_qwen_0_5b = model


@app.post("/model")
async def model(data: dict):
    """Answer a chat prompt with the Qwen2-0.5B-Instruct model.

    Args:
        data: JSON request body; the user message is read from its
            ``"prompt"`` key.

    Returns:
        The decoded text of the newly generated tokens (at most 64), with
        special tokens stripped.

    Raises:
        HTTPException: 422 when ``"prompt"`` is missing or is not a string.
    """
    # Imported here so this block does not depend on the file's import header.
    from fastapi import HTTPException

    prompt = data.get("prompt")
    if not isinstance(prompt, str):
        # Without this guard a missing prompt reaches the chat template as None.
        raise HTTPException(status_code=422, detail="'prompt' must be a string")

    messages = [
        {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will response in polity and brief."},
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # The weights are placed by device_map="auto", so send the inputs to
    # wherever the model actually lives instead of a hard-coded device.
    model_inputs = tokenizer([text], return_tensors="pt").to(_qwen_0_5b.device)

    generated_ids = _qwen_0_5b.generate(
        **model_inputs,  # input_ids together with the attention mask
        max_new_tokens=64,
        do_sample=True,
    )

    # generate() returns prompt + completion; drop the prompt tokens.
    prompt_length = model_inputs.input_ids.shape[1]
    completion_ids = generated_ids[:, prompt_length:]
    response = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]

    return response
52
+
53
# The route handler below is also called `model1`, so its `def` rebinds the
# module-level name. Keep a private reference to the loaded 1.5B network first
# so the handler can still reach it.
_qwen_1_5b = model1


@app.post("/model1")
async def model1(data: dict):
    """Answer a chat prompt with the Qwen2-1.5B-Instruct model.

    Args:
        data: JSON request body; the user message is read from its
            ``"prompt"`` key.

    Returns:
        The decoded text of the newly generated tokens (at most 64), with
        special tokens stripped.

    Raises:
        HTTPException: 422 when ``"prompt"`` is missing or is not a string.
    """
    # Imported here so this block does not depend on the file's import header.
    from fastapi import HTTPException

    prompt = data.get("prompt")
    if not isinstance(prompt, str):
        # Without this guard a missing prompt reaches the chat template as None.
        raise HTTPException(status_code=422, detail="'prompt' must be a string")

    messages = [
        {"role": "system", "content": "You are a helpful assistant, Sia, developed by Sushma. You will response in polity and brief."},
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am Sia, a small language model created by Sushma."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # The weights are placed by device_map="auto", so send the inputs to
    # wherever the model actually lives instead of a hard-coded device.
    model_inputs = tokenizer([text], return_tensors="pt").to(_qwen_1_5b.device)

    # This endpoint must generate with the 1.5B network; the original called
    # `model.generate` here, which never touched the 1.5B weights.
    generated_ids = _qwen_1_5b.generate(
        **model_inputs,  # input_ids together with the attention mask
        max_new_tokens=64,
        do_sample=True,
    )

    # generate() returns prompt + completion; drop the prompt tokens.
    prompt_length = model_inputs.input_ids.shape[1]
    completion_ids = generated_ids[:, prompt_length:]
    response = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]

    return response