ttagu99 commited on
Commit
0788398
1 Parent(s): 3e95aa0

add gitignore

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. app.py +32 -8
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ cache/
app.py CHANGED
@@ -10,16 +10,14 @@ import os
10
  import gradio as gr
11
  import requests
12
  import random
13
- # from dotenv import load_dotenv
14
  import googletrans
15
  translator = googletrans.Translator()
16
 
17
- # load_dotenv()
18
  model = None
19
  tokenizer = None
20
  generator = None
21
 
22
- os.environ["CUDA_VISIBLE_DEVICES"]="0"
23
 
24
  def load_model(model_name, eight_bit=0, device_map="auto"):
25
  global model, tokenizer, generator
@@ -32,20 +30,29 @@ def load_model(model_name, eight_bit=0, device_map="auto"):
32
  gpu_count = torch.cuda.device_count()
33
  print('gpu_count', gpu_count)
34
 
 
 
 
 
 
35
  print(model_name)
36
  tokenizer = transformers.LLaMATokenizer.from_pretrained(model_name)
37
  model = transformers.LLaMAForCausalLM.from_pretrained(
38
  model_name,
39
  #device_map=device_map,
40
  #device_map="auto",
41
- torch_dtype=torch.float16,
42
  #max_memory = {0: "14GB", 1: "14GB", 2: "14GB", 3: "14GB",4: "14GB",5: "14GB",6: "14GB",7: "14GB"},
43
  #load_in_8bit=eight_bit,
44
  #from_tf=True,
45
  low_cpu_mem_usage=True,
46
  load_in_8bit=False,
47
  cache_dir="cache"
48
- ).cuda()
 
 
 
 
49
  generator = model.generate
50
 
51
  # chat doctor
@@ -68,7 +75,11 @@ def chatdoctor(input, state):
68
  print('fulltext: ',fulltext)
69
 
70
  generated_text = ""
71
- gen_in = tokenizer(fulltext, return_tensors="pt").input_ids.cuda()
 
 
 
 
72
  in_tokens = len(gen_in)
73
  print('len token',in_tokens)
74
  with torch.no_grad():
@@ -97,9 +108,22 @@ def chatdoctor(input, state):
97
 
98
  def predict(input, chatbot, state):
99
  print('predict state: ', state)
100
- en_input = translator.translate(input, src='ko', dest='en').text
 
 
 
 
 
 
 
 
101
  response = chatdoctor(en_input, state)
102
- ko_response = translator.translate(response, src='en', dest='ko').text
 
 
 
 
 
103
  state.append(response)
104
  chatbot.append((input, ko_response))
105
  return chatbot, state
 
10
  import gradio as gr
11
  import requests
12
  import random
 
13
  import googletrans
14
  translator = googletrans.Translator()
15
 
 
16
  model = None
17
  tokenizer = None
18
  generator = None
19
 
20
+ os.environ["CUDA_VISIBLE_DEVICES"]=""
21
 
22
  def load_model(model_name, eight_bit=0, device_map="auto"):
23
  global model, tokenizer, generator
 
30
  gpu_count = torch.cuda.device_count()
31
  print('gpu_count', gpu_count)
32
 
33
+ if torch.cuda.is_available():
34
+ torch_dtype = torch.float16
35
+ else:
36
+ torch_dtype = torch.float32
37
+
38
  print(model_name)
39
  tokenizer = transformers.LLaMATokenizer.from_pretrained(model_name)
40
  model = transformers.LLaMAForCausalLM.from_pretrained(
41
  model_name,
42
  #device_map=device_map,
43
  #device_map="auto",
44
+ torch_dtype=torch_dtype,
45
  #max_memory = {0: "14GB", 1: "14GB", 2: "14GB", 3: "14GB",4: "14GB",5: "14GB",6: "14GB",7: "14GB"},
46
  #load_in_8bit=eight_bit,
47
  #from_tf=True,
48
  low_cpu_mem_usage=True,
49
  load_in_8bit=False,
50
  cache_dir="cache"
51
+ )
52
+ if torch.cuda.is_available():
53
+ model = model.cuda()
54
+ else:
55
+ model = model.cpu()
56
  generator = model.generate
57
 
58
  # chat doctor
 
75
  print('fulltext: ',fulltext)
76
 
77
  generated_text = ""
78
+ gen_in = tokenizer(fulltext, return_tensors="pt").input_ids
79
+ if torch.cuda.is_available():
80
+ gen_in = gen_in.cuda()
81
+ else:
82
+ gen_in = gen_in.cpu()
83
  in_tokens = len(gen_in)
84
  print('len token',in_tokens)
85
  with torch.no_grad():
 
108
 
109
def predict(input, chatbot, state):
    """Gradio chat callback: answer *input* via the chat-doctor model.

    If the input is detected as Korean it is translated to English before
    being sent to the model, and the model's English reply is translated
    back to Korean; otherwise both directions pass through unchanged.

    Args:
        input: the user's message (Korean or English text).
        chatbot: Gradio chat history; a ``(input, reply)`` pair is appended.
        state: conversation state list; the raw English reply is appended.

    Returns:
        The updated ``(chatbot, state)`` pair, as Gradio expects.
    """
    print('predict state: ', state)

    # If Korean is detected, translate to English for the model; otherwise
    # use the input as-is. Reuse the module-level `translator` rather than
    # constructing a new googletrans.Translator() on every call.
    is_kor = True
    if translator.detect(input).lang == 'ko':
        en_input = translator.translate(input, src='ko', dest='en').text
    else:
        en_input = input
        is_kor = False

    response = chatdoctor(en_input, state)

    # Translate the reply back only when the user wrote in Korean.
    if is_kor:
        ko_response = translator.translate(response, src='en', dest='ko').text
    else:
        ko_response = response

    # State keeps the raw English response; the visible chat shows the
    # (possibly translated) reply paired with the original input.
    state.append(response)
    chatbot.append((input, ko_response))
    return chatbot, state