kumar9 committed on
Commit
67c57d4
1 Parent(s): 3c0be11

Update main.py

Files changed (1)
main.py +205 -1
main.py CHANGED
@@ -10,6 +10,210 @@ from collections import OrderedDict

app = Flask(__name__)

+
+device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+if device.type == 'cuda':
+    torch.cuda.set_device(device)
+print(device)
+
+def extract_text_from_link(url):
+    response = requests.get(url)
+    soup = BeautifulSoup(response.content, 'html.parser')
+    text = soup.get_text()
+    return text
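+# Usage sketch (hypothetical URL): extract_text_from_link('https://example.com')
+# would return the page's visible text with the HTML tags stripped.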
+
+
+doc = """The word "deep" in "deep learning" refers to the number of layers through which the data is transformed. More precisely,
+deep learning systems have a substantial credit assignment path (CAP) depth. The CAP is the chain of transformations from input to
+output. CAPs describe potentially causal connections between input and output. For a feedforward neural network, the depth of the
+CAPs is that of the network and is the number of hidden layers plus one (as the output layer is also parameterized). For recurrent
+neural networks, in which a signal may propagate through a layer more than once, the CAP depth is potentially unlimited.[13] No
+universally agreed-upon threshold of depth divides shallow learning from deep learning, but most researchers agree that deep
+learning involves CAP depth higher than 2. CAP of depth 2 has been shown to be a universal approximator in the sense that it
+can emulate any function.[14] Beyond that, more layers do not add to the function approximator ability of the network. Deep
+models (CAP > 2) are able to extract better features than shallow models and hence, extra layers help in learning the features
+effectively."""
+
+
+class Text2Words:
+    def __init__(self, document):
+        self.text_all = re.findall(r'\b[A-Za-z]+\b', document)  # every word in the document
+        self.text = list(set(self.text_all))                    # unique words
+        self.chars_all = ''.join(self.text)
+        self.chars = self.unique_chars(self.chars_all)          # unique characters plus the ' ' pad
+        self.int2char = dict(enumerate(self.chars))
+        self.char2int = {char: ind for ind, char in self.int2char.items()}
+        self.maxlen = len(max(self.text, key=len))              # length of the longest word
+        self.update_text()                                      # pad every word to maxlen with spaces
+        self.input_seq_char, self.target_seq_char = self.get_seq_char(self.text)
+        self.input_seq_index, self.target_seq_index = self.get_seq(self.char2int, self.input_seq_char, self.target_seq_char, len(self.text))
+        self.dict_size = len(self.char2int)
+        self.seq_len = self.maxlen - 1
+        self.batch_size = len(self.text)
+        self.input_seq = self.one_hot_encode(self.input_seq_index, self.dict_size, self.seq_len, self.batch_size)
+
+    def one_hot_encode(self, sequence, dict_size, seq_len, batch_size):
+        # Creating a multi-dimensional array of zeros with the desired output shape
+        features = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)
+
+        # Replacing the 0 at the relevant character index with a 1 to represent that character
+        for i in range(batch_size):
+            for u in range(seq_len):
+                features[i, u, sequence[i][u]] = 1
+        return features
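+    # e.g. with 100 words, seq_len 9 and 30 distinct characters, the result
+    # has shape (100, 9, 30): a single 1.0 per (word, position, char index).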
+
+    def get_seq(self, char2int, input_seq_char, target_seq_char, n):
+        x = []
+        y = []
+        for i in range(n):
+            x.append([char2int[character] for character in input_seq_char[i]])
+            y.append([char2int[character] for character in target_seq_char[i]])
+        return x, y
73
+
74
+ def get_seq_char(self, text):
75
+ input_seq = []
76
+ target_seq = []
77
+
78
+ for i in range(len(text)):
79
+ # Remove last character for input sequence
80
+ input_seq.append(text[i][:-1])
81
+ # Remove first character for target sequence
82
+ target_seq.append(text[i][1:])
83
+ return input_seq, target_seq
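+    # Example: a padded word 'deep ' yields input 'deep' and target 'eep ',
+    # i.e. the target is the input shifted left by one character.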
+
+    def unique_chars(self, chars_all):
+        chars = []
+        for letter in chars_all:
+            if letter not in chars:
+                chars.append(letter)
+        if ' ' not in chars:
+            chars.append(' ')
+        return sorted(chars)
+
+    def update_text(self):
+        for i in range(len(self.text)):
+            while len(self.text[i]) < self.maxlen:
+                self.text[i] += ' '
+
+    def description(self):
+        text = {}
+        for word in self.text:
+            char = word[0]
+            if char not in text:
+                text[char] = []
+            text[char].append(word.strip())
+        for k, v in sorted(text.items()):
+            print(f'{k} : {sorted(v)}')
+
+    def length_analysis(self):
+        text = {}
+        words = set(self.text_all)
+        for word in words:
+            n = len(word)
+            if n not in text:
+                text[n] = []
+            text[n].append(word.strip())
+        for k, v in sorted(text.items()):
+            print(f'{k} : count = {len(v)} list = {sorted(v)}')
+        return None
122
+
123
+ def create_object(doc):
124
+ return Text2Words(doc)
125
+
126
+
127
+ def get_inputs(obj):
128
+ input_seq = torch.tensor(obj.input_seq, device=device)
129
+ target_seq_index = torch.tensor(obj.target_seq_index, device=device)
130
+ return input_seq, target_seq_index
131
+
+class Model(nn.Module):
+    def __init__(self, input_size, output_size, hidden_dim, n_layers):
+        super(Model, self).__init__()
+
+        # Defining some parameters
+        self.hidden_dim = hidden_dim
+        self.n_layers = n_layers
+
+        # Defining the layers
+        # RNN layer
+        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
+        # Fully connected layer
+        self.fc = nn.Linear(hidden_dim, output_size)
+
+    def forward(self, x):
+        batch_size = x.size(0)
+        hidden = self.init_hidden(batch_size)
+        out, hidden = self.rnn(x, hidden)
+        out = out.contiguous().view(-1, self.hidden_dim)
+        out = self.fc(out)
+        return out, hidden
+
+    def init_hidden(self, batch_size):
+        # This method generates the first hidden state of zeros
+        torch.manual_seed(42)
+        hidden = torch.zeros((self.n_layers, batch_size, self.hidden_dim), device=device)
+        return hidden
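+# Note: forward() re-creates a zero hidden state on every call, and the logits
+# it returns have shape (batch_size*seq_len, dict_size) after the view/fc steps.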
+
+def create_model(obj):
+    model = Model(input_size=obj.dict_size, output_size=obj.dict_size, hidden_dim=2*obj.dict_size, n_layers=1)
+    model.to(device)
+    lr = 0.01
+    criterion = nn.CrossEntropyLoss()
+    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
+    return model, criterion, optimizer
+
+# This function takes in the model and character as arguments and returns the next character prediction and hidden state
+def predict(model, character):
+    # One-hot encode the input characters so they fit the model
+    character = np.array([[obj.char2int[c] for c in character]])
+    character = obj.one_hot_encode(character, obj.dict_size, character.shape[1], 1)
+    character = torch.tensor(character, device=device)
+    out, hidden = model(character)
+    prob = nn.functional.softmax(out[-1], dim=0).data
+    # Take the class with the highest probability score
+    char_ind = torch.max(prob, dim=0)[1].item()
+    return obj.int2char[char_ind], hidden
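+# e.g. predict(model, ['a', 'p']) -> ('p', hidden): the most likely character
+# to follow the prefix 'ap', plus the final hidden state (illustrative output).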
+
+# This function takes the desired output length and input characters as arguments, returning the produced word
+def sample(model, out_len, start='h'):
+    model.eval()  # eval mode
+    chars = [ch for ch in start]
+    char = chars[-1]
+    chars = chars[:-1]
+    # Feed the characters generated so far back in until the model emits the
+    # padding space or the word reaches the requested length
+    while char != ' ' and len(chars) < out_len:
+        chars.append(char)
+        char, h = predict(model, chars)
+    return ''.join(chars)
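+# e.g. sample(model, obj.maxlen, 'ap') extends the prefix 'ap' one character
+# at a time until the space padding character is produced.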
+
+
+def load_checkpoint(filepath):
+    # map_location lets a checkpoint trained on GPU load on a CPU-only host
+    checkpoint = torch.load(filepath, map_location=device)
+    model = checkpoint['model']
+    model.load_state_dict(checkpoint['state_dict'])
+
+    model.eval()
+    return model
+
+
@app.route('/')
def home():
-    return {'key':'Hello HuggingFace!'}
+    model = load_checkpoint('checkpoint.pth')
+    res = sample(model, obj.maxlen, 'ap')
+    return {'key':res}
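+# e.g. GET / now returns a completion of the prefix 'ap' drawn from the
+# training vocabulary, such as {'key': 'approximator'} (illustrative output).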