from flask import Flask import torch from torch import nn import re import numpy as np import pandas as pd from collections import OrderedDict # import requests # from bs4 import BeautifulSoup app = Flask(__name__) device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') if device == 'cuda:0': torch.cuda.set_device(device) print(device) def extract_text_from_link(url): response = requests.get(url) soup = BeautifulSoup(response.content, 'html.parser') text = soup.get_text() return text doc = """The word "deep" in "deep learning" refers to the number of layers through which the data is transformed. More precisely, deep learning systems have a substantial credit assignment path (CAP) depth. The CAP is the chain of transformations from input to output. CAPs describe potentially causal connections between input and output. For a feedforward neural network, the depth of the CAPs is that of the network and is the number of hidden layers plus one (as the output layer is also parameterized). For recurrent neural networks, in which a signal may propagate through a layer more than once, the CAP depth is potentially unlimited.[13] No universally agreed-upon threshold of depth divides shallow learning from deep learning, but most researchers agree that deep learning involves CAP depth higher than 2. CAP of depth 2 has been shown to be a universal approximator in the sense that it can emulate any function.[14] Beyond that, more layers do not add to the function approximator ability of the network. Deep models (CAP > 2) are able to extract better features than shallow models and hence, extra layers help in learning the features effectively.""" class Text2Words: def __init__(self, document): self.text_all = re.findall(r'\b[A-Za-z]+\b', document) self.text = list(set(self.text_all)) self.chars_all = ''.join(self.text) self.chars = self.unique_chars(self.chars_all) self.int2char = dict(enumerate(self.chars)) self.char2int = {char: ind for ind, char in self.int2char.items()} self.maxlen = len(max(self.text, key=len)) self.update_text() self.input_seq_char, self.target_seq_char = self.get_seq_char(self.text) self.input_seq_index, self.target_seq_index = self.get_seq(self.char2int, self.input_seq_char, self.target_seq_char, len(self.text)) self.dict_size = len(self.char2int) self.seq_len = self.maxlen - 1 self.batch_size = len(self.text) self.input_seq = self.one_hot_encode(self.input_seq_index, self.dict_size, self.seq_len, self.batch_size) def one_hot_encode(self, sequence, dict_size, seq_len, batch_size): # Creating a multi-dimensional array of zeros with the desired output shape features = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32) # Replacing the 0 at the relevant character index with a 1 to represent that character for i in range(batch_size): for u in range(seq_len): features[i, u, sequence[i][u]] = 1 return features def get_seq(self, char2int, input_seq_char, target_seq_char,n): x=[] y=[] for i in range(n): x.append([char2int[character] for character in input_seq_char[i]]) y.append([char2int[character] for character in target_seq_char[i]]) return x,y def get_seq_char(self, text): input_seq = [] target_seq = [] for i in range(len(text)): # Remove last character for input sequence input_seq.append(text[i][:-1]) # Remove first character for target sequence target_seq.append(text[i][1:]) return input_seq, target_seq def unique_chars(self, chars_all): chars = [] for letter in chars_all: if letter not in chars: chars.append(letter) # chars = sorted(chars) if ' ' not in chars: chars.append(' ') return sorted(chars) def update_text(self): for i in range(len(self.text)): while len(self.text[i])