File size: 960 Bytes
4d29f91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from transformers import pipeline
import numpy as np
import torch
import transformers
import json
import pandas as pd
from numpy.random import seed
seed(1)
import emoji
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer  # PorterStemmer LancasterStemmer
from nltk.stem import WordNetLemmatizer
import re
stemmer = PorterStemmer()

# uncomment this when run first time 
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('stopwords')

lemmatizer = WordNetLemmatizer()
stopwords = nltk.corpus.stopwords.words('english')

import gradio as gr
pipe = pipeline("text-classification", model="dsmsb/16class_12k_newtest1618_xlm_roberta_base_27nov_v2_8epoch")
def classify(text):
    output = pipe(return,top_k = 2)
    return {"class":output}
inputs = gr.inputs.Textbox(label="pdf link")
outputs = gr.outputs.Textbox(label="OCR Text")
demo = gr.Interface(fn=classify,inputs=inputs,outputs=outputs)
demo.launch()