themeetjani's picture
Update app.py
274543c
raw
history blame
No virus
959 Bytes
from transformers import pipeline
import numpy as np
import torch
import transformers
import json
import pandas as pd
from numpy.random import seed
# Fix the seed of NumPy's global random state (numpy.random.seed) so that any
# NumPy-driven randomness is repeatable between runs.
seed(1)
import emoji
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer # PorterStemmer LancasterStemmer
from nltk.stem import WordNetLemmatizer
import re
# Stemmer instance; it is created here but not used by the code visible in this file.
stemmer = PorterStemmer()

# Fetch the NLTK resources needed below. These calls are live (not commented
# out), so they run every time the module is loaded.
for _resource in ('wordnet', 'omw-1.4', 'stopwords'):
    nltk.download(_resource)

# Lemmatizer instance, likewise only created here.
lemmatizer = WordNetLemmatizer()

# NOTE: this rebinds the name `stopwords`, which was imported above from
# nltk.corpus, to the result of words('english'). After this line the
# module-level `stopwords` no longer refers to the corpus reader.
stopwords = nltk.corpus.stopwords.words('english')
import gradio as gr
# Text-classification pipeline, built once when the module is loaded and
# reused by every call to classify().
pipe = pipeline(
    task="text-classification",
    model="dsmsb/16class_12k_newtest1618_xlm_roberta_base_27nov_v2_8epoch",
)
def classify(text):
    """Run the module-level ``pipe`` on ``text`` and wrap its result.

    Args:
        text: Input passed unchanged to ``pipe``.

    Returns:
        A dict with the single key ``"class"`` holding whatever
        ``pipe(text, top_k=2)`` returns (presumably the two best
        label/score entries -- confirm against the pipeline docs).
    """
    return {"class": pipe(text, top_k=2)}
# Gradio UI: a single text box in, a single text box out, wired to classify().
#
# The `gr.inputs` / `gr.outputs` namespaces are deprecated in Gradio 3 and
# removed in Gradio 4; the top-level `gr.Textbox` component is used for both
# the input and the output instead (requires Gradio 3 or later).
#
# NOTE(review): the labels below ("pdf link", "OCR Text") look copied from a
# different app and do not describe a text classifier -- confirm and rename.
inputs = gr.Textbox(label="pdf link")
outputs = gr.Textbox(label="OCR Text")
demo = gr.Interface(fn=classify, inputs=inputs, outputs=outputs)
# Starts the web server when the script is run.
demo.launch()