Spaces:

mkoot007
/

Classification-testing

Runtime error

File size: 1,927 Bytes

406399b

import gradio as gr
import re
from docx import Document  # Use python-docx to read DOCX files
from PyPDF2 import PdfFileReader  # Import PdfFileReader from PyPDF2

# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in a PDF.

    Uses the current PyPDF2 reader API (``PdfReader`` / ``pages`` /
    ``extract_text``). The legacy ``PdfFileReader`` / ``getNumPages`` /
    ``getPage`` / ``extractText`` names raise ``DeprecationError`` on
    PyPDF2 3.x, so they are no longer used here.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts as its source; the
            caller in this file passes the uploaded resume object through
            unchanged.

    Returns:
        The text of all pages joined together with no separator, or an
        empty string when the document has no pages.
    """
    # Imported locally so this function works regardless of which PyPDF2
    # names the module-level import line pulls in.
    from PyPDF2 import PdfReader

    reader = PdfReader(pdf_file)
    # ``or ""`` is a defensive guard so a page that yields no text can never
    # break the join.
    return "".join(page.extract_text() or "" for page in reader.pages)

# Function to extract text from a DOCX file
def extract_text_from_docx(docx_file):
    """Return the text of a DOCX document, one paragraph per line.

    Args:
        docx_file: Source handed straight to ``docx.Document``.

    Returns:
        The ``text`` of each entry in the document's ``paragraphs``
        collection, joined with newline characters.
    """
    paragraphs = Document(docx_file).paragraphs
    return "\n".join(paragraph.text for paragraph in paragraphs)

# Function to extract information from a resume
def extract_info_from_resume(resume_path):
    """Extract a name, email address and phone number from a resume file.

    The file type is chosen from the file name's extension, compared
    case-insensitively so that ``Resume.PDF`` is treated like
    ``resume.pdf``.

    Args:
        resume_path: Either an object exposing the file's path as ``.name``
            (such as an uploaded-file wrapper) or a plain path string. It is
            passed unchanged to the PDF/DOCX text extractor.

    Returns:
        A dict with the keys ``"Name"``, ``"Email"`` and ``"Phone"``. Each
        value is the first regex match found in the document text, or a
        "... not found" placeholder string when nothing matched.

    Raises:
        ValueError: If the file name ends in neither ``.pdf`` nor ``.docx``.
    """
    # Fall back to the argument itself when it has no ``.name`` attribute,
    # so a plain path string works as well as an upload object.
    filename = str(getattr(resume_path, "name", resume_path)).lower()

    if filename.endswith(".pdf"):
        text = extract_text_from_pdf(resume_path)
    elif filename.endswith(".docx"):
        text = extract_text_from_docx(resume_path)
    else:
        raise ValueError("Unsupported file format. Only PDF and DOCX are supported.")

    # Output key -> (pattern, placeholder used when the pattern finds nothing).
    # Name: two or more consecutive capitalised words.
    # Phone: 3-3-4 digit groups with optional parentheses and separators.
    fields = {
        "Name": (r"([A-Z][a-z]+(?: [A-Z][a-z]+)+)", "Name not found"),
        "Email": (r"[\w\.-]+@[\w\.-]+", "Email not found"),
        "Phone": (
            r"(\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4})",
            "Phone number not found",
        ),
    }

    extracted_info = {}
    for label, (pattern, placeholder) in fields.items():
        match = re.search(pattern, text)
        extracted_info[label] = match.group() if match else placeholder

    return extracted_info

# Define a Gradio interface
# ``gr.File`` is the top-level file-upload component. It replaces
# ``gr.inputs.File``: the ``gr.inputs`` namespace is deprecated in Gradio 3
# and no longer exists in Gradio 4.
# No ``type`` is passed on purpose: Gradio 3 defaults to the same "file"
# mode the original requested, while Gradio 4 does not accept "file".
# NOTE: on Gradio 4 the default mode hands the callback a path string
# rather than a file object.
iface = gr.Interface(
    fn=extract_info_from_resume,
    inputs=gr.File(),
    outputs="json",
)

# Deploy the Gradio interface
iface.launch(share=True)