# Spaces status: Runtime error
import re

import gradio as gr
from docx import Document  # Use python-docx to read DOCX files
from PyPDF2 import PdfFileReader  # Import PdfFileReader from PyPDF2
from PyPDF2 import PdfReader  # Replacement reader API used by extract_text_from_pdf
# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: Path or binary file-like object; passed unchanged to
            ``PyPDF2.PdfReader``.

    Returns:
        str: The text of all pages in page order, with no separator
        between pages. Empty if no page yields any text.
    """
    # PdfFileReader / getNumPages() / getPage() / extractText() were removed
    # in PyPDF2 3.0 and raise DeprecationError there. PdfReader, .pages and
    # extract_text() are the replacement names.
    reader = PdfReader(pdf_file)
    # `or ""` keeps the join safe if a page produces no text; joining once
    # avoids repeated string concatenation on long documents.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Function to extract text from a DOCX file
def extract_text_from_docx(docx_file):
    """Return the text of a DOCX document, one paragraph per line.

    Args:
        docx_file: Passed unchanged to ``docx.Document``.

    Returns:
        str: The ``text`` of each entry in ``doc.paragraphs``, joined with
        newline characters.
    """
    document = Document(docx_file)
    paragraph_texts = []
    for paragraph in document.paragraphs:
        paragraph_texts.append(paragraph.text)
    return "\n".join(paragraph_texts)
# Function to extract information from a resume
def _first_match(pattern, text, fallback):
    """Return the first match of *pattern* in *text*, or *fallback* if none."""
    found = re.search(pattern, text)
    return found.group() if found else fallback


def extract_info_from_resume(resume_path):
    """Extract a name, email address and phone number from a resume file.

    Args:
        resume_path: Either a filesystem path given as ``str``, or an
            object exposing the file name through a ``name`` attribute
            (such as an uploaded temp-file object). It is forwarded
            unchanged to the PDF/DOCX text extractor.

    Returns:
        dict: Keys ``"Name"``, ``"Email"`` and ``"Phone"``. Each value is
        the first match found in the document text, or a
        ``"... not found"`` placeholder string.

    Raises:
        ValueError: If the file name ends in neither ``.pdf`` nor ``.docx``
            (compared case-insensitively).
    """
    # Accept both path strings and objects carrying the path in `.name`.
    filename = getattr(resume_path, "name", None)
    if not isinstance(filename, str):
        filename = str(resume_path)
    # Lower-case once so "Resume.PDF" / "cv.Docx" are recognised too.
    lowered = filename.lower()

    if lowered.endswith(".pdf"):
        text = extract_text_from_pdf(resume_path)
    elif lowered.endswith(".docx"):
        text = extract_text_from_docx(resume_path)
    else:
        raise ValueError("Unsupported file format. Only PDF and DOCX are supported.")

    # Heuristic: the first run of two or more capitalised words.
    name_pattern = r"([A-Z][a-z]+(?: [A-Z][a-z]+)+)"
    # The domain must be dot-separated labels, so a sentence-ending "." or
    # other trailing punctuation is not captured as part of the address.
    email_pattern = r"[\w.+-]+@[\w-]+(?:\.[\w-]+)+"
    # Ten-digit numbers with optional parentheses and -, ., or space separators.
    phone_pattern = r"(\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4})"

    return {
        "Name": _first_match(name_pattern, text, "Name not found"),
        "Email": _first_match(email_pattern, text, "Email not found"),
        "Phone": _first_match(phone_pattern, text, "Phone number not found"),
    }
# Define a Gradio interface
# The `gr.inputs` namespace was deprecated in Gradio 3 and removed in
# Gradio 4, so the upload component is taken from the top-level `gr.File`.
# NOTE(review): `type` is left at the component default because "file" is not
# an accepted value in Gradio 4; the handler then receives the default upload
# representation of the installed version (believed to be a temp-file object
# in 3.x and a path string in 4.x) -- confirm against the pinned Gradio version.
iface = gr.Interface(
    fn=extract_info_from_resume,
    inputs=gr.File(),
    outputs="json",
)
# Deploy the Gradio interface
iface.launch(share=True)