"""Gradio app that extracts a name, e-mail address and phone number from a resume.

The uploaded file (PDF or DOCX) is converted to plain text and then scanned
with simple regular expressions.  The patterns are heuristics: the first
match of each pattern anywhere in the text is reported.
"""

import re

import gradio as gr
from docx import Document  # Use python-docx to read DOCX files
from PyPDF2 import PdfFileReader  # Legacy reader, only usable on PyPDF2 < 3.0

try:
    # Newer PyPDF2 releases expose the snake_case API; the camelCase
    # ``PdfFileReader`` API was removed in PyPDF2 3.0 and raises when used.
    from PyPDF2 import PdfReader
except ImportError:  # old PyPDF2 that only ships the legacy API
    PdfReader = None

# Regular expressions used to pick the contact details out of the raw text.
# Compiled once at import time instead of on every request.
_NAME_PATTERN = re.compile(r"([A-Z][a-z]+(?: [A-Z][a-z]+)+)")
_EMAIL_PATTERN = re.compile(r"[\w\.-]+@[\w\.-]+")
_PHONE_PATTERN = re.compile(r"(\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4})")


def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: A path or binary file object accepted by PyPDF2's reader.

    Returns:
        The text of all pages joined together without a separator.
    """
    if PdfReader is not None:
        reader = PdfReader(pdf_file)
        return "".join(page.extract_text() for page in reader.pages)

    # Fallback for PyPDF2 versions that predate ``PdfReader``.
    reader = PdfFileReader(pdf_file)
    return "".join(
        reader.getPage(page_num).extractText()
        for page_num in range(reader.getNumPages())
    )


def extract_text_from_docx(docx_file):
    """Return the text of a DOCX document, one paragraph per line.

    Args:
        docx_file: A path or file object accepted by ``docx.Document``.

    Returns:
        The paragraph texts joined with newline characters.
    """
    doc = Document(docx_file)
    return "\n".join(para.text for para in doc.paragraphs)


def _first_match(pattern, text, default):
    """Return the first match of ``pattern`` in ``text``, or ``default`` if none."""
    match = pattern.search(text)
    return match.group() if match else default


def extract_info_from_resume(resume_path):
    """Extract name, e-mail and phone number from an uploaded resume.

    Args:
        resume_path: Either an object exposing the file path as ``.name``
            (what ``gr.inputs.File(type="file")`` passes in) or a plain path
            string (what ``gr.File()`` passes in on Gradio 4+).

    Returns:
        A dict with the keys ``"Name"``, ``"Email"`` and ``"Phone"``.  A field
        that could not be found holds a "... not found" placeholder string.

    Raises:
        ValueError: If the file is neither a ``.pdf`` nor a ``.docx`` file.
    """
    # Compare the extension case-insensitively so "CV.PDF" is accepted too.
    file_name = str(getattr(resume_path, "name", resume_path)).lower()

    if file_name.endswith(".pdf"):
        text = extract_text_from_pdf(resume_path)
    elif file_name.endswith(".docx"):
        text = extract_text_from_docx(resume_path)
    else:
        raise ValueError("Unsupported file format. Only PDF and DOCX are supported.")

    return {
        "Name": _first_match(_NAME_PATTERN, text, "Name not found"),
        "Email": _first_match(_EMAIL_PATTERN, text, "Email not found"),
        "Phone": _first_match(_PHONE_PATTERN, text, "Phone number not found"),
    }


# Define a Gradio interface.
# The ``gr.inputs`` namespace was removed in Gradio 4; keep using it where it
# exists so behaviour on older installs is unchanged, otherwise fall back to
# the top-level ``gr.File`` component.
if hasattr(gr, "inputs"):
    _resume_input = gr.inputs.File(type="file")
else:
    _resume_input = gr.File()

iface = gr.Interface(
    fn=extract_info_from_resume,
    inputs=_resume_input,
    outputs="json",
)

# Deploy the Gradio interface
iface.launch(share=True)