Spaces:

mkoot007
/

Classification-testing

Runtime error

App Files Files Community

Classification-testing / app.py

mkoot007

Update app.py

406399b 11 months ago

raw

history blame

No virus

1.93 kB

	import gradio as gr
	import re
	from docx import Document # Use python-docx to read DOCX files
	from PyPDF2 import PdfFileReader # Import PdfFileReader from PyPDF2

	# Function to extract text from a PDF file
	def extract_text_from_pdf(pdf_file):
	    """Return the concatenated text of every page in a PDF.

	    Args:
	        pdf_file: Whatever the PyPDF2 reader accepts as its first argument;
	            it is passed through unchanged.

	    Returns:
	        The text of all pages joined in page order, or an empty string
	        when the document has no pages.
	    """
	    try:
	        # PyPDF2 3.x raises DeprecationError for PdfFileReader / getPage /
	        # extractText, so prefer the current reader API when it exists.
	        from PyPDF2 import PdfReader
	    except ImportError:
	        # Old PyPDF2 releases only ship the legacy camelCase API.
	        legacy_reader = PdfFileReader(pdf_file)
	        return "".join(
	            legacy_reader.getPage(page_num).extractText()
	            for page_num in range(legacy_reader.getNumPages())
	        )

	    reader = PdfReader(pdf_file)
	    # `or ""` guards against a page yielding no text object at all.
	    return "".join(page.extract_text() or "" for page in reader.pages)

	# Function to extract text from a DOCX file
	def extract_text_from_docx(docx_file):
	    """Return the text of a DOCX document, one paragraph per line.

	    Args:
	        docx_file: Passed straight to ``Document``.

	    Returns:
	        The ``text`` of each entry in ``doc.paragraphs``, joined with
	        newline characters.
	    """
	    paragraph_texts = []
	    for paragraph in Document(docx_file).paragraphs:
	        paragraph_texts.append(paragraph.text)
	    return "\n".join(paragraph_texts)

	# Function to extract information from a resume
	def extract_info_from_resume(resume_path):
	    """Pull the candidate's name, email and phone number out of a resume.

	    Args:
	        resume_path: The uploaded resume, either as a path string or as an
	            object that exposes its path through a ``name`` attribute.

	    Returns:
	        A dict with the keys ``"Name"``, ``"Email"`` and ``"Phone"``. Each
	        value is the first regex match in the document text, or a
	        "... not found" placeholder when nothing matches.

	    Raises:
	        ValueError: If the file name does not end in ``.pdf`` or ``.docx``.
	    """
	    # Accept a bare path as well as a file wrapper carrying its path in `.name`.
	    filename = str(getattr(resume_path, "name", resume_path))
	    # Compare case-insensitively so "Resume.PDF" / "cv.DOCX" are not rejected.
	    lowered = filename.lower()
	    if lowered.endswith(".pdf"):
	        text = extract_text_from_pdf(resume_path)
	    elif lowered.endswith(".docx"):
	        text = extract_text_from_docx(resume_path)
	    else:
	        raise ValueError("Unsupported file format. Only PDF and DOCX are supported.")

	    # field label -> (regex, placeholder used when the regex finds nothing)
	    field_specs = {
	        "Name": (r"([A-Z][a-z]+(?: [A-Z][a-z]+)+)", "Name not found"),
	        "Email": (r"[\w\.-]+@[\w\.-]+", "Email not found"),
	        "Phone": (r"(\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4})", "Phone number not found"),
	    }
	    extracted_info = {}
	    for label, (pattern, placeholder) in field_specs.items():
	        match = re.search(pattern, text)
	        extracted_info[label] = match.group() if match else placeholder
	    return extracted_info

	# Define a Gradio interface: one uploaded file in, extracted fields out as JSON.
	# `gr.File` is used instead of `gr.inputs.File(type="file")` because the
	# `gr.inputs` namespace is deprecated in Gradio 3 and removed in Gradio 4.
	# NOTE(review): the default `type` is expected to hand the callback a value
	# exposing the upload path via `.name` on both major versions - confirm
	# against the Gradio version pinned for this Space.
	iface = gr.Interface(
	    fn=extract_info_from_resume,
	    inputs=gr.File(),
	    outputs="json",
	)

	# Deploy the Gradio interface
	iface.launch(share=True)