# CV_MATCHER / app.py
import spacy
import gradio as gr
import jsonlines
from spacy.cli import download

# Download the small English model if it is not already present, then load it
download('en_core_web_sm')
nlp = spacy.load('en_core_web_sm')
# Create list with entity labels from jsonl file
with jsonlines.open("skill_patterns.jsonl") as f:
    created_entities = [line['label'].upper() for line in f.iter()]
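# Each line of skill_patterns.jsonl is assumed to follow spaCy's EntityRuler pattern format, e.g.:
#   {"label": "SKILL|python", "pattern": [{"LOWER": "python"}]}
# i.e. every skill label carries a "SKILL|" prefix followed by the skill name.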
def extract_text_from_word(txt):
    '''Normalise raw resume text: collapse newlines and tabs into spaces and lowercase everything'''
    return txt.replace('\n', ' ').replace('\t', ' ').lower()
def add_newruler_to_pipeline(skill_pattern_path):
    '''Read all skill patterns from a JSONL file and add an EntityRuler to the pipeline, after the parser and before the NER component'''
    ruler = nlp.add_pipe("entity_ruler", after='parser')
    ruler.from_disk(skill_pattern_path)  # loads only the patterns; the component itself was created above
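# Note: with the ruler placed before the "ner" component, spaCy's statistical NER does not
# overwrite spans the ruler has already set, so the pattern-based skill entities are preserved.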
def create_skill_set(doc):
    '''Create a set of the extracted skill entities of a doc'''
    return set([ent.label_.upper()[6:] for ent in doc.ents if 'skill' in ent.label_.lower()])
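# Example, assuming the "SKILL|<name>" label convention sketched above: an entity labelled
# "SKILL|python" yields "PYTHON" (the 6-character "SKILL|" prefix is stripped), so a resume
# mentioning Python and SQL produces {"PYTHON", "SQL"}.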
def create_skillset_dict(resume_names, resume_texts):
    '''Create a dictionary containing a set of the extracted skills. Name is key, matching skillset is value'''
    skillsets = [create_skill_set(resume_text) for resume_text in resume_texts]
    return dict(zip(resume_names, skillsets))
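# Example result (hypothetical name and skills): {"ABHI": {"PYTHON", "SQL"}}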
def match_skills(vacature_set, cv_set, resume_name):
    '''Get intersection of resume skills and job offer skills and return match percentage'''
    if len(vacature_set) < 1:
        # No skills could be extracted from the job offer, so the match defaults to 0
        print('could not extract skills from job offer text')
        pct_match = 0.0
    else:
        pct_match = round(len(vacature_set.intersection(cv_set[resume_name])) / len(vacature_set) * 100, 0)
        print(resume_name + " has a {}% skill match on this job offer".format(pct_match))
        print('Required skills: {} '.format(vacature_set))
        print('Matched skills: {} \n'.format(vacature_set.intersection(cv_set[resume_name])))
    return (resume_name, pct_match)
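# Example with hypothetical sets:
#   match_skills({"PYTHON", "SQL"}, {"ABHI": {"PYTHON"}}, "ABHI")
# prints a 50.0% match and returns ("ABHI", 50.0).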
add_newruler_to_pipeline("skill_patterns.jsonl")
def match(CV, JD):
    # Run the resume and the job description through the pipeline and compare their skill sets
    resume_names = ['ABHI']
    resume_texts = [nlp(CV)]
    skillset_dict = create_skillset_dict(resume_names, resume_texts)
    jd_skillset = create_skill_set(nlp(JD))
    match_pairs = [match_skills(jd_skillset, skillset_dict, name) for name in skillset_dict.keys()]
    if match_pairs:
        return match_pairs[0][1]
    else:
        return "No matching skill set."
# Gradio UI (uses the legacy gr.inputs / gr.outputs API from Gradio versions before 3.0)
desc = "A Machine Learning Based Resume Matcher, to compare Resumes with Job Descriptions."
inp1 = gr.inputs.Textbox(lines=10, placeholder=None, default="", label="Resume Details")
inp2 = gr.inputs.Textbox(lines=10, placeholder=None, default="", label="Job Description")
out = gr.outputs.Textbox(type="auto", label="Match Score")
iface = gr.Interface(fn=match, inputs=[inp1, inp2], outputs=out,
                     title="A Machine Learning Based Resume Matcher, to compare Resumes with Job Descriptions",
                     article=desc, theme="huggingface", layout='vertical')
iface.launch(debug=True)