# CV_MATCHER / app.py
import spacy
import gradio as gr
import jsonlines
from spacy.cli import download

# Download the small English model if it is not already present, then load it
download('en_core_web_sm')
nlp = spacy.load('en_core_web_sm')
# Create list with entity labels from jsonl file
with jsonlines.open("skill_patterns.jsonl") as f:
    created_entities = [line['label'].upper() for line in f.iter()]
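# Each line of skill_patterns.jsonl is assumed to follow spaCy's EntityRuler pattern format, e.g.:
#   {"label": "SKILL|python", "pattern": [{"LOWER": "python"}]}
# i.e. every skill label carries a "SKILL|" prefix followed by the skill name.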
def extract_text_from_word(txt):
    '''Normalise raw resume text: collapse newlines and tabs into spaces and lowercase everything'''
    return txt.replace('\n', ' ').replace('\t', ' ').lower()
def add_newruler_to_pipeline(skill_pattern_path):
    '''Read all skill patterns from a JSONL file and add an EntityRuler to the pipeline, after the parser and before the NER component'''
    ruler = nlp.add_pipe("entity_ruler", after='parser')
    ruler.from_disk(skill_pattern_path)  # loads only the patterns; the component itself was created above
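# Note: with the ruler placed before the "ner" component, spaCy's statistical NER does not
# overwrite spans the ruler has already set, so the pattern-based skill entities are preserved.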
def create_skill_set(doc):
    '''Create a set of the extracted skill entities of a doc'''
    return set([ent.label_.upper()[6:] for ent in doc.ents if 'skill' in ent.label_.lower()])
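# Example, assuming the "SKILL|<name>" label convention sketched above: an entity labelled
# "SKILL|python" yields "PYTHON" (the 6-character "SKILL|" prefix is stripped), so a resume
# mentioning Python and SQL produces {"PYTHON", "SQL"}.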
def create_skillset_dict(resume_names, resume_texts):
    '''Create a dictionary containing a set of the extracted skills. Name is key, matching skillset is value'''
    skillsets = [create_skill_set(resume_text) for resume_text in resume_texts]
    return dict(zip(resume_names, skillsets))
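# Example result (hypothetical name and skills): {"ABHI": {"PYTHON", "SQL"}}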
def match_skills(vacature_set, cv_set, resume_name):
    '''Get intersection of resume skills and job offer skills and return match percentage'''
    if len(vacature_set) < 1:
        # No skills could be extracted from the job offer, so the match defaults to 0
        print('could not extract skills from job offer text')
        pct_match = 0.0
    else:
        pct_match = round(len(vacature_set.intersection(cv_set[resume_name])) / len(vacature_set) * 100, 0)
        print(resume_name + " has a {}% skill match on this job offer".format(pct_match))
        print('Required skills: {} '.format(vacature_set))
        print('Matched skills: {} \n'.format(vacature_set.intersection(cv_set[resume_name])))
    return (resume_name, pct_match)
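# Example with hypothetical sets:
#   match_skills({"PYTHON", "SQL"}, {"ABHI": {"PYTHON"}}, "ABHI")
# prints a 50.0% match and returns ("ABHI", 50.0).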
add_newruler_to_pipeline("skill_patterns.jsonl")
def match(CV, JD):
    # Run the resume and the job description through the pipeline and compare their skill sets
    resume_names = ['ABHI']
    resume_texts = [nlp(CV)]
    skillset_dict = create_skillset_dict(resume_names, resume_texts)
    jd_skillset = create_skill_set(nlp(JD))
    match_pairs = [match_skills(jd_skillset, skillset_dict, name) for name in skillset_dict.keys()]
    if match_pairs:
        return match_pairs[0][1]
    else:
        return "No matching skill set."
# Gradio UI (uses the legacy gr.inputs / gr.outputs API from Gradio versions before 3.0)
desc = "A Machine Learning Based Resume Matcher, to compare Resumes with Job Descriptions."
inp1 = gr.inputs.Textbox(lines=10, placeholder=None, default="", label="Resume Details")
inp2 = gr.inputs.Textbox(lines=10, placeholder=None, default="", label="Job Description")
out = gr.outputs.Textbox(type="auto", label="Match Score")
iface = gr.Interface(fn=match, inputs=[inp1, inp2], outputs=out,
                     title="A Machine Learning Based Resume Matcher, to compare Resumes with Job Descriptions",
                     article=desc, theme="huggingface", layout='vertical')
iface.launch(debug=True)