import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
from langdetect import detect

# Sample job descriptions for a handful of companies
data = {
    'Company': ['Google', 'Amazon', 'Microsoft', 'Facebook', 'Tesla'],
    'Job_Description': [
        "We are looking for a Senior Software Engineer with extensive experience in Python, Java, and cloud computing. The candidate should have experience working in an Agile environment and a deep understanding of machine learning.",
        "The Data Analyst will analyze large datasets to uncover trends, patterns, and insights. Proficiency in SQL, Python, and data visualization tools like PowerBI or Tableau is required.",
        "Hiring a Cloud Architect with experience in Azure, AWS, and cloud infrastructure design. The ideal candidate should have experience with Docker, Kubernetes, and network security.",
        "AI Research Scientist with expertise in machine learning, deep learning, and natural language processing (NLP). Experience with TensorFlow, PyTorch, and data-driven research.",
        "Looking for an Electrical Engineer with experience in circuit design, power electronics, and embedded systems. Proficiency in CAD tools and simulation software is a must."
    ]
}

# Load the job descriptions into a pandas DataFrame
df = pd.DataFrame(data)
# Load the Hugging Face model for semantic similarity
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
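# Multilingual translation pipeline (NLLB-200, distilled 600M checkpoint) used to translate CVs and job descriptions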
translator = pipeline(task="translation", model="facebook/nllb-200-distilled-600M")
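# Map langdetect's ISO 639-1 codes to the FLORES-200 codes NLLB expects;
# languages not listed here fall back to English in translate_to_english below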
lang_code_mapping = {
    'ar': 'arb_Arab',  # Arabic
    'fr': 'fra_Latn',  # French
    'es': 'spa_Latn',  # Spanish
    'de': 'deu_Latn',  # German
}

# Detect the CV's language and translate it to English (the language of the job descriptions);
# returns the English CV text together with the detected language code
def translate_to_english(cv_text):
    detected_lang = lang_code_mapping.get(detect(cv_text), "eng_Latn")

    # Translate to English if not already in English
    if detected_lang != 'eng_Latn':
        translation = translator(cv_text, src_lang=detected_lang, tgt_lang="eng_Latn")[0]['translation_text']
        return translation, detected_lang
    else:
        return cv_text, detected_lang 
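
# Illustrative example (hypothetical input): a CV beginning "Ingénieur logiciel avec
# cinq ans d'expérience en Python" is detected as 'fr', mapped to 'fra_Latn', and
# translated to English before matching; an English CV is passed through unchanged.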

# If the entered CV is not in English, translate the job description back into the CV's language
def translate_job_description_if_needed(job_description, target_lang):
    if target_lang != 'eng_Latn':
        return translator(job_description, src_lang="eng_Latn", tgt_lang=target_lang)[0]['translation_text']
    return job_description
    
# Find the top 3 job descriptions that best match the CV
def find_top_matches(cv_text):
    if not cv_text:
        return "Error: CV is empty", None

    # Translate the CV to English if it is not already in English
    cv_text, detected_lang = translate_to_english(cv_text)

    # Get job descriptions from the DataFrame as list
    descriptions = df['Job_Description'].tolist()

    # Encode both the CV and the job descriptions to calculate cosine similarities
    descriptions_embeddings = model.encode(descriptions, convert_to_tensor=True)
    cv_embedding = model.encode([cv_text], convert_to_tensor=True)
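    # util.pytorch_cos_sim returns a [1 x num_jobs] matrix; row 0 holds the CV-vs-job scores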
    similarities = util.pytorch_cos_sim(cv_embedding, descriptions_embeddings)[0]

    # Get the top 3 matches based on similarity scores (move to CPU so pandas/NumPy can index the results)
    similarities = similarities.cpu()
    top_3_indices = similarities.argsort(descending=True)[:3].numpy()  # Indices of the top 3 matches
    top_3_matches = df.iloc[top_3_indices]
    top_3_similarities = similarities[top_3_indices].numpy()

    # Plot a vertical bar chart of the top 3 matching jobs for this CV
    # (use a fresh figure so repeated calls do not stack bars on the global pyplot state)
    fig, ax = plt.subplots()
    ax.bar(top_3_matches['Company'], top_3_similarities, color='skyblue')
    ax.set_ylabel('Similarity Score')
    ax.set_xlabel('Company')
    ax.set_title('Top 3 Job Description Matches')

    # Create a detailed summary for the top 3 job descriptions
    job_summaries = ""
    for _, row in top_3_matches.iterrows():
        # Translate job description if the detected language is not English
        job_desc_translated = translate_job_description_if_needed(row['Job_Description'], detected_lang)

        if detected_lang == 'arb_Arab':
            # Use dir="rtl" for right-to-left languages
            job_summaries += f'<div dir="rtl"><strong>الشركة:</strong> {row["Company"]}<br>'
            job_summaries += f'<strong>وصف الوظيفة:</strong> {job_desc_translated}<br><br></div>'
        else:
            # Normal left-to-right display
            job_summaries += f"<strong>Company:</strong> {row['Company']}<br>"
            job_summaries += f"<strong>Job Description:</strong> {job_desc_translated}<br><br>"

    return job_summaries, fig
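
# Quick sanity check outside the UI (illustrative only; the CV text below is made up):
# summaries_html, chart = find_top_matches("Data analyst skilled in SQL, Python and Tableau dashboards.")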
 
# Define Gradio interface
demo = gr.Interface(
    fn=find_top_matches,
    inputs=gr.Textbox(lines=15, placeholder="Enter your CV text here...", label="CV Text"),
    outputs=[
        gr.HTML(label="Job Summaries"),
        gr.Plot(label="Top 3 Matching Job Descriptions")
    ],
    title="Match CV to Job Description",
    description="Paste your CV text to find the top 3 matching job descriptions from the available companies, ranked by semantic similarity."
)

# Launch the Gradio interface in Colab
demo.launch()
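
# If the inline interface does not render (e.g. when running as a plain script),
# demo.launch(share=True) can be used instead to generate a temporary public link.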