Lubna25 committed on
Commit
4e13291
1 Parent(s): 3600e4c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -0
app.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ from transformers import pipeline
5
+ from sentence_transformers import SentenceTransformer, util
6
+ from langdetect import detect
7
+
8
# Sample job postings: one (company, job description) pair per entry.
_job_postings = [
    (
        'Google',
        "We are looking for a Senior Software Engineer with extensive experience in Python, Java, and cloud computing. The candidate should have experience working in an Agile environment and a deep understanding of machine learning.",
    ),
    (
        'Amazon',
        "The Data Analyst will analyze large datasets to uncover trends, patterns, and insights. Proficiency in SQL, Python, and data visualization tools like PowerBI or Tableau is required.",
    ),
    (
        'Microsoft',
        "Hiring a Cloud Architect with experience in Azure, AWS, and cloud infrastructure design. The ideal candidate should have experience with Docker, Kubernetes, and network security.",
    ),
    (
        'Facebook',
        "AI Research Scientist with expertise in machine learning, deep learning, and natural language processing (NLP). Experience with TensorFlow, PyTorch, and data-driven research.",
    ),
    (
        'Tesla',
        "Looking for an Electrical Engineer with experience in circuit design, power electronics, and embedded systems. Proficiency in CAD tools and simulation software is a must.",
    ),
]

# Column-oriented view of the postings, keyed by column name.
data = {
    'Company': [company for company, _ in _job_postings],
    'Job_Description': [description for _, description in _job_postings],
}

# Tabular form of the postings; one row per company.
df = pd.DataFrame(data)
22
# Sentence-embedding model used to score CV / job-description similarity.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Translation pipeline; source and target languages are supplied on each call.
translator = pipeline("translation", model="facebook/nllb-200-distilled-600M")

# Maps the code returned by detect() to the language code handed to the
# translator as src_lang / tgt_lang.
lang_code_mapping = dict(
    ar='arb_Arab',  # Arabic
    fr='fra_Latn',  # French
    es='spa_Latn',  # Spanish
    de='deu_Latn',  # German
)
31
+
32
def translate_to_english(cv_text):
    """Return the CV in English together with its detected language code.

    The language reported by ``detect`` is looked up in ``lang_code_mapping``;
    any language without an entry there is treated as English ("eng_Latn").

    Returns:
        A ``(text, lang_code)`` tuple. ``text`` is the translator's output
        when the mapped code is not "eng_Latn", otherwise ``cv_text``
        unchanged. ``lang_code`` is the mapped translator language code.
    """
    source_code = lang_code_mapping.get(detect(cv_text), "eng_Latn")

    # Already English (or an unmapped language): nothing to translate.
    if source_code == 'eng_Latn':
        return cv_text, source_code

    result = translator(cv_text, src_lang=source_code, tgt_lang="eng_Latn")
    return result[0]['translation_text'], source_code
42
+
43
def translate_job_description_if_needed(job_description, target_lang):
    """Return the job description in ``target_lang``, translating from English if needed.

    Args:
        job_description: English job description text.
        target_lang: Language code to translate into. The caller in this file
            passes the translator-style code produced by
            ``translate_to_english`` (e.g. "arb_Arab", or "eng_Latn" for
            English); the short code "en" is also accepted as English.

    Returns:
        ``job_description`` unchanged when the target is English or empty,
        otherwise the translator's ``translation_text`` for it.
    """
    # The original compared only against 'en', but English arrives here as
    # 'eng_Latn', so English CVs triggered a pointless English->English
    # translation. Treat both spellings (and a missing target) as "no-op".
    if not target_lang or target_lang in ('en', 'eng_Latn'):
        return job_description
    return translator(job_description, src_lang="eng_Latn", tgt_lang=target_lang)[0]['translation_text']
48
+
49
def find_top_matches(cv_text):
    """Find the three job descriptions most similar to a CV.

    The CV is translated to English when its detected language has a mapping,
    embedded alongside every job description in ``df``, and ranked by cosine
    similarity. Matching job descriptions are shown in the CV's language.

    Args:
        cv_text: Raw CV text entered by the user.

    Returns:
        A ``(html, figure)`` tuple: an HTML summary of the top matches and a
        matplotlib ``Figure`` with a bar chart of their similarity scores.
        For empty or whitespace-only input, returns
        ``("Error: CV is empty", None)``.
    """
    # Whitespace-only input has no content to detect a language from or match.
    if not cv_text or not cv_text.strip():
        return "Error: CV is empty", None

    # Imported here so the figure can be built without pyplot's global state.
    from matplotlib.figure import Figure

    # Translate the CV to English when it is in another supported language.
    cv_text, detected_lang = translate_to_english(cv_text)
    descriptions = df['Job_Description'].tolist()

    # Encode both the CV and the job descriptions.
    descriptions_embeddings = model.encode(descriptions, convert_to_tensor=True)
    cv_embedding = model.encode([cv_text], convert_to_tensor=True)

    # Cosine similarity between the CV and every job description. Moved to the
    # CPU so the NumPy / pandas conversions below also work when the model
    # runs on an accelerator.
    similarities = util.pytorch_cos_sim(cv_embedding, descriptions_embeddings)[0].cpu()

    # Indices of the (up to) three highest scores, best first.
    top_3_indices = similarities.argsort(descending=True)[:3]
    top_3_matches = df.iloc[top_3_indices.tolist()]
    top_3_similarities = similarities[top_3_indices].numpy()

    # Draw on a fresh figure for every call: plotting through pyplot's shared
    # current figure would stack bars from earlier requests onto this chart.
    fig = Figure()
    ax = fig.subplots()
    ax.bar(top_3_matches['Company'].tolist(), top_3_similarities, color='skyblue')
    ax.set_ylabel('Similarity Score')
    ax.set_xlabel('Company')
    ax.set_title('Top 3 Job Description Matches')

    # Build the HTML summary, one entry per matching company.
    summary_parts = []
    for _, row in top_3_matches.iterrows():
        # Job descriptions are stored in English; show them in the CV's language.
        job_desc_translated = translate_job_description_if_needed(row['Job_Description'], detected_lang)
        summary_parts.append(f"<strong>Company:</strong> {row['Company']}<br>")
        summary_parts.append(f"<strong>Job Description :</strong> {job_desc_translated}<br><br>")

    return "".join(summary_parts), fig
88
+
89
# Gradio UI: a multi-line text box for the CV, with the HTML summary and the
# similarity bar chart returned by find_top_matches as outputs.
demo = gr.Interface(
    fn=find_top_matches,
    inputs=gr.Textbox(lines=15, placeholder="Enter your CV text here...", label="CV Text"),
    outputs=[
        gr.HTML(label="Job Summaries"),
        gr.Plot(label="Top 3 Matching Job Descriptions"),
    ],
    # Stray leading apostrophe removed from the title.
    title="Match CV to Job Description",
    # The input is a text box, so ask the user to paste text rather than upload.
    description="Paste your CV text to find the top 3 job descriptions that match from the available companies using semantic similarity.",
)

# Start the Gradio app.
demo.launch()