import gradio as gr import pandas as pd import matplotlib.pyplot as plt from transformers import pipeline from sentence_transformers import SentenceTransformer, util from langdetect import detect #information about job descriptions data = { 'Company': ['Google', 'Amazon', 'Microsoft', 'Facebook', 'Tesla'], 'Job_Description': [ "We are looking for a Senior Software Engineer with extensive experience in Python, Java, and cloud computing. The candidate should have experience working in an Agile environment and a deep understanding of machine learning.", "The Data Analyst will analyze large datasets to uncover trends, patterns, and insights. Proficiency in SQL, Python, and data visualization tools like PowerBI or Tableau is required.", "Hiring a Cloud Architect with experience in Azure, AWS, and cloud infrastructure design. The ideal candidate should have experience with Docker, Kubernetes, and network security.", "AI Research Scientist with expertise in machine learning, deep learning, and natural language processing (NLP). Experience with TensorFlow, PyTorch, and data-driven research.", "Looking for an Electrical Engineer with experience in circuit design, power electronics, and embedded systems. Proficiency in CAD tools and simulation software is a must." ] } # Load the job descriptions into a pandas DataFrame df = pd.DataFrame(data) # Load the Hugging Face model for semantic similarity model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2') translator = pipeline(task="translation", model="facebook/nllb-200-distilled-600M") lang_code_mapping = { 'ar': 'arb_Arab', # Arabic 'fr': 'fra_Latn', # French 'es': 'spa_Latn', # Spanish 'de': 'deu_Latn', # German } # Take CV and translate it to English if not in English to compare to job descriptions and return English CV and detected language def translate_to_english(cv_text): detected_lang = lang_code_mapping.get(detect(cv_text), "eng_Latn") # Translate to English if not already in English if detected_lang != 'eng_Latn': translation = translator(cv_text, src_lang=detected_lang, tgt_lang="eng_Latn")[0]['translation_text'] return translation, detected_lang else: return cv_text, detected_lang #if entered cv is not in english return the job description in the entered cv langauge def translate_job_description_if_needed(job_description, target_lang): if target_lang != 'eng_Latn': return translator(job_description, src_lang="eng_Latn", tgt_lang=target_lang)[0]['translation_text'] return job_description # Function to find top 3 job descriptions matching the CV to job descriptions def find_top_matches(cv_text): if not cv_text: return "Error: CV is empty", None # Translate CV to english if not in english cv_text, detected_lang = translate_to_english(cv_text) # Get job descriptions from the DataFrame as list descriptions = df['Job_Description'].tolist() # Encode both the CV and job descriptions to calcuate cosine similarities descriptions_embeddings = model.encode(descriptions, convert_to_tensor=True) cv_embedding = model.encode([cv_text], convert_to_tensor=True) similarities = util.pytorch_cos_sim(cv_embedding, descriptions_embeddings)[0] # Get the top 3 matches based on similarity scores top_3_indices = similarities.argsort(descending=True)[:3] # Get the indices of the top 3 matches top_3_matches = df.iloc[top_3_indices] top_3_similarities = similarities[top_3_indices].numpy() #create vertical bar of top 3 match jobs to cv plt.bar(top_3_matches['Company'], top_3_similarities, color='skyblue') plt.ylabel('Similarity Score') plt.xlabel('Company') plt.title('Top 3 Job Description Matches') # Create a detailed summary for the top 3 job descriptions job_summaries = "" for _, row in top_3_matches.iterrows(): # Translate job description if the detected language is not English job_desc_translated = translate_job_description_if_needed(row['Job_Description'], detected_lang) if detected_lang == 'arb_Arab': # Use dir="rtl" for right-to-left languages job_summaries += f'
الشركة: {row["Company"]}
' job_summaries += f'وصف الوظيفه : {job_desc_translated}

' else: # Normal left-to-right display job_summaries += f"Company: {row['Company']}
" job_summaries += f"Job Description : {job_desc_translated}

" return job_summaries, plt # Define Gradio interface demo = gr.Interface( fn=find_top_matches, inputs=gr.Textbox(lines=15, placeholder="Enter your CV text here...", label="CV Text"), outputs=[ gr.HTML(label="Job Summaries"), gr.Plot(label="Top 3 Matching Job Descriptions") ], title="'Match CV to Job Description", description="Upload your CV to find the top 3 job descriptions that match from the available companies using semantic similarity." ) # Launch the Gradio interface in Colab demo.launch()