# -*- coding: utf-8 -*-
"""Skill Transformation Journey.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/11XAXUP2fzy553V9v0x-gxJXcXL3uHJcw
"""

import os
import re

import gradio as gr
import pandas as pd
from openai import OpenAI
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Read the API key from the environment instead of hard-coding it in source
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Course catalogue: a CSV export of the "Shortlisted Courses" sheet
csv_file_path = "1.csv"

# Read the course catalogue into a Pandas DataFrame
courses_df = pd.read_csv(csv_file_path)

# Build the TF-IDF matrix over course names once, at start-up
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(courses_df['Course Name'].fillna(''))


# Function to recommend courses based on user input using GPT and TF-IDF
def recommend_courses(user_skill, ed_qual):
    # Combine the user's input into a single string for TF-IDF
    user_input = f"{user_skill} {ed_qual}"

    # Use TF-IDF and cosine similarity for the initial course recommendations
    user_vector = tfidf_vectorizer.transform([user_input])
    cosine_similarities = linear_kernel(user_vector, tfidf_matrix)

    # Attach similarity scores to the catalogue
    recommendations = courses_df.copy()
    recommendations['Similarity'] = cosine_similarities[0]

    # Sort by similarity and keep the top five recommendations
    top_recommendations = recommendations.sort_values(by='Similarity', ascending=False).head(5)

    # Generate a text summary of the initial recommendations
    initial_recommendations_text = top_recommendations[['Course Name', 'Course Link']].to_string(index=False)

    # Combine user input and the initial shortlist into the GPT prompt
    gpt_prompt = (
        "Refine the following course recommendations based on user input:\n\n"
        f"{user_input}\n\n"
        "Initial Recommendations:\n\n"
        f"{initial_recommendations_text}"
    )

    # Use GPT to generate refined recommendations
    gpt_response = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": gpt_prompt,
            }
        ],
        model="gpt-3.5-turbo",
        max_tokens=1000,
    )
    gpt_content = gpt_response.choices[0].message.content

    # Convert GPT's markdown-style answer into an HTML table
    final_recommendations_html = html_conversion(gpt_content)

    return final_recommendations_html

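# html_conversion below assumes GPT returns a numbered markdown list roughly in
# the following shape (illustrative only; real model output can vary, so the
# parse may yield an empty table and should be validated in production):
#
#   1. Introduction to Psychology:
#      - Course Link: [https://example.com/intro-psych](https://example.com/intro-psych)
#      - Description: Covers the fundamentals of psychological science.
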
def html_conversion(gpt_content):
    # Provided data in text format
    data_text = gpt_content

    # Extract course details: number, name, link text, link URL and description
    courses = re.findall(
        r'(\d+)\.\s*(.*?):\n\s*- Course Link: \[([^\]]+)\]\(([^)]+)\)\n\s*- Description: ([^\n]+)',
        data_text,
    )

    # The markdown link usually repeats the URL as its text, so keep only the
    # URL and drop the duplicate to match the four columns below
    processed_courses = [
        (number, name, link_url, description)
        for number, name, _link_text, link_url, description in courses
    ]

    # Convert the processed list of tuples into a DataFrame
    df = pd.DataFrame(processed_courses, columns=['Sr No', 'Course Name', 'Course Link', 'Description'])

    # Convert the DataFrame to an HTML table
    html_table = df.to_html(index=False, escape=False)

    return html_table


# Quick sanity check (performs a live OpenAI call, so it needs a valid API key)
user_skill = "psychology"
ed_qual = "B.Tech/B.Sc"
html = recommend_courses(user_skill, ed_qual)
html

# Gradio interface with a free-text skill field and a qualification dropdown
iface = gr.Interface(
    fn=recommend_courses,
    inputs=[
        gr.Textbox(label="Enter expected skill"),
        gr.Dropdown(["B.Tech/B.Sc", "M.Tech/M.Sc", "Management"], label="Highest Educational Qualification"),
    ],
    outputs="html",
    live=True,
)

# Launch the Gradio interface with a public share link
iface.launch(share=True)
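# A minimal sketch for saving one generated recommendation table to an HTML file
# (illustrative only; assumes OPENAI_API_KEY is set, and "recommendations.html"
# is an arbitrary output name):
#
# with open("recommendations.html", "w", encoding="utf-8") as f:
#     f.write(recommend_courses("psychology", "B.Tech/B.Sc"))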