# meraspace / 1app.py
# (Hugging Face Space file — uploaded by skolvankar, commit "Add application file", rev 8508bc5, 3.58 kB)
# -*- coding: utf-8 -*-
"""Skill Transformation Journey.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/11XAXUP2fzy553V9v0x-gxJXcXL3uHJcw
"""
import gradio as gr
import re
import openai
from openai import OpenAI
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import os

# OpenAI client. SECURITY FIX: the API key was hard-coded in source; a
# committed key must be treated as leaked and revoked. Read it from the
# environment instead (which is what the original comment intended).
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

# Course catalogue. NOTE: despite the "sheet name" wording below, this is a
# plain CSV file, so pandas.read_csv is the correct loader and no sheet is
# ever selected; sheet_name is kept only for backward compatibility.
excel_file_path = "1.csv"
sheet_name = "Shortlisted Courses"  # unused — CSV files have no sheets

# Read the course list into a DataFrame.
courses_df = pd.read_csv(excel_file_path)

# TF-IDF index over course names, used for similarity-based recommendation.
# fillna('') ensures the vectorizer never sees NaN for missing course names.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(courses_df['Course Name'].fillna(''))
def html_coversion(gpt_content):
    """Convert a numbered Markdown course list into an HTML table.

    Expects entries of the form::

        1. Course Name:
           - Course Link: [link text](url)
           - Description: some text

    Returns an HTML ``<table>`` string with 'Course Name' and 'Course Link'
    columns (empty table when nothing matches).
    """
    # Each match tuple is (number, name, link_text, url, description).
    courses = re.findall(
        r'(\d+)\. (.*?):\n\s*- Course Link: \[([^\]]+)\]\(([^)]+)\)\n\s*- Description: ([^\n]+)',
        gpt_content,
    )
    # BUG FIX: the original tried to drop a "duplicate" link field with
    # tuple.index(value, start), which raised ValueError whenever the link
    # text differed from the URL — and even when it worked, it still produced
    # a 4-field tuple for a 2-column DataFrame (another ValueError). Keep just
    # the course name and the first link occurrence, matching the intent.
    rows = [(name, link_text) for _num, name, link_text, _url, _desc in courses]
    df = pd.DataFrame(rows, columns=['Course Name', 'Course Link'])
    # escape=False preserves any markup embedded in the link column.
    return df.to_html(index=False, escape=False)
# Function to recommend courses based on user input using TF-IDF similarity.
def recommend_courses(user_skill):
    """Recommend up to five courses matching ``user_skill``.

    Vectorizes the query with the module-level TF-IDF model, ranks all
    courses by cosine similarity, and returns the top five as an HTML table
    (columns: Course Name, Course Link).
    """
    # Vectorize the raw query text as a single-document corpus.
    user_vector = tfidf_vectorizer.transform([user_skill])
    # linear_kernel on L2-normalised TF-IDF vectors equals cosine similarity.
    cosine_similarities = linear_kernel(user_vector, tfidf_matrix)

    # Attach a similarity score to every course and keep the five best.
    recommendations = courses_df.copy()
    recommendations['Similarity'] = cosine_similarities[0]
    top_recommendations = recommendations.sort_values(
        by='Similarity', ascending=False
    ).head(5)

    # BUG FIX: the original piped DataFrame.to_string() output through
    # html_coversion(), whose Markdown-list regex never matches that format,
    # so the UI always rendered an empty table. Render the table directly.
    return top_recommendations[['Course Name', 'Course Link']].to_html(
        index=False, escape=False
    )
# Gradio UI: a single text box feeds recommend_courses, whose return value
# (an HTML table string) is rendered by the "html" output component.
# live=True re-runs the function on every input change, not just on submit.
iface = gr.Interface(
    fn=recommend_courses,
    inputs=[
        gr.Textbox("text", label="Enter expected skill"),
        #gr.Dropdown(["B.Tech/B.Sc", "M.Tech/M.Sc", "Management"], label="Highest Educational Qualification"),
    ],
    outputs="html",
    live=True
)
# Launch the app; share=True exposes a temporary public tunnel URL in
# addition to the local server (required for some hosted environments).
iface.launch(share=True)