File size: 3,581 Bytes
8508bc5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# -*- coding: utf-8 -*-
"""Skill Transformation Journey.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/11XAXUP2fzy553V9v0x-gxJXcXL3uHJcw
"""

import os
import re

import gradio as gr
import openai
import pandas as pd
from openai import OpenAI
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

client = OpenAI(
    # SECURITY: read the key from the environment instead of hard-coding it.
    # A literal key was previously committed here — it is exposed and must
    # be rotated/revoked in the OpenAI dashboard.
    api_key=os.environ.get("OPENAI_API_KEY"),
)

# Path to the course catalogue. Despite the original "Excel" wording this is
# a plain CSV file read with pandas.read_csv.
excel_file_path = "1.csv"
sheet_name = "Shortlisted Courses"  # NOTE(review): unused since the switch to CSV; kept for compatibility

# Load the course catalogue into a DataFrame.
courses_df = pd.read_csv(excel_file_path)

# Fit a TF-IDF model over the course names so a user's skill text can be
# matched by cosine similarity. Missing names become empty strings so the
# vectorizer never sees NaN.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(courses_df['Course Name'].fillna(''))

def html_coversion(gpt_content):

  # Provided data in text format
  data_text = gpt_content

  # Extract course details using a modified regular expression
  courses = re.findall(r'(\d+)\. (.*?):\n\s*- Course Link: \[([^\]]+)\]\(([^)]+)\)\n\s*- Description: ([^\n]+)', data_text)

  # Process each tuple to remove the second occurrence of the course link
  processed_courses = []
  for course_tuple in courses:
      # Find the index of the second occurrence of the course link
      index_of_second_occurrence = course_tuple.index(course_tuple[2], course_tuple.index(course_tuple[2]) + 1)
      # Remove the second occurrence of the course link from the tuple
      processed_tuple = course_tuple[:index_of_second_occurrence] + course_tuple[index_of_second_occurrence + 1:]
      processed_courses.append(processed_tuple)

  # Convert the processed list of tuples into a DataFrame
  df = pd.DataFrame(processed_courses, columns=['Course Name', 'Course Link'])

  # Convert the DataFrame to an HTML table
  html_table = df.to_html(index=False, escape=False)

  # Print or save the HTML table
  return html_table

def recommend_courses(user_skill):
    """Return an HTML table of courses most relevant to *user_skill*.

    The skill text is projected into the module-level TF-IDF space, the
    catalogue is ranked by cosine similarity, and the five best matches
    are rendered to HTML via ``html_coversion``.
    """
    # Vectorize the raw skill text with the pre-fitted TF-IDF model.
    query_vector = tfidf_vectorizer.transform([f"{user_skill}"])

    # Cosine similarity of the query against every course name.
    similarity_scores = linear_kernel(query_vector, tfidf_matrix)[0]

    # Rank the catalogue by similarity and keep the top five.
    ranked = courses_df.copy()
    ranked['Similarity'] = similarity_scores
    top_five = ranked.sort_values(by='Similarity', ascending=False).head(5)

    # Render the shortlist as plain text, then convert it to an HTML table.
    shortlist_text = top_five[['Course Name', 'Course Link']].to_string(index=False)
    return html_coversion(shortlist_text)

# Wire up the Gradio UI: one free-text skill input, HTML output, live updates.
skill_input = gr.Textbox("text", label="Enter expected skill")

iface = gr.Interface(
    fn=recommend_courses,
    inputs=[skill_input],
    outputs="html",
    live=True,
)

# Launch the app with a publicly shareable link.
iface.launch(share=True)