skolvankar committed
Commit
5a359a3
1 Parent(s): d691ba0

Add application file

Files changed (1)
  1. app.py +132 -0
app.py ADDED
@@ -0,0 +1,132 @@
+ # -*- coding: utf-8 -*-
+ """Skill Transformation Journey.ipynb
+
+ Automatically generated by Colaboratory.
+
+ Original file is located at
+ https://colab.research.google.com/drive/11XAXUP2fzy553V9v0x-gxJXcXL3uHJcw
+ """
+
+ # Commented out IPython magic to ensure Python compatibility.
+ # %pip install openai
+
+ # Commented out IPython magic to ensure Python compatibility.
+ # %pip install gradio
+
+ import os
+ import re
+
+ import gradio as gr
+ import pandas as pd
+ from openai import OpenAI
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ from sklearn.metrics.pairwise import linear_kernel
+
+ # Create the OpenAI client. The API key is read from the environment
+ # (set OPENAI_API_KEY, e.g. as a Space secret) instead of being hard-coded.
+ client = OpenAI(
+     api_key=os.environ.get("OPENAI_API_KEY"),
+ )
+
+ # Specify the sheet name in the Excel file
+ # NOTE: this is a Colab-mounted Drive path; adjust it to wherever the workbook
+ # lives when running outside Colab.
+ excel_file_path = "/content/drive/MyDrive/Skill Transformation Journey/AI Integrations in SWAYAM_V1.0.xlsx"
+ sheet_name = "Shortlisted Courses"  # Replace with the actual sheet name
+
+ # Read the Excel file into a Pandas DataFrame
+ courses_df = pd.read_excel(excel_file_path, sheet_name=sheet_name)
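+ # The sheet is expected to provide at least 'Course Name' and 'Course Link'
+ # columns; both are used when building recommendations below.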
35
+
36
+ # Function to recommend courses based on user input using GPT and TF-IDF
37
+ def recommend_courses(user_skill, ed_qual):#, #primary_skill_1):
38
+ # Combine user's input into a single string for TF-IDF
39
+ user_input = f"{user_skill} {ed_qual}"
40
+
41
+ # Use TF-IDF and cosine similarity for initial course recommendations
42
+ user_vector = tfidf_vectorizer.transform([user_input])
43
+ cosine_similarities = linear_kernel(user_vector, tfidf_matrix)
44
+
45
+ # Get initial course recommendations based on similarity scores
46
+ recommendations = courses_df.copy()
47
+ recommendations['Similarity'] = cosine_similarities[0]
48
+
49
+ # Sort by similarity and get top recommendations
50
+ top_recommendations = recommendations.sort_values(by='Similarity', ascending=False).head(5)
51
+
52
+ # Generate a text summary of the initial recommendations
53
+ initial_recommendations_text = top_recommendations[['Course Name', 'Course Link']].to_string(index=False)
54
+
55
+ # Combine user input and initial recommendations for GPT prompt
56
+ gpt_prompt = f"Refine the following course recommendations based on user input:\n\n"\
57
+ f"{user_input}\n\n"\
58
+ f"Initial Recommendations:\n\n"\
59
+ f"{initial_recommendations_text}"
60
+
61
+ # Use GPT to generate refined recommendations
62
+ gpt_response = client.chat.completions.create(
63
+ messages=[
64
+ {
65
+ "role": "system",
66
+ "content": gpt_prompt,
67
+ }
68
+ ],
69
+ model="gpt-3.5-turbo",
70
+ max_tokens=1000
71
+ )
72
+
73
+ # Assume GPT generates HTML-formatted final recommendations
74
+ gpt_content = gpt_response.choices[0].message.content #gpt_response['choices'][0]['text']
75
+
76
+ # Assume GPT generates HTML-formatted final recommendations
77
+ final_recommendations_html = html_coversion(gpt_content)
78
+
79
+ return final_recommendations_html
+
+ # Create a TF-IDF vectorizer
+ tfidf_vectorizer = TfidfVectorizer(stop_words='english')
+ tfidf_matrix = tfidf_vectorizer.fit_transform(courses_df['Course Name'].fillna(''))
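+ # Note: TfidfVectorizer L2-normalises its rows by default, so the dot products
+ # computed by linear_kernel in recommend_courses are equivalent to cosine similarities.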
+
+ # Convert the model's numbered-list reply into an HTML table.
+ # The regex expects entries like "1. Course Name:" followed by
+ # "- Course Link: [text](url)" and "- Description: ..." lines.
+ def html_conversion(gpt_content):
+
+     data_text = gpt_content
+
+     # Extract course details: number, name, link text, link URL and description
+     courses = re.findall(r'(\d+)\. (.*?):\n\s*- Course Link: \[([^\]]+)\]\(([^)]+)\)\n\s*- Description: ([^\n]+)', data_text)
+
+     # The markdown link text and URL are normally identical, so keep a single
+     # copy of the course link and drop the duplicate field from each match
+     processed_courses = [
+         (sr_no, course_name, course_link, description)
+         for sr_no, course_name, _link_text, course_link, description in courses
+     ]
+
+     # Convert the processed list of tuples into a DataFrame
+     df = pd.DataFrame(processed_courses, columns=['Sr No', 'Course Name', 'Course Link', 'Description'])
+
+     # Convert the DataFrame to an HTML table
+     html_table = df.to_html(index=False, escape=False)
+
+     return html_table
+
+ # Quick sanity check carried over from the notebook: print recommendations
+ # for one sample profile (this runs a single API call at start-up)
+ user_skill = "psychology"
+ ed_qual = "B.Tech/B.Sc"
+ html = recommend_courses(user_skill, ed_qual)
+ print(html)
+
+ # Gradio interface: a free-text skill box and a qualification dropdown
+ iface = gr.Interface(
+     fn=recommend_courses,
+     inputs=[
+         gr.Textbox(label="Enter expected skill"),
+         gr.Dropdown(["B.Tech/B.Sc", "M.Tech/M.Sc", "Management"], label="Highest Educational Qualification"),
+     ],
+     outputs="html",
+     live=True
+ )
+
+ # Launch the Gradio interface
+ iface.launch(share=True)
+
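For reference, a minimal standalone sketch of the reply format that html_conversion's regular expression is written to parse; the course name, URL and description below are made up for illustration:

import re

# Illustrative model reply in the numbered-list format the regex expects
sample_reply = (
    "1. Introduction to Psychology:\n"
    "   - Course Link: [https://example.org/psych101](https://example.org/psych101)\n"
    "   - Description: Foundations of psychology for beginners.\n"
)

pattern = r'(\d+)\. (.*?):\n\s*- Course Link: \[([^\]]+)\]\(([^)]+)\)\n\s*- Description: ([^\n]+)'
print(re.findall(pattern, sample_reply))
# -> [('1', 'Introduction to Psychology', 'https://example.org/psych101',
#      'https://example.org/psych101', 'Foundations of psychology for beginners.')]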