Spaces:
Runtime error
Runtime error
skolvankar
committed on
Commit
•
8508bc5
1
Parent(s):
f17f9a8
Add application file
Browse files
1app.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""Skill Transformation Journey.ipynb
|
3 |
+
|
4 |
+
Automatically generated by Colaboratory.
|
5 |
+
|
6 |
+
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/11XAXUP2fzy553V9v0x-gxJXcXL3uHJcw
|
8 |
+
"""
|
9 |
+
|
10 |
+
import gradio as gr
|
11 |
+
import re
|
12 |
+
import openai
|
13 |
+
from openai import OpenAI
|
14 |
+
import pandas as pd
|
15 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
16 |
+
from sklearn.metrics.pairwise import linear_kernel
|
17 |
+
|
18 |
+
# Create the OpenAI client. The API key is deliberately NOT hard-coded:
# called without arguments, OpenAI() takes the key from the OPENAI_API_KEY
# environment variable, so the secret must be supplied through the
# deployment's secret settings.
# NOTE(review): the key that was previously committed on this line is public
# and must be treated as compromised -- revoke it. The client is expected to
# fail fast at start-up when the variable is missing; confirm that is wanted.
client = OpenAI()

# Location of the course catalogue. Despite the variable name this is a CSV
# file, which is why it is loaded with pd.read_csv below.
excel_file_path = "1.csv"
# Not used by read_csv (CSV files have no sheets); kept so existing references
# to the name keep working.
sheet_name = "Shortlisted Courses"

# Load the course catalogue into a DataFrame.
courses_df = pd.read_csv(excel_file_path)

# Fit the TF-IDF model on the course names once at import time. Missing names
# are replaced by empty strings so the vectorizer is never handed NaN.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(courses_df['Course Name'].fillna(''))
|
33 |
+
|
34 |
+
# One numbered course entry in the Markdown-like text this parser expects:
#   "<n>. <name>:\n  - Course Link: [<text>](<url>)\n  - Description: <text>"
# Groups: 1 = number, 2 = course name, 3 = link text, 4 = URL, 5 = description.
_COURSE_ENTRY_RE = re.compile(
    r'(\d+)\. (.*?):\n\s*- Course Link: \[([^\]]+)\]\(([^)]+)\)\n\s*- Description: ([^\n]+)'
)


def html_coversion(gpt_content):
    """Convert a numbered course list in Markdown-like text into an HTML table.

    Args:
        gpt_content: Text containing zero or more entries of the form
            ``"1. Name:\\n - Course Link: [text](url)\\n - Description: ..."``.
            ``None`` is treated as empty text.

    Returns:
        An HTML ``<table>`` string with the columns ``Course Name`` and
        ``Course Link`` (the URL), one row per entry found. When nothing
        matches, the table has a header and no body rows.
    """
    text = gpt_content or ''

    # Take the name and URL groups directly. The previous implementation tried
    # to drop a "duplicate" link from the 5-group match and then built a
    # 2-column frame from the 4 remaining values, which raised ValueError for
    # every entry it managed to parse.
    rows = [
        (entry.group(2), entry.group(4))
        for entry in _COURSE_ENTRY_RE.finditer(text)
    ]
    table = pd.DataFrame(rows, columns=['Course Name', 'Course Link'])

    # to_html honours display.max_colwidth and would cut long URLs short, so
    # lift the limit while rendering.
    # NOTE(review): escape=False emits the parsed text as raw HTML; if the
    # input can come from an untrusted source it should be escaped first.
    with pd.option_context('display.max_colwidth', None):
        return table.to_html(index=False, escape=False)
|
59 |
+
|
60 |
+
# Function to recommend courses based on user input using GPT and TF-IDF
|
61 |
+
def recommend_courses(user_skill):
    """Return the five catalogue courses most similar to a skill, as HTML.

    The skill text is projected into the TF-IDF space fitted on the course
    names (module-level ``tfidf_vectorizer`` / ``tfidf_matrix``) and every
    course in ``courses_df`` is scored against it with ``linear_kernel``.

    Args:
        user_skill: Free-text skill typed by the user; it is converted to a
            string before vectorising.

    Returns:
        An HTML ``<table>`` string with the ``Course Name`` and ``Course Link``
        columns of the five highest-scoring courses, best match first.
    """
    query = f"{user_skill}"

    # Score every course against the query.
    query_vector = tfidf_vectorizer.transform([query])
    similarity_scores = linear_kernel(query_vector, tfidf_matrix)[0]

    # Work on a copy so the shared catalogue never gains a 'Similarity' column.
    ranked = courses_df.copy()
    ranked['Similarity'] = similarity_scores
    top_recommendations = ranked.sort_values(by='Similarity', ascending=False).head(5)

    # Render the rows directly. They used to be dumped with to_string() and fed
    # to html_coversion(), whose pattern only matches Markdown-style GPT output,
    # so the interface always displayed an empty table.
    # display.max_colwidth is lifted so long course links are not truncated.
    with pd.option_context('display.max_colwidth', None):
        return top_recommendations[['Course Name', 'Course Link']].to_html(index=False)
|
84 |
+
|
85 |
+
# Gradio Interface with dynamically generated dropdown options
|
86 |
+
# Gradio UI: one free-text skill box wired to recommend_courses, with the
# returned string rendered as HTML. live=True re-runs the function as the
# input changes instead of waiting for a submit button.
iface = gr.Interface(
    fn=recommend_courses,
    inputs=[
        # The first positional argument of gr.Textbox is the initial value, so
        # the former gr.Textbox("text", ...) pre-filled the box with "text".
        gr.Textbox(label="Enter expected skill"),
        #gr.Dropdown(["B.Tech/B.Sc", "M.Tech/M.Sc", "Management"], label="Highest Educational Qualification"),
    ],
    outputs="html",
    live=True
)

# Start the web app; share=True asks Gradio for a public share link.
iface.launch(share=True)
|
98 |
+
|
app.py
CHANGED
@@ -1,98 +1,106 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
-
"""
|
3 |
|
4 |
Automatically generated by Colaboratory.
|
5 |
|
6 |
Original file is located at
|
7 |
-
https://colab.research.google.com/drive/
|
8 |
"""
|
9 |
|
10 |
-
|
11 |
-
|
|
|
12 |
import openai
|
13 |
-
from openai import OpenAI
|
14 |
import pandas as pd
|
15 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
16 |
from sklearn.metrics.pairwise import linear_kernel
|
17 |
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
#
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
#
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
cosine_similarities = linear_kernel(user_vector, tfidf_matrix)
|
68 |
|
69 |
-
# Get
|
70 |
recommendations = courses_df.copy()
|
71 |
recommendations['Similarity'] = cosine_similarities[0]
|
72 |
|
73 |
-
# Sort by similarity and
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
)
|
95 |
-
|
96 |
-
#
|
97 |
-
|
98 |
-
|
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
"""[DRAFT]SWAYAM_CHATBOT SYSTEM_Course Recommendation System.ipynb
|
3 |
|
4 |
Automatically generated by Colaboratory.
|
5 |
|
6 |
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/1s4_kZDgJcvRr7kfnw12oFNus45E2oOr9
|
8 |
"""
|
9 |
|
10 |
+
# Commented out IPython magic to ensure Python compatibility.
|
11 |
+
# %pip install openai
|
12 |
+
|
13 |
import openai
|
|
|
14 |
import pandas as pd
|
15 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
16 |
from sklearn.metrics.pairwise import linear_kernel
|
17 |
|
18 |
+
# Take the OpenAI API key from the OPENAI_API_KEY environment variable rather
# than hard-coding it in the source.
# NOTE(review): the key that was previously committed on this line is public
# and must be treated as compromised -- revoke it.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY")
|
20 |
+
|
21 |
+
# Sample course data
|
22 |
+
# Sample course catalogue as (title, description) pairs, in catalogue order.
_SAMPLE_COURSES = [
    ('Python Programming',
     'Start your journey in programming by learning Python from scratch.'),
    ('Data Science with Python',
     'Start your journey in Data Science and become data scientist by learning python and other data science libraries.'),
    ('Machine Learning',
     'Master your programming skills and dive into the world of machine learning with python and other machine learning libraries'),
    ('Web Development',
     'Start your journey in web development using python programming and Django library.'),
    ('Environmental Studies',
     'Explore the intricate relationship between humanity and the environment, and learn how to make informed decisions to preserve and protect our planet.'),
    ('Business Communication (Language-English/ Hindi/ MIL)',
     'Enhance your communication skills in English, Hindi, or your mother tongue (MIL) to excel in the business world. Learn the art of effective written and verbal communication.'),
    ('Management Principles and Applications',
     'Gain a comprehensive understanding of the fundamental principles of management and their real-world applications to thrive in today\'s dynamic business environment.'),
    ('Analytical Geometry',
     'Delve into the world of analytical geometry and master the mathematical techniques and concepts that underlie this fascinating branch of mathematics.'),
    ('Cost Accounting',
     'Learn the essentials of cost accounting and financial analysis to make sound business decisions and optimize financial performance.'),
    ('Principles of Micro Economics',
     'Explore the principles of microeconomics, the study of individual economic behavior, and understand how economic decisions impact businesses and society.'),
    ('Human Resource Management',
     'Gain insight into the management of human resources, from recruitment to employee development, and learn how effective HR practices drive organizational success.'),
    ('Fundamentals of Financial Management',
     'Understand the core principles of financial management, including budgeting, investment, and risk analysis, to make strategic financial decisions.'),
    ('Classical Political Philosophy',
     'Dive into the world of classical political philosophy and explore the influential works of thinkers like Plato, Aristotle, and more, to understand the foundations of political thought.'),
    ('Differential Calculus',
     'Master the fundamental concepts of differential calculus, a branch of mathematics that deals with rates of change, and its applications in various fields.'),
    ('Sociology of Health and Medicine',
     'Explore the sociological aspects of health, illness, and healthcare systems. Understand how society shapes healthcare practices and policies.'),
    ('Economic History of India (1857-1947)',
     'Take a journey through the economic history of India during a critical period of change and transformation, from 1857 to 1947, and understand the economic forces that shaped the nation.'),
]

# Column-oriented view of the catalogue; course IDs are the 1-based positions.
data = {
    'CourseID': list(range(1, len(_SAMPLE_COURSES) + 1)),
    'Title': [title for title, _ in _SAMPLE_COURSES],
    'Description': [description for _, description in _SAMPLE_COURSES],
}

# Tabular form of the catalogue used by the recommender.
courses_df = pd.DataFrame(data)
|
62 |
+
|
63 |
+
# Function to recommend courses based on user skills
|
64 |
+
def recommend_courses(user_skills):
    """Rank every course in ``courses_df`` by similarity to the given skills.

    A TF-IDF model is fitted on the course descriptions on each call, the
    skills text is projected into that space, and each course is scored with
    ``linear_kernel``.

    Args:
        user_skills: Whitespace-separated skills text.

    Returns:
        A DataFrame with the columns ``CourseID``, ``Title`` and
        ``Similarity`` covering all courses, sorted by descending similarity.
    """
    # Re-join the whitespace-separated words as a comma-separated string.
    query = ', '.join(user_skills.split())

    # Vectorise the course descriptions and the query in the same TF-IDF space.
    vectorizer = TfidfVectorizer(stop_words='english')
    description_vectors = vectorizer.fit_transform(courses_df['Description'])
    scores = linear_kernel(vectorizer.transform([query]), description_vectors)[0]

    # assign() returns a new frame, so the shared catalogue is left untouched.
    ranked = courses_df.assign(Similarity=scores).sort_values(
        by='Similarity', ascending=False
    )
    return ranked[['CourseID', 'Title', 'Similarity']]
|
85 |
+
|
86 |
+
# Function to interact with GPT-3 and provide recommendations
|
87 |
+
def gpt_recommend_courses(user_input):
    """Ask the completion model for course ideas, then rank the local catalogue.

    The user's skills are placed in a fixed prompt, the first completion
    returned is stripped of surrounding whitespace, and that text is handed to
    ``recommend_courses`` as the query.

    Args:
        user_input: Skills text entered by the user.

    Returns:
        Whatever ``recommend_courses`` returns for the generated text.
    """
    # NOTE(review): openai.Completion with engine="text-davinci-002" is the
    # legacy completions interface -- confirm it is still available with the
    # installed openai package.
    completion_request = {
        "engine": "text-davinci-002",
        "prompt": f"I have skills in {user_input}. What courses do you recommend?",
        "max_tokens": 100,
        "n": 1,
        "stop": None,
        "temperature": 0.7,
    }
    response = openai.Completion.create(**completion_request)

    # Only one completion is requested (n=1), so take the first choice.
    suggestion_text = response.choices[0].text.strip()
    return recommend_courses(suggestion_text)
|
99 |
+
|
100 |
+
# User input for skills
|
101 |
+
# Read the skills from standard input.
user_input = input("Enter your skills: ")

# Rank the catalogue using the model's suggestion, then show the result under
# a heading (a blank line, the heading, then the table).
recommended_courses = gpt_recommend_courses(user_input)
print("\nRecommended Courses:", recommended_courses, sep="\n")
|