skolvankar committed on
Commit
8508bc5
1 Parent(s): f17f9a8

Add application file

Browse files
Files changed (2) hide show
  1. 1app.py +98 -0
  2. app.py +89 -81
1app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Skill Transformation Journey.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/11XAXUP2fzy553V9v0x-gxJXcXL3uHJcw
8
+ """
9
+
10
+ import gradio as gr
11
+ import re
12
+ import openai
13
+ from openai import OpenAI
14
+ import pandas as pd
15
+ from sklearn.feature_extraction.text import TfidfVectorizer
16
+ from sklearn.metrics.pairwise import linear_kernel
17
+
18
# Build the OpenAI client without embedding credentials in the source.
# With no api_key argument the client reads the OPENAI_API_KEY environment
# variable (and raises at construction if it is unset), so configure the key
# as a deployment secret. The key that used to be committed on this line is
# public and must be revoked.
client = OpenAI()
22
+
23
# Location of the course catalogue. The variable names are a leftover from an
# Excel-based version: the file is a CSV, and `sheet_name` is not passed to
# the loader below.
sheet_name = "Shortlisted Courses"  # Replace with the actual sheet name
excel_file_path = "1.csv"

# Load the catalogue into a pandas DataFrame
courses_df = pd.read_csv(excel_file_path)

# Fit a TF-IDF model on the course names; missing names become empty strings
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(
    courses_df['Course Name'].fillna('')
)
33
+
34
def html_coversion(gpt_content):
    """Convert a markdown-style course list into an HTML table.

    The text is expected to contain entries of the form::

        1. <course name>:
           - Course Link: [<link text>](<url>)
           - Description: <one line>

    Args:
        gpt_content: Text to parse. ``None`` or text without any matching
            entry yields a table that has only the header row.

    Returns:
        An HTML ``<table>`` string with the columns ``Course Name`` and
        ``Course Link``; each link is rendered as an ``<a>`` element.
    """
    import html  # local import: keeps this block self-contained

    entry_pattern = re.compile(
        r'(\d+)\. (.*?):\n\s*- Course Link: \[([^\]]+)\]\(([^)]+)\)\n\s*- Description: ([^\n]+)'
    )

    rows = []
    for _number, name, link_text, link_url, _description in entry_pattern.findall(gpt_content or ""):
        # Keep exactly the two fields the table shows. Escape them because the
        # table is emitted with escape=False so that the anchors survive.
        anchor = '<a href="{}">{}</a>'.format(
            html.escape(link_url, quote=True), html.escape(link_text)
        )
        rows.append((html.escape(name), anchor))

    df = pd.DataFrame(rows, columns=['Course Name', 'Course Link'])

    # to_html honours display.max_colwidth and would cut long anchors short
    with pd.option_context('display.max_colwidth', None):
        return df.to_html(index=False, escape=False)
59
+
60
# Recommend catalogue courses for a free-text skill using TF-IDF similarity
def recommend_courses(user_skill):
    """Return an HTML table of the courses whose names best match a skill.

    The skill text is projected into the TF-IDF space fitted on the course
    names (module-level ``tfidf_vectorizer`` / ``tfidf_matrix``) and scored
    against every course with ``linear_kernel``.

    Args:
        user_skill: Free-text skill entered by the user; may be empty or
            ``None`` while the live interface is still being typed into.

    Returns:
        An HTML string: a table with the ``Course Name`` and ``Course Link``
        columns of up to five best-matching courses, or a short ``<p>``
        message when the input is blank or nothing in the catalogue matches.
    """
    query = (user_skill or "").strip()
    if not query:
        # A blank query scores 0 against every course, so any "top 5" would be arbitrary
        return "<p>Enter a skill to see course recommendations.</p>"

    query_vector = tfidf_vectorizer.transform([query])
    scores = linear_kernel(query_vector, tfidf_matrix)[0]

    ranked = courses_df.copy()
    ranked['Similarity'] = scores

    # Drop courses that share no term with the query before taking the top five
    top_recommendations = (
        ranked[ranked['Similarity'] > 0]
        .sort_values(by='Similarity', ascending=False)
        .head(5)
    )
    if top_recommendations.empty:
        return "<p>No matching courses found.</p>"

    # Render the DataFrame directly. Round-tripping it through to_string() and
    # a markdown regex never matched, so the table always came out empty.
    # to_html honours display.max_colwidth, which would truncate long links.
    with pd.option_context('display.max_colwidth', None):
        return top_recommendations[['Course Name', 'Course Link']].to_html(
            index=False, render_links=True
        )
84
+
85
# Gradio interface: one free-text skill box in, an HTML table of courses out
iface = gr.Interface(
    fn=recommend_courses,
    inputs=[
        # Pass only the label. The first positional argument of gr.Textbox is
        # the initial value, so Textbox("text", ...) pre-filled the box with
        # the word "text".
        gr.Textbox(label="Enter expected skill"),
        #gr.Dropdown(["B.Tech/B.Sc", "M.Tech/M.Sc", "Management"], label="Highest Educational Qualification"),
    ],
    outputs="html",
    live=True,  # re-run the recommender whenever the input changes
)

# Launch the Gradio interface and request a public share link
iface.launch(share=True)
98
+
app.py CHANGED
@@ -1,98 +1,106 @@
1
  # -*- coding: utf-8 -*-
2
- """Skill Transformation Journey.ipynb
3
 
4
  Automatically generated by Colaboratory.
5
 
6
  Original file is located at
7
- https://colab.research.google.com/drive/11XAXUP2fzy553V9v0x-gxJXcXL3uHJcw
8
  """
9
 
10
- import gradio as gr
11
- import re
 
12
  import openai
13
- from openai import OpenAI
14
  import pandas as pd
15
  from sklearn.feature_extraction.text import TfidfVectorizer
16
  from sklearn.metrics.pairwise import linear_kernel
17
 
18
- client = OpenAI(
19
- # defaults to os.environ.get("OPENAI_API_KEY")
20
- api_key="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",
21
- )
22
-
23
- # Specify the sheet name in the Excel file
24
- excel_file_path = "1.csv"
25
- sheet_name = "Shortlisted Courses" # Replace with the actual sheet name
26
-
27
- # Read the Excel file into a Pandas DataFrame
28
- courses_df = pd.read_csv(excel_file_path)
29
-
30
- # Create a TF-IDF vectorizer
31
- tfidf_vectorizer = TfidfVectorizer(stop_words='english')
32
- tfidf_matrix = tfidf_vectorizer.fit_transform(courses_df['Course Name'].fillna(''))
33
-
34
- def html_coversion(gpt_content):
35
-
36
- # Provided data in text format
37
- data_text = gpt_content
38
-
39
- # Extract course details using a modified regular expression
40
- courses = re.findall(r'(\d+)\. (.*?):\n\s*- Course Link: \[([^\]]+)\]\(([^)]+)\)\n\s*- Description: ([^\n]+)', data_text)
41
-
42
- # Process each tuple to remove the second occurrence of the course link
43
- processed_courses = []
44
- for course_tuple in courses:
45
- # Find the index of the second occurrence of the course link
46
- index_of_second_occurrence = course_tuple.index(course_tuple[2], course_tuple.index(course_tuple[2]) + 1)
47
- # Remove the second occurrence of the course link from the tuple
48
- processed_tuple = course_tuple[:index_of_second_occurrence] + course_tuple[index_of_second_occurrence + 1:]
49
- processed_courses.append(processed_tuple)
50
-
51
- # Convert the processed list of tuples into a DataFrame
52
- df = pd.DataFrame(processed_courses, columns=['Course Name', 'Course Link'])
53
-
54
- # Convert the DataFrame to an HTML table
55
- html_table = df.to_html(index=False, escape=False)
56
-
57
- # Print or save the HTML table
58
- return html_table
59
-
60
- # Function to recommend courses based on user input using GPT and TF-IDF
61
- def recommend_courses(user_skill):
62
- # Combine user's input into a single string for TF-IDF
63
- user_input = f"{user_skill}"
64
-
65
- # Use TF-IDF and cosine similarity for initial course recommendations
66
- user_vector = tfidf_vectorizer.transform([user_input])
 
 
 
 
 
 
 
67
  cosine_similarities = linear_kernel(user_vector, tfidf_matrix)
68
 
69
- # Get initial course recommendations based on similarity scores
70
  recommendations = courses_df.copy()
71
  recommendations['Similarity'] = cosine_similarities[0]
72
 
73
- # Sort by similarity and get top recommendations
74
- top_recommendations = recommendations.sort_values(by='Similarity', ascending=False).head(5)
75
-
76
- # Generate a text summary of the initial recommendations
77
- initial_recommendations_text = top_recommendations[['Course Name', 'Course Link']].to_string(index=False)
78
-
79
-
80
- # Assume GPT generates HTML-formatted final recommendations
81
- final_recommendations_html = html_coversion(initial_recommendations_text)
82
-
83
- return final_recommendations_html
84
-
85
- # Gradio Interface with dynamically generated dropdown options
86
- iface = gr.Interface(
87
- fn=recommend_courses,
88
- inputs=[
89
- gr.Textbox("text", label="Enter expected skill"),
90
- #gr.Dropdown(["B.Tech/B.Sc", "M.Tech/M.Sc", "Management"], label="Highest Educational Qualification"),
91
- ],
92
- outputs="html",
93
- live=True
94
- )
95
-
96
- # Launch the Gradio interface and save the output to an HTML file
97
- iface.launch(share=True)
98
-
 
 
1
  # -*- coding: utf-8 -*-
2
+ """[DRAFT]SWAYAM_CHATBOT SYSTEM_Course Recommendation System.ipynb
3
 
4
  Automatically generated by Colaboratory.
5
 
6
  Original file is located at
7
+ https://colab.research.google.com/drive/1s4_kZDgJcvRr7kfnw12oFNus45E2oOr9
8
  """
9
 
10
+ # Commented out IPython magic to ensure Python compatibility.
11
+ # %pip install openai
12
+
13
  import openai
 
14
  import pandas as pd
15
  from sklearn.feature_extraction.text import TfidfVectorizer
16
  from sklearn.metrics.pairwise import linear_kernel
17
 
18
# Read the OpenAI API key from the environment instead of committing it.
# The key that used to be hard-coded on this line is public and must be revoked.
import os

openai.api_key = os.environ.get("OPENAI_API_KEY")
20
+
21
+ # Sample course data
22
+ data = {
23
+ 'CourseID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
24
+ 'Title': ['Python Programming',
25
+ 'Data Science with Python',
26
+ 'Machine Learning',
27
+ 'Web Development',
28
+ 'Environmental Studies',
29
+ 'Business Communication (Language-English/ Hindi/ MIL)',
30
+ 'Management Principles and Applications',
31
+ 'Analytical Geometry',
32
+ 'Cost Accounting',
33
+ 'Principles of Micro Economics',
34
+ 'Human Resource Management',
35
+ 'Fundamentals of Financial Management',
36
+ 'Classical Political Philosophy',
37
+ 'Differential Calculus',
38
+ 'Sociology of Health and Medicine',
39
+ 'Economic History of India (1857-1947)'],
40
+ 'Description': [
41
+ 'Start your journey in programming by learning Python from scratch.',
42
+ 'Start your journey in Data Science and become data scientist by learning python and other data science libraries.',
43
+ 'Master your programming skills and dive into the world of machine learning with python and other machine learning libraries',
44
+ 'Start your journey in web development using python programming and Django library.',
45
+ 'Explore the intricate relationship between humanity and the environment, and learn how to make informed decisions to preserve and protect our planet.',
46
+ 'Enhance your communication skills in English, Hindi, or your mother tongue (MIL) to excel in the business world. Learn the art of effective written and verbal communication.',
47
+ 'Gain a comprehensive understanding of the fundamental principles of management and their real-world applications to thrive in today\'s dynamic business environment.',
48
+ 'Delve into the world of analytical geometry and master the mathematical techniques and concepts that underlie this fascinating branch of mathematics.',
49
+ 'Learn the essentials of cost accounting and financial analysis to make sound business decisions and optimize financial performance.',
50
+ 'Explore the principles of microeconomics, the study of individual economic behavior, and understand how economic decisions impact businesses and society.',
51
+ 'Gain insight into the management of human resources, from recruitment to employee development, and learn how effective HR practices drive organizational success.',
52
+ 'Understand the core principles of financial management, including budgeting, investment, and risk analysis, to make strategic financial decisions.',
53
+ 'Dive into the world of classical political philosophy and explore the influential works of thinkers like Plato, Aristotle, and more, to understand the foundations of political thought.',
54
+ 'Master the fundamental concepts of differential calculus, a branch of mathematics that deals with rates of change, and its applications in various fields.',
55
+ 'Explore the sociological aspects of health, illness, and healthcare systems. Understand how society shapes healthcare practices and policies.',
56
+ 'Take a journey through the economic history of India during a critical period of change and transformation, from 1857 to 1947, and understand the economic forces that shaped the nation.'
57
+ ]
58
+ }
59
+
60
+ # Create a DataFrame from the course data
61
+ courses_df = pd.DataFrame(data)
62
+
63
# Rank every course in the catalogue against the user's skills
def recommend_courses(user_skills):
    """Score all courses in ``courses_df`` against a free-text skill string.

    Args:
        user_skills: Whitespace-separated skills (or any free text).

    Returns:
        A DataFrame with the columns ``CourseID``, ``Title`` and
        ``Similarity`` covering every course, sorted by descending similarity.
    """
    # Collapse arbitrary whitespace so the skills read as a comma-separated list
    skill_text = ', '.join(user_skills.split())

    # Fit TF-IDF on the course descriptions, then project the skills into the same space
    vectorizer = TfidfVectorizer(stop_words='english')
    description_vectors = vectorizer.fit_transform(courses_df['Description'])
    skill_vector = vectorizer.transform([skill_text])

    # One row of scores: the skills against each course description
    scores = linear_kernel(skill_vector, description_vectors)[0]

    ranked = courses_df.copy()
    ranked['Similarity'] = scores
    ranked = ranked.sort_values(by='Similarity', ascending=False)

    return ranked[['CourseID', 'Title', 'Similarity']]
85
+
86
# Ask a GPT completion model for suggestions, then match them to the catalogue
def gpt_recommend_courses(user_input):
    """Recommend catalogue courses based on a GPT completion.

    The user's skills are sent to an OpenAI completion model; the model's
    free-text answer is then passed to ``recommend_courses`` to rank the
    catalogue.

    Args:
        user_input: The skills entered by the user.

    Returns:
        Whatever ``recommend_courses`` returns for the model's answer, or for
        ``user_input`` itself when the model returns no text.
    """
    request = dict(
        prompt=f"I have skills in {user_input}. What courses do you recommend?",
        max_tokens=100,
        n=1,
        stop=None,
        temperature=0.7,
    )

    if hasattr(openai, "OpenAI"):
        # openai>=1.0 removed openai.Completion; go through the client API.
        # NOTE(review): gpt-3.5-turbo-instruct is used here as the completions
        # successor to the retired text-davinci models - confirm the model choice.
        client = openai.OpenAI(api_key=openai.api_key)
        response = client.completions.create(model="gpt-3.5-turbo-instruct", **request)
    else:
        # Legacy (pre-1.0) SDK: keep the original call unchanged
        response = openai.Completion.create(engine="text-davinci-002", **request)

    choices = response.choices
    recommendation_prompt = choices[0].text.strip() if choices else ""

    # An empty completion would score 0 against every course, so fall back to
    # the user's own words rather than returning an arbitrary ordering.
    return recommend_courses(recommendation_prompt or user_input)
99
+
100
# Prompt for the user's skills on the console
user_input = input("Enter your skills: ")

# Rank the catalogue via GPT-3 and show the result under a heading
recommended_courses = gpt_recommend_courses(user_input)
print("\nRecommended Courses:", recommended_courses, sep="\n")