MandarBhalerao committed on
Commit
9630cd6
1 Parent(s): 17ae79e

Add files via upload

Moved all app-related files outside, to the top level of the repository.

Files changed (3)
  1. app.py +46 -0
  2. chains.py +98 -0
  3. utils.py +26 -0
app.py ADDED
@@ -0,0 +1,46 @@
+import streamlit as st
+from langchain_community.document_loaders import WebBaseLoader
+
+from chains import Chain
+# from portfolio import Portfolio
+from utils import clean_text, extract_text_from_pdf
+
+
+def create_streamlit_app(llm, clean_text):
+    st.title("📧 Welcome to Cold E-Mail Generator")
+
+    # PDF upload section
+    uploaded_file = st.file_uploader("Upload your resume as PDF", type=["pdf"])
+    pdf_text = extract_text_from_pdf(uploaded_file)
+    # if pdf_text:
+    #     st.text_area("Extracted Text", value=pdf_text, height=300)
+
+
+    url_input = st.text_input("Enter the URL of Job Posting:", value="https://careers.myntra.com/job-detail/?id=7431200002")
+    submit_button = st.button("Generate E-mail")
+
+    if submit_button:
+        try:
+            loader = WebBaseLoader([url_input])
+            data = clean_text(loader.load().pop().page_content)  # cleans any unnecessary garbage text
+            jobs = llm.extract_jobs(data)  # create JSON objects for the job postings
+            for job in jobs:  # in case one web page has multiple jobs
+                # skills = job.get('skills', [])
+                summarized_text = llm.summarize_pdf(pdf_text)
+                # st.text_area(summarized_text)
+                email = llm.write_mail(job, summarized_text)  # write the email
+                # st.code(email, language='markdown')
+                st.text_area("Email is as follows", value=email, height=500)
+
+            # st.code('hello')
+        except Exception as e:
+            st.error(f"An Error Occurred: {e}")
+
+
+if __name__ == "__main__":
+    chain = Chain()
+    # portfolio = Portfolio()
+    st.set_page_config(layout="wide", page_title="Cold Email Generator", page_icon="📧")
+    create_streamlit_app(chain, clean_text)
+
+
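Note: the same pipeline can be exercised outside the Streamlit UI for quick testing. The sketch below is illustrative only; it assumes the dependencies used by chains.py and utils.py are installed, the Groq key is available as in chains.py, and "resume.pdf" plus the job URL are placeholders to substitute.

# Minimal sketch: run extraction -> summarization -> email generation without the UI.
from langchain_community.document_loaders import WebBaseLoader
from chains import Chain
from utils import clean_text, extract_text_from_pdf

chain = Chain()
page = WebBaseLoader(["https://careers.myntra.com/job-detail/?id=7431200002"]).load().pop().page_content
jobs = chain.extract_jobs(clean_text(page))                          # list of job dicts parsed from the page
summary = chain.summarize_pdf(extract_text_from_pdf("resume.pdf"))   # "resume.pdf" is a placeholder path
for job in jobs:
    print(chain.write_mail(job, summary))                            # one cold email per extracted job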
chains.py ADDED
@@ -0,0 +1,98 @@
+import os
+from langchain_groq import ChatGroq
+from langchain_core.prompts import PromptTemplate
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_core.exceptions import OutputParserException
+from dotenv import load_dotenv
+
+import streamlit as st
+
+GROQ_API_KEY = st.secrets["default"]["GROQ_API_KEY"]
+
+# Alternatively, keep the API key in a .env file in the project root and load it with load_dotenv().
+# load_dotenv()  # This finds the .env file and sets its entries as environment variables.
+
+# print(os.getenv("GROQ_API_KEY"))  # just for testing
+
+class Chain:
+    def __init__(self):
+        self.llm = ChatGroq(temperature=0, groq_api_key=GROQ_API_KEY, model_name="llama-3.1-70b-versatile")
+        # self.llm = ChatGroq(temperature=0, groq_api_key=os.getenv("GROQ_API_KEY"), model_name="llama-3.1-70b-versatile")
+
+
+    # Extract the job postings from the scraped text and parse the LLM output into JSON.
+    def extract_jobs(self, cleaned_text):
+        prompt_extract = PromptTemplate.from_template(
+            """
+            ### SCRAPED TEXT FROM WEBSITE:
+            {page_data}
+            ### INSTRUCTION:
+            The scraped text is from the careers page of a website.
+            Your job is to extract the job postings and return them in JSON format containing the following keys: `role`, `experience`, `skills` and `description`.
+            Only return the valid JSON.
+            ### VALID JSON (NO PREAMBLE):
+            """
+        )
+        chain_extract = prompt_extract | self.llm
+        res = chain_extract.invoke(input={"page_data": cleaned_text})
+        try:
+            json_parser = JsonOutputParser()
+            res = json_parser.parse(res.content)
+            # Check if the result is a list and extract the first dictionary
+            # if isinstance(json_res, list):
+            #     json_res = json_res[0]
+
+        except OutputParserException:
+            raise OutputParserException("Context too big. Unable to parse jobs.")
+        return res if isinstance(res, list) else [res]
+
+
+    def summarize_pdf(self, pdf_data):
+        prompt_extract = PromptTemplate.from_template(
+            """
+            ### PDF DATA OBTAINED FROM RESUME:
+            {pdf_data}
+            ### INSTRUCTION:
+            The data is from the resume of a person.
+            Your job is to extract all the details of this person and summarize them in 200 words, including name, education, experience, projects, and skills.
+            ### (NO PREAMBLE):
+            """
+        )
+        chain_extract = prompt_extract | self.llm  # this forms a LangChain chain, i.e. the prompt is piped into the LLM
+        res2 = chain_extract.invoke(input={'pdf_data': pdf_data})
+        # print(res2.content)
+        summary = res2.content
+        return summary
+
+    def write_mail(self, job_description, summary):
+        prompt_email = PromptTemplate.from_template(
+            """
+            ### JOB DESCRIPTION:
+            This is a job description:
+
+            {job_description}
+
+            ### INSTRUCTION:
+            These are the person's details:
+            {summary}
+            Consider yourself as this person.
+
+            Introduce yourself in an engaging way, using your name and current designation from the details above.
+
+            Try to find things in the job description that are similar to your details, and mention them.
+            Do not mention anything that is not present in the details.
+
+            Your job is to write a cold email of about 250 words to the hiring manager regarding the job mentioned above, describing your capability
+            in fulfilling their needs. The cold email must be engaging to read.
+            End the email with your name and the place where you are currently working or studying.
+            Do not provide a preamble.
+            ### EMAIL (NO PREAMBLE):
+
+            """
+        )
+        chain_email = prompt_email | self.llm
+        res = chain_email.invoke({"job_description": str(job_description), "summary": summary})
+        return res.content
+
+# if __name__ == "__main__":
+#     print(os.getenv("GROQ_API_KEY"))
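For context, extract_jobs always returns a list, and each entry is expected to carry the keys requested in the prompt. A hypothetical parsed result might look like this (values are illustrative, not real output):

# Illustrative shape of what extract_jobs() returns after JsonOutputParser runs.
jobs = [
    {
        "role": "Backend Engineer",            # hypothetical values
        "experience": "2-4 years",
        "skills": ["Python", "SQL", "AWS"],
        "description": "Build and maintain services for the shopping platform.",
    },
]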
utils.py ADDED
@@ -0,0 +1,26 @@
+import re
+import pdfplumber
+
+
+# Remove unwanted content and clean the text.
+def clean_text(text):
+    # Remove HTML tags
+    text = re.sub(r'<[^>]*?>', '', text)
+    # Remove URLs
+    text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', '', text)
+    # Remove special characters
+    text = re.sub(r'[^a-zA-Z0-9 ]', '', text)
+    # Replace multiple spaces with a single space
+    text = re.sub(r'\s{2,}', ' ', text)
+    # Trim leading and trailing whitespace
+    text = text.strip()
+    # Remove extra whitespace
+    text = ' '.join(text.split())
+    return text
+
+def extract_text_from_pdf(uploaded_file):
+    if uploaded_file is not None:
+        with pdfplumber.open(uploaded_file) as pdf:
+            pages = [page.extract_text() or "" for page in pdf.pages]  # extract_text() can return None for empty pages
+        return "\n".join(pages) if pages else ""
+    return ""
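As a quick illustration of the cleaning helper (input and expected output are made up):

# clean_text strips tags, URLs, special characters, and extra whitespace.
from utils import clean_text

raw = '<div>Data Scientist (Remote) - see https://example.com/careers</div>'
print(clean_text(raw))  # -> 'Data Scientist Remote see'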