farhananis005 commited on
Commit
abc103b
1 Parent(s): 77fa50c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +165 -0
  2. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
import os

import openai

# Avoid the HuggingFace tokenizers fork-deadlock warning in worker processes.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# The original merely *read* the key — a no-op expression whose only effect
# was to raise KeyError at startup when OPENAI_API_KEY is unset. Assign it to
# the openai client so the intent (require and use the key) is explicit.
# Still fails fast with KeyError when the variable is missing, as before.
openai.api_key = os.environ["OPENAI_API_KEY"]
8
def save_docs(docs):
    """Copy uploaded files into the working documents directory.

    docs: iterable of uploaded-file objects; each must expose a ``.name``
          attribute holding its on-disk path (gradio ``Files`` provides this).
    Returns a status string shown in the UI.
    """
    import os
    import shutil

    # One canonical destination — the original kept two identical path
    # variables (destination_dir / output_dir); a single one suffices.
    destination_dir = "/kaggle/working/docs/"
    os.makedirs(destination_dir, exist_ok=True)

    for doc in docs:
        shutil.copy(doc.name, destination_dir)

    return "File(s) saved successfully!"
22
+
23
def process_docs():
    """Load every uploaded PDF/TXT/DOCX document, split it into overlapping
    chunks, embed the chunks with OpenAI, and persist a FAISS index to
    /kaggle/working/docs_db/. Returns a status string for the UI."""
    from langchain.document_loaders import PyPDFLoader
    from langchain.document_loaders import DirectoryLoader
    from langchain.document_loaders import TextLoader
    from langchain.document_loaders import Docx2txtLoader
    from langchain.vectorstores import FAISS
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    source_dir = '/kaggle/working/docs/'

    # One (glob pattern, loader class) pair per supported file type,
    # in the same order the original loaded them.
    loader_specs = [
        ("./*.pdf", PyPDFLoader),
        ("./*.txt", TextLoader),
        ("./*.docx", Docx2txtLoader),
    ]

    documents = []
    for pattern, loader_cls in loader_specs:
        loader = DirectoryLoader(source_dir, glob=pattern, loader_cls=loader_cls)
        documents.extend(loader.load())

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = splitter.split_documents(documents)

    index = FAISS.from_documents(chunks, OpenAIEmbeddings())
    index.save_local("/kaggle/working/docs_db/")

    return "File(s) processed successfully!"
58
+
59
def formatted_response(docs, response):
    """Append a source listing (file name and, when available, page number)
    to the model's answer and return the combined string."""
    lines = [response + "\n\nSources"]

    for doc in docs:
        source = doc.metadata.get('source', 'Unknown source')
        page = doc.metadata.get('page', None)

        # Keep only the file-name portion of the source path.
        name = source.split('/')[-1].strip()

        if page is None:
            lines.append(name)
        else:
            lines.append(f"{name}\tpage no {page}")

    return "\n".join(lines)
75
+
76
def search_docs(question):
    """Answer ``question`` from the persisted FAISS index using a "stuff"
    QA chain over gpt-3.5-turbo; returns the answer with its sources appended
    via formatted_response()."""
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.vectorstores import FAISS
    from langchain.chains.question_answering import load_qa_chain
    from langchain.callbacks import get_openai_callback
    from langchain.chat_models import ChatOpenAI

    # Reload the vector store that process_docs() persisted.
    store = FAISS.load_local("/kaggle/working/docs_db/", OpenAIEmbeddings())
    matches = store.similarity_search(question)

    qa_chain = load_qa_chain(ChatOpenAI(model_name='gpt-3.5-turbo'),
                             chain_type="stuff")

    # The callback context records OpenAI token usage; printed for debugging.
    with get_openai_callback() as usage:
        answer = qa_chain.run(input_documents=matches, question=question)
        print(usage)

    return formatted_response(matches, answer)
96
+
97
def delete_docs():
    """Delete the uploaded-documents directory and the FAISS index directory.

    Returns "Deleted Successfully" when at least one directory was removed,
    "Already Deleted" when there was nothing left to remove.
    """
    import shutil

    targets = ("/kaggle/working/docs/", "/kaggle/working/docs_db/")

    # Original used a single try with a bare `except:` around both rmtree
    # calls, so a missing first directory silently skipped deleting the
    # second. Remove each path independently and narrow the handler to
    # OSError (rmtree's failure mode) while keeping the best-effort contract.
    removed_any = False
    for path in targets:
        try:
            shutil.rmtree(path)
            removed_any = True
        except OSError:
            # Path already gone (or not removable) — best-effort cleanup.
            pass

    return "Deleted Successfully" if removed_any else "Already Deleted"
111
+
112
import gradio as gr

# CSS applied to the Blocks app: the "col" class narrows a column to half the
# page width and centres its children.
css = """
.col{
max-width: 50%;
margin: 0 auto;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
}
"""

# Two-tab UI: upload/process documents, then query (or delete) them.
with gr.Blocks(css=css) as demo:
    gr.Markdown("## <center>Lawyer GPT</center>")

    with gr.Tab("Your AI Legal Assistant"):
        with gr.Column(elem_classes="col"):

            # --- Tab 1: upload file(s), then build the FAISS index. ---
            with gr.Tab("Upload and Process Documents"):
                with gr.Column():

                    docs_upload_input = gr.Files(label="Upload File(s)")
                    docs_upload_button = gr.Button("Upload")
                    docs_upload_output = gr.Textbox(label="Output")

                    docs_process_button = gr.Button("Process")
                    docs_process_output = gr.Textbox(label="Output")

                    gr.ClearButton([docs_upload_input, docs_upload_output, docs_process_output])

            # --- Tab 2: ask questions; optionally delete all stored data. ---
            with gr.Tab("Query Documents"):
                with gr.Column():

                    docs_search_input = gr.Textbox(label="Enter Question")
                    docs_search_button = gr.Button("Search")
                    docs_search_output = gr.Textbox(label="Output")

                    docs_delete_button = gr.Button("Delete")
                    docs_delete_output = gr.Textbox(label="Output")

                    gr.ClearButton([docs_search_input, docs_search_output, docs_delete_output])

    #########################################################################################################
    # Wire the buttons to the backend functions defined earlier in this file.
    docs_upload_button.click(save_docs, inputs=docs_upload_input, outputs=docs_upload_output)
    docs_process_button.click(process_docs, inputs=None, outputs=docs_process_output)

    docs_search_button.click(search_docs, inputs=docs_search_input, outputs=docs_search_output)

    docs_delete_button.click(delete_docs, inputs=None, outputs=docs_delete_output)
    #########################################################################################################

# Queue requests so long-running handlers don't block the UI; share=True
# exposes a public gradio link.
demo.queue()
demo.launch(debug=True, share=True)
requirements.txt ADDED
File without changes