import os

import openai

# Avoid fork-related warnings from the huggingface tokenizers library.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# BUGFIX: the original evaluated os.environ["OPENAI_API_KEY"] as a bare
# expression — a no-op that only raised KeyError when the variable was unset
# and never configured the client. Wire the key into the openai module
# explicitly; LangChain's OpenAI integrations also read the same env var.
openai.api_key = os.environ["OPENAI_API_KEY"]


def save_docs(docs):
    """Copy uploaded files into the working docs directory.

    Parameters
    ----------
    docs : iterable
        Uploaded-file objects (as produced by ``gr.Files``), each exposing a
        ``.name`` attribute holding a local temp-file path.

    Returns
    -------
    str
        Status message for display in the UI.
    """
    import shutil

    # Original declared two identical variables (destination_dir/output_dir);
    # collapsed to one.
    destination_dir = "/kaggle/working/docs/"
    os.makedirs(destination_dir, exist_ok=True)
    for doc in docs:
        shutil.copy(doc.name, destination_dir)
    return "File(s) saved successfully!"


def process_docs():
    """Load PDF/TXT/DOCX files from the docs dir, chunk them, embed with
    OpenAI embeddings, and persist a FAISS index to disk.

    Returns
    -------
    str
        Status message for display in the UI.
    """
    from langchain.document_loaders import (
        DirectoryLoader,
        Docx2txtLoader,
        PyPDFLoader,
        TextLoader,
    )
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.vectorstores import FAISS

    docs_dir = "/kaggle/working/docs/"

    # One DirectoryLoader per supported extension; results are merged.
    documents = []
    for pattern, loader_cls in (
        ("./*.pdf", PyPDFLoader),
        ("./*.txt", TextLoader),
        ("./*.docx", Docx2txtLoader),
    ):
        loader = DirectoryLoader(docs_dir, glob=pattern, loader_cls=loader_cls)
        documents.extend(loader.load())

    # 1000-char chunks with 200-char overlap so context survives boundaries.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_documents(documents)

    embeddings = OpenAIEmbeddings()
    docs_db = FAISS.from_documents(chunks, embeddings)
    docs_db.save_local("/kaggle/working/docs_db/")
    return "File(s) processed successfully!"
def formatted_response(docs, response):
    """Append a source listing (file name, plus page number when known) to
    the model's answer.

    Parameters
    ----------
    docs : list
        Retrieved documents; each has a ``.metadata`` dict that may contain
        ``'source'`` (a path) and ``'page'``.
    response : str
        The LLM's answer text.

    Returns
    -------
    str
        Answer followed by a "Sources" section, one source per line.
    """
    formatted_output = response + "\n\nSources"
    # Original used enumerate() but never used the index; dropped.
    for doc in docs:
        source_info = doc.metadata.get('source', 'Unknown source')
        page_info = doc.metadata.get('page', None)
        # Keep only the file name, not the full /kaggle/... path.
        doc_name = source_info.split('/')[-1].strip()
        if page_info is not None:
            formatted_output += f"\n{doc_name}\tpage no {page_info}"
        else:
            formatted_output += f"\n{doc_name}"
    return formatted_output


def search_docs(question):
    """Answer *question* from the persisted FAISS index with gpt-3.5-turbo.

    Parameters
    ----------
    question : str
        Natural-language query typed by the user.

    Returns
    -------
    str
        Answer text with an appended source listing.
    """
    from langchain.callbacks import get_openai_callback
    from langchain.chains.question_answering import load_qa_chain
    from langchain.chat_models import ChatOpenAI
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.vectorstores import FAISS

    embeddings = OpenAIEmbeddings()
    docs_db = FAISS.load_local("/kaggle/working/docs_db/", embeddings)
    docs = docs_db.similarity_search(question)

    llm = ChatOpenAI(model_name='gpt-3.5-turbo')
    chain = load_qa_chain(llm, chain_type="stuff")

    # Track token usage/cost for the call; printed to the notebook log.
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=question)
        print(cb)

    return formatted_response(docs, response)


def delete_docs():
    """Delete the uploaded-docs directory and the FAISS index directory.

    Returns
    -------
    str
        Status message for display in the UI.
    """
    import shutil

    path1 = "/kaggle/working/docs/"
    path2 = "/kaggle/working/docs_db/"
    # BUGFIX: narrowed from a bare ``except:`` (which also swallowed
    # KeyboardInterrupt/SystemExit and genuine errors such as permission
    # failures) to the one condition the message describes: already gone.
    try:
        shutil.rmtree(path1)
        shutil.rmtree(path2)
        return "Deleted Successfully"
    except FileNotFoundError:
        return "Already Deleted"


import gradio as gr

css = """
.col {
    max-width: 50%;
    margin: 0 auto;
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
}
"""

with gr.Blocks(css=css) as demo:
    # BUGFIX: the original string was "##\nLawyer GPT\n", which Markdown
    # renders as an empty h2 followed by plain text; the title belongs on
    # the same line as the heading marker.
    gr.Markdown("## Lawyer GPT")
    with gr.Tab("Your AI Legal Assistant"):
        with gr.Column(elem_classes="col"):
            with gr.Tab("Upload and Process Documents"):
                with gr.Column():
                    docs_upload_input = gr.Files(label="Upload File(s)")
                    docs_upload_button = gr.Button("Upload")
                    docs_upload_output = gr.Textbox(label="Output")
                    docs_process_button = gr.Button("Process")
                    docs_process_output = gr.Textbox(label="Output")
                    gr.ClearButton(
                        [docs_upload_input, docs_upload_output, docs_process_output]
                    )
            with gr.Tab("Query Documents"):
                with gr.Column():
                    docs_search_input = gr.Textbox(label="Enter Question")
                    docs_search_button = gr.Button("Search")
                    docs_search_output = gr.Textbox(label="Output")
                    docs_delete_button = gr.Button("Delete")
                    docs_delete_output = gr.Textbox(label="Output")
                    gr.ClearButton(
                        [docs_search_input, docs_search_output, docs_delete_output]
                    )

    # Wire each button to its handler.
    docs_upload_button.click(
        save_docs, inputs=docs_upload_input, outputs=docs_upload_output
    )
    docs_process_button.click(
        process_docs, inputs=None, outputs=docs_process_output
    )
    docs_search_button.click(
        search_docs, inputs=docs_search_input, outputs=docs_search_output
    )
    docs_delete_button.click(
        delete_docs, inputs=None, outputs=docs_delete_output
    )

demo.queue()
demo.launch(debug=True, share=True)