LawyerGPT / app.py
farhananis005's picture
Update app.py
88faaa1
raw
history blame
No virus
5.02 kB
import os
import openai
# Set TOKENIZERS_PARALLELISM to "false" for this process (presumably to turn
# off parallelism in the tokenizers library -- confirm).
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Fail fast at import time when the OpenAI API key is not configured; this
# raises the same KeyError a plain lookup of the variable would.
if "OPENAI_API_KEY" not in os.environ:
    raise KeyError("OPENAI_API_KEY")
def save_docs(docs):
    """Copy uploaded files into the application's documents directory.

    Args:
        docs: Iterable of uploaded files. Each item is either a filesystem
            path (``str`` / ``os.PathLike``) or an object whose ``name``
            attribute holds the path of the file on disk. ``None`` or an
            empty iterable means there is nothing to save.

    Returns:
        A status message string.
    """
    import shutil
    import os

    # Guard against None/empty input instead of failing when iterating it.
    if not docs:
        return "No file(s) to save."

    destination_dir = "/home/user/app/docs/"
    os.makedirs(destination_dir, exist_ok=True)

    for doc in docs:
        # Path-like items are copied directly; any other item is expected to
        # expose its on-disk path through ``.name``.
        source_path = doc if isinstance(doc, (str, os.PathLike)) else doc.name
        shutil.copy(source_path, destination_dir)

    return "File(s) saved successfully!"
def process_docs():
    """Build and save a FAISS index from the files in the documents directory.

    Loads every ``.pdf``, ``.txt`` and ``.docx`` file directly inside
    ``/home/user/app/docs/``, splits the loaded documents into chunks of
    1000 characters with a 200-character overlap, embeds the chunks with
    ``OpenAIEmbeddings`` and writes the resulting FAISS index to
    ``/home/user/app/docs_db/``.

    Returns:
        A status message string: success, or a notice that there was no
        text to index.
    """
    from langchain.document_loaders import PyPDFLoader
    from langchain.document_loaders import DirectoryLoader
    from langchain.document_loaders import TextLoader
    from langchain.document_loaders import Docx2txtLoader
    from langchain.vectorstores import FAISS
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    docs_dir = '/home/user/app/docs/'

    # One (glob pattern, loader class) pair per supported file type. The
    # order (pdf, txt, docx) fixes the order of the combined document list.
    loaders_by_glob = (
        ("./*.pdf", PyPDFLoader),
        ("./*.txt", TextLoader),
        ("./*.docx", Docx2txtLoader),
    )
    documents = []
    for pattern, loader_cls in loaders_by_glob:
        loader = DirectoryLoader(docs_dir, glob=pattern, loader_cls=loader_cls)
        documents.extend(loader.load())

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_documents(documents)

    # An index cannot be built from zero chunks (no supported files, or files
    # with no extractable text); report that instead of failing inside FAISS.
    if not chunks:
        return "No text found in file(s) to process."

    embeddings = OpenAIEmbeddings()
    docs_db = FAISS.from_documents(chunks, embeddings)
    docs_db.save_local("/home/user/app/docs_db/")

    return "File(s) processed successfully!"
def formatted_response(docs, response):
    """Append a "Sources" listing for *docs* to *response*.

    Args:
        docs: Iterable of objects with a ``metadata`` mapping. The ``source``
            entry (default ``'Unknown source'``) is reduced to the text after
            its last ``/`` and stripped; the optional ``page`` entry is shown
            after the name when it is not ``None``.
        response: Answer text placed before the listing.

    Returns:
        ``response``, a blank line, the heading ``Sources``, then one line
        per document.
    """
    lines = [response + "\n\nSources"]
    for document in docs:
        meta = document.metadata
        file_name = meta.get('source', 'Unknown source').split('/')[-1].strip()
        page = meta.get('page', None)
        if page is None:
            lines.append(file_name)
        else:
            lines.append(f"{file_name}\tpage no {page}")
    return "\n".join(lines)
def search_docs(question):
    """Answer *question* from the saved FAISS index and list the sources.

    Loads the index stored at ``/home/user/app/docs_db/``, runs a similarity
    search for the question, and passes the matching documents to a "stuff"
    question-answering chain backed by ``gpt-3.5-turbo``.

    Args:
        question: The question text to search for and answer.

    Returns:
        The chain's answer followed by the source listing produced by
        ``formatted_response``.
    """
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.vectorstores import FAISS
    from langchain.chains.question_answering import load_qa_chain
    from langchain.callbacks import get_openai_callback
    from langchain.chat_models import ChatOpenAI

    index = FAISS.load_local("/home/user/app/docs_db/", OpenAIEmbeddings())
    matches = index.similarity_search(question)

    chat_model = ChatOpenAI(model_name='gpt-3.5-turbo')
    qa_chain = load_qa_chain(chat_model, chain_type="stuff")

    with get_openai_callback() as usage:
        answer = qa_chain.run(input_documents=matches, question=question)
        # Print the callback object to stdout (presumably OpenAI usage
        # details, going by the callback's name -- confirm).
        print(usage)

    return formatted_response(matches, answer)
def delete_docs():
    """Remove the uploaded-documents directory and the saved index directory.

    Each directory is removed independently, so a missing one does not
    prevent the other from being deleted.

    Returns:
        ``"Deleted Successfully"`` if at least one directory was removed,
        ``"Already Deleted"`` if neither existed, or a ``"Delete failed: ..."``
        message when removal fails for another filesystem reason.
    """
    import shutil

    removed_any = False
    for path in ("/home/user/app/docs/", "/home/user/app/docs_db/"):
        try:
            shutil.rmtree(path)
        except FileNotFoundError:
            # Already gone; keep going so the other directory is still handled.
            continue
        except OSError as err:
            # Report real failures (e.g. permissions) rather than claiming
            # the data was already deleted.
            return f"Delete failed: {err}"
        else:
            removed_any = True

    return "Deleted Successfully" if removed_any else "Already Deleted"
import gradio as gr
# Custom stylesheet for the interface. The ``.col`` class caps an element at
# half the available width, centers it horizontally (auto side margins) and
# lays its children out as a centered flex column.
css = """
.col{
max-width: 50%;
margin: 0 auto;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
}
"""
# Build the Gradio interface: one outer tab holding a centered column with
# two inner tabs (upload/process and query/delete).
# NOTE(review): the nesting below is reconstructed from a source whose
# indentation was lost -- confirm against the intended layout.
with gr.Blocks(css=css) as demo:
    gr.Markdown("## <center>Lawyer GPT</center>")

    with gr.Tab("Your AI Legal Assistant"):
        with gr.Column(elem_classes="col"):

            # --- Tab 1: upload files, then build the index from them ---
            with gr.Tab("Upload and Process Documents"):
                with gr.Column():
                    docs_upload_input = gr.Files(label="Upload File(s)")
                    docs_upload_button = gr.Button("Upload")
                    docs_upload_output = gr.Textbox(label="Output")

                    docs_process_button = gr.Button("Process")
                    docs_process_output = gr.Textbox(label="Output")

                    gr.ClearButton(
                        [docs_upload_input, docs_upload_output, docs_process_output]
                    )

            # --- Tab 2: ask questions, or delete the stored data ---
            with gr.Tab("Query Documents"):
                with gr.Column():
                    docs_search_input = gr.Textbox(label="Enter Question")
                    docs_search_button = gr.Button("Search")
                    docs_search_output = gr.Textbox(label="Output")

                    docs_delete_button = gr.Button("Delete")
                    docs_delete_output = gr.Textbox(label="Output")

                    gr.ClearButton(
                        [docs_search_input, docs_search_output, docs_delete_output]
                    )

    # Event wiring: each button calls its handler and shows the returned
    # message in the matching output textbox.
    docs_upload_button.click(
        save_docs, inputs=docs_upload_input, outputs=docs_upload_output
    )
    docs_process_button.click(
        process_docs, inputs=None, outputs=docs_process_output
    )
    docs_search_button.click(
        search_docs, inputs=docs_search_input, outputs=docs_search_output
    )
    docs_delete_button.click(
        delete_docs, inputs=None, outputs=docs_delete_output
    )

# Enable request queuing, then start the app.
demo.queue()
demo.launch()