Spaces:
Running
Running
import os | |
import openai | |
# Set the tokenizers parallelism flag to "false" for this process
# (presumably to silence Hugging Face tokenizers fork warnings — TODO confirm).
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Bare lookup whose value is discarded: its only effect is to raise KeyError
# at startup when OPENAI_API_KEY is not present in the environment.
os.environ["OPENAI_API_KEY"]
def save_docs(docs, destination_dir="/home/user/app/docs/"):
    """Copy uploaded files into the application's document directory.

    Args:
        docs: Iterable of uploaded files. Each item is either an object
            exposing its path as ``.name`` or a plain path string.
            ``None`` or an empty collection means nothing was uploaded.
        destination_dir: Directory the files are copied into; it is created
            when it does not exist yet.

    Returns:
        A status message intended for display in the UI.
    """
    import os
    import shutil

    # Nothing selected in the upload widget: report it instead of failing
    # with a TypeError when iterating over None.
    if not docs:
        return "No file(s) uploaded!"

    os.makedirs(destination_dir, exist_ok=True)
    for doc in docs:
        # Accept both objects carrying the path in ``.name`` and raw paths.
        shutil.copy(getattr(doc, "name", doc), destination_dir)
    return "File(s) saved successfully!"
def process_docs(docs_dir="/home/user/app/docs/", db_dir="/home/user/app/docs_db/"):
    """Build a FAISS index from the uploaded PDF, TXT and DOCX files.

    Every matching file in ``docs_dir`` is loaded, split into overlapping
    chunks, embedded with OpenAI embeddings and stored as a FAISS index in
    ``db_dir``.

    Args:
        docs_dir: Directory holding the uploaded documents.
        db_dir: Directory the FAISS index is written to.

    Returns:
        A status message intended for display in the UI.
    """
    import os

    # Check cheaply, before the heavy imports: without the upload directory
    # there is nothing to load and the loaders would raise.
    if not os.path.isdir(docs_dir):
        return "No file(s) found. Please upload document(s) first!"

    from langchain.document_loaders import PyPDFLoader
    from langchain.document_loaders import DirectoryLoader
    from langchain.document_loaders import TextLoader
    from langchain.document_loaders import Docx2txtLoader
    from langchain.vectorstores import FAISS
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    # One loader per supported file type; the order (pdf, txt, docx) matches
    # the order in which documents are concatenated.
    loaders_by_glob = (
        ("./*.pdf", PyPDFLoader),
        ("./*.txt", TextLoader),
        ("./*.docx", Docx2txtLoader),
    )
    documents = []
    for pattern, loader_cls in loaders_by_glob:
        loader = DirectoryLoader(docs_dir, glob=pattern, loader_cls=loader_cls)
        documents.extend(loader.load())

    if not documents:
        return "No file(s) found. Please upload document(s) first!"

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_documents(documents)

    # An empty chunk list gives FAISS nothing to index, so stop here with a
    # readable message rather than an opaque library error.
    if not chunks:
        return "No text could be extracted from the uploaded file(s)!"

    embeddings = OpenAIEmbeddings()
    docs_db = FAISS.from_documents(chunks, embeddings)
    docs_db.save_local(db_dir)
    return "File(s) processed successfully!"
def formatted_response(docs, response):
    """Append a "Sources" listing to an answer.

    Args:
        docs: Iterable of objects with a ``metadata`` mapping. The optional
            ``'source'`` entry is a path-like string and the optional
            ``'page'`` entry is a page identifier.
        response: The answer text the listing is appended to.

    Returns:
        ``response`` followed by a blank line, the heading ``Sources`` and one
        line per document: the file name (last ``/``-separated component of
        the source) and, when a page is recorded, a tab plus ``page no <page>``.
    """
    lines = [response + "\n\nSources"]
    for doc in docs:
        source = doc.metadata.get('source', 'Unknown source')
        page = doc.metadata.get('page', None)
        # Keep only the file name, not the directory it was loaded from.
        name = source.split('/')[-1].strip()
        lines.append(name if page is None else f"{name}\tpage no {page}")
    return "\n".join(lines)
def search_docs(question, db_dir="/home/user/app/docs_db/"):
    """Answer a question from the indexed documents and list the sources used.

    The FAISS index in ``db_dir`` is searched for the chunks most similar to
    the question; those chunks are passed to a ``gpt-3.5-turbo`` "stuff"
    question-answering chain.

    Args:
        question: The user's question.
        db_dir: Directory holding the saved FAISS index.

    Returns:
        The answer followed by its sources, or a short status message when
        the question is blank or no index exists yet.
    """
    import os

    # Guard before the heavy imports and any API call: a blank question or a
    # missing index would only surface as an error from inside the libraries.
    if not question or not question.strip():
        return "Please enter a question!"
    if not os.path.isdir(db_dir):
        return "No processed documents found. Please upload and process document(s) first!"

    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.vectorstores import FAISS
    from langchain.chains.question_answering import load_qa_chain
    from langchain.callbacks import get_openai_callback
    from langchain.chat_models import ChatOpenAI

    embeddings = OpenAIEmbeddings()
    docs_db = FAISS.load_local(db_dir, embeddings)
    docs = docs_db.similarity_search(question)

    llm = ChatOpenAI(model_name='gpt-3.5-turbo')
    chain = load_qa_chain(llm, chain_type="stuff")

    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=question)
        # Log what the OpenAI callback recorded for this call.
        print(cb)

    return formatted_response(docs, response)
def delete_docs(docs_dir="/home/user/app/docs/", db_dir="/home/user/app/docs_db/"):
    """Remove the uploaded documents and the FAISS index built from them.

    Each directory is removed independently, so a missing document directory
    no longer prevents the index directory from being deleted.

    Args:
        docs_dir: Directory holding the uploaded documents.
        db_dir: Directory holding the saved FAISS index.

    Returns:
        ``"Deleted Successfully"`` when at least one directory was removed,
        ``"Already Deleted"`` when neither existed, or a message naming the
        directories that could not be removed.
    """
    import shutil

    removed_any = False
    failures = []
    for path in (docs_dir, db_dir):
        try:
            shutil.rmtree(path)
        except FileNotFoundError:
            # Already gone; nothing to do for this directory.
            pass
        except OSError as err:
            # Real failure (permissions, not a directory, ...): report it
            # instead of claiming the data was already deleted.
            failures.append(f"{path}: {err}")
        else:
            removed_any = True

    if failures:
        return "Could not delete: " + "; ".join(failures)
    return "Deleted Successfully" if removed_any else "Already Deleted"
import gradio as gr | |
# Custom stylesheet passed to gr.Blocks: the "col" class caps the width at 50%,
# centers the element horizontally and lays its children out as a centered
# vertical flex column.
css = """
.col{
max-width: 50%;
margin: 0 auto;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
}
"""
# NOTE(review): the nesting of the `with` blocks below was reconstructed from a
# copy whose indentation had been stripped — confirm against the original layout.
with gr.Blocks(css=css) as demo:
    gr.Markdown("## <center>Lawyer GPT</center>")
    with gr.Tab("Your AI Legal Assistant"):
        with gr.Column(elem_classes="col"):
            # Tab 1: upload files, then build the index from them.
            with gr.Tab("Upload and Process Documents"):
                with gr.Column():
                    docs_upload_input = gr.Files(label="Upload File(s)")
                    docs_upload_button = gr.Button("Upload")
                    docs_upload_output = gr.Textbox(label="Output")
                    docs_process_button = gr.Button("Process")
                    docs_process_output = gr.Textbox(label="Output")
                    # Clears the upload widget and both status boxes of this tab.
                    gr.ClearButton([docs_upload_input, docs_upload_output, docs_process_output])
            # Tab 2: ask questions against the index, or delete all stored data.
            with gr.Tab("Query Documents"):
                with gr.Column():
                    docs_search_input = gr.Textbox(label="Enter Question")
                    docs_search_button = gr.Button("Search")
                    docs_search_output = gr.Textbox(label="Output")
                    docs_delete_button = gr.Button("Delete")
                    docs_delete_output = gr.Textbox(label="Output")
                    # Clears the question box and both output boxes of this tab.
                    gr.ClearButton([docs_search_input, docs_search_output, docs_delete_output])
    #########################################################################################################
    # Wire each button to its handler; process and delete take no inputs.
    docs_upload_button.click(save_docs, inputs=docs_upload_input, outputs=docs_upload_output)
    docs_process_button.click(process_docs, inputs=None, outputs=docs_process_output)
    docs_search_button.click(search_docs, inputs=docs_search_input, outputs=docs_search_output)
    docs_delete_button.click(delete_docs, inputs=None, outputs=docs_delete_output)
    #########################################################################################################
# Enable request queuing, then start the app.
demo.queue()
demo.launch()