Spaces:
Running
Running
File size: 5,023 Bytes
abc103b 88faaa1 abc103b 88faaa1 abc103b 88faaa1 abc103b 88faaa1 abc103b 88faaa1 abc103b 88faaa1 abc103b 88faaa1 abc103b 88faaa1 abc103b 655eb44 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
import os
import openai
# Force TOKENIZERS_PARALLELISM to "false" for this process.
# NOTE(review): presumably this is here to silence the Hugging Face
# tokenizers parallelism/fork warning - confirm it is still needed.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Bare lookup whose value is discarded: it raises KeyError while the module is
# being loaded if OPENAI_API_KEY is not set, so a missing key is reported at
# startup instead of on the first OpenAI request.
os.environ["OPENAI_API_KEY"]
def save_docs(docs, destination_dir="/home/user/app/docs/"):
    """Copy uploaded files into the application's document directory.

    Args:
        docs: Iterable of uploaded files. Each item is either an object with a
            ``name`` attribute holding the file's path on disk, or the path
            itself as a string. ``None`` or an empty iterable means nothing
            was uploaded.
        destination_dir: Directory the files are copied into. It is created
            when it does not exist yet.

    Returns:
        A status message suitable for display in the UI.
    """
    import os
    import shutil

    # Nothing selected: report it instead of failing on iteration over None.
    if not docs:
        return "No file(s) to save. Please upload at least one file."

    os.makedirs(destination_dir, exist_ok=True)
    for doc in docs:
        # Accept both temp-file wrappers (``.name``) and plain path strings.
        # NOTE(review): which of the two the upload widget delivers depends on
        # the installed Gradio version - confirm against the deployed one.
        source_path = getattr(doc, "name", doc)
        shutil.copy(source_path, destination_dir)
    return "File(s) saved successfully!"
def process_docs(docs_dir="/home/user/app/docs/", db_dir="/home/user/app/docs_db/"):
    """Build a FAISS vector index from the uploaded documents.

    Loads every ``.pdf``, ``.txt`` and ``.docx`` file found directly in
    ``docs_dir``, splits the text into overlapping chunks, embeds the chunks
    with OpenAI embeddings and saves the resulting index to ``db_dir``.

    Args:
        docs_dir: Directory containing the uploaded documents.
        db_dir: Directory the FAISS index is written to.

    Returns:
        A status message suitable for display in the UI.
    """
    import os

    # Checked before the heavy imports so the common "nothing uploaded yet"
    # (or "already deleted") case answers immediately.
    if not os.path.isdir(docs_dir):
        return "No uploaded file(s) found. Please upload documents first."

    from langchain.document_loaders import PyPDFLoader
    from langchain.document_loaders import DirectoryLoader
    from langchain.document_loaders import TextLoader
    from langchain.document_loaders import Docx2txtLoader
    from langchain.vectorstores import FAISS
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    # One loader per supported file type; order matches the original
    # pdf -> txt -> docx concatenation.
    loaders_by_glob = (
        ("./*.pdf", PyPDFLoader),
        ("./*.txt", TextLoader),
        ("./*.docx", Docx2txtLoader),
    )
    documents = []
    for pattern, loader_cls in loaders_by_glob:
        loader = DirectoryLoader(docs_dir, glob=pattern, loader_cls=loader_cls)
        documents.extend(loader.load())

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_documents(documents)

    # With no chunks there is nothing to embed or index; stop here rather
    # than handing an empty list to the vector store.
    if not chunks:
        return "No supported document content found (.pdf, .txt, .docx)."

    embeddings = OpenAIEmbeddings()
    docs_db = FAISS.from_documents(chunks, embeddings)
    docs_db.save_local(db_dir)
    return "File(s) processed successfully!"
def formatted_response(docs, response):
    """Append a "Sources" listing to an answer.

    Args:
        docs: Documents used to produce the answer. Each must expose a
            ``metadata`` mapping; the ``source`` and ``page`` keys are read
            when present.
        response: The answer text.

    Returns:
        ``response`` followed by a blank line, the heading ``Sources`` and one
        line per document: the file name (last ``/``-separated component of
        ``source``, or ``Unknown source``), plus a tab and ``page no N`` when
        a page number is available.
    """
    lines = [response + "\n\nSources"]
    for document in docs:
        meta = document.metadata
        file_name = meta.get('source', 'Unknown source').split('/')[-1].strip()
        page = meta.get('page', None)
        if page is None:
            lines.append(file_name)
        else:
            lines.append(f"{file_name}\tpage no {page}")
    return "\n".join(lines)
def search_docs(question, db_dir="/home/user/app/docs_db/"):
    """Answer a question from the indexed documents.

    Loads the FAISS index stored in ``db_dir``, retrieves the chunks most
    similar to ``question``, asks ``gpt-3.5-turbo`` to answer from those
    chunks and appends the list of source files.

    Args:
        question: The user's question.
        db_dir: Directory holding the saved FAISS index.

    Returns:
        The answer followed by its sources, or a short guidance message when
        the question is blank or no index has been built yet.
    """
    import os

    # Both guards run before any OpenAI call so an empty query or a missing
    # index costs nothing and yields a readable message instead of an error.
    if not question or not question.strip():
        return "Please enter a question."
    if not os.path.isdir(db_dir):
        return "No processed documents found. Please upload and process documents first."

    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.vectorstores import FAISS
    from langchain.chains.question_answering import load_qa_chain
    from langchain.callbacks import get_openai_callback
    from langchain.chat_models import ChatOpenAI

    embeddings = OpenAIEmbeddings()
    docs_db = FAISS.load_local(db_dir, embeddings)
    docs = docs_db.similarity_search(question)

    llm = ChatOpenAI(model_name='gpt-3.5-turbo')
    # "stuff" puts all retrieved chunks into a single prompt.
    chain = load_qa_chain(llm, chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.run(input_documents=docs, question=question)
        # Print the callback's usage summary to the server log.
        print(cb)
    return formatted_response(docs, response)
def delete_docs(paths=("/home/user/app/docs/", "/home/user/app/docs_db/")):
    """Remove the uploaded documents and the vector index from disk.

    Every directory is attempted independently, so a directory that is
    already gone does not prevent the remaining ones from being removed.

    Args:
        paths: Directories to delete recursively. Defaults to the upload
            directory and the FAISS index directory.

    Returns:
        ``"Deleted Successfully"`` when at least one directory was removed,
        ``"Already Deleted"`` when none of them existed, or a message naming
        the directories that could not be removed.
    """
    import shutil

    removed_any = False
    failures = []
    for path in paths:
        try:
            shutil.rmtree(path)
        except FileNotFoundError:
            # Already gone - not an error for a delete request.
            continue
        except OSError as err:
            # Real failures (permissions, busy files, ...) are reported
            # instead of being disguised as "Already Deleted".
            failures.append(f"{path}: {err}")
        else:
            removed_any = True

    if failures:
        return "Delete failed for " + "; ".join(failures)
    return "Deleted Successfully" if removed_any else "Already Deleted"
import gradio as gr
# Custom stylesheet handed to gr.Blocks(css=...). It defines a single "col"
# class (attached with elem_classes="col") that caps the element at half the
# available width, centres it horizontally and lays its children out as a
# centred vertical flex column.
css = """
.col{
max-width: 50%;
margin: 0 auto;
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
}
"""
# Gradio UI: one outer tab holding a centred column with two inner tabs, one
# for uploading/processing documents and one for querying/deleting them.
# NOTE(review): the nesting below is reconstructed from the order of the
# `with` statements - confirm it matches the intended layout.
with gr.Blocks(css=css) as demo:
    gr.Markdown("## <center>Lawyer GPT</center>")

    with gr.Tab("Your AI Legal Assistant"):
        with gr.Column(elem_classes="col"):

            with gr.Tab("Upload and Process Documents"):
                with gr.Column():
                    docs_upload_input = gr.Files(label="Upload File(s)")
                    docs_upload_button = gr.Button("Upload")
                    docs_upload_output = gr.Textbox(label="Output")

                    docs_process_button = gr.Button("Process")
                    docs_process_output = gr.Textbox(label="Output")

                    gr.ClearButton([docs_upload_input, docs_upload_output, docs_process_output])

            with gr.Tab("Query Documents"):
                with gr.Column():
                    docs_search_input = gr.Textbox(label="Enter Question")
                    docs_search_button = gr.Button("Search")
                    docs_search_output = gr.Textbox(label="Output")

                    docs_delete_button = gr.Button("Delete")
                    docs_delete_output = gr.Textbox(label="Output")

                    gr.ClearButton([docs_search_input, docs_search_output, docs_delete_output])

    # Event wiring: each button calls its handler and writes the returned
    # status/answer text into the matching output box. Registered while the
    # Blocks context is still open.
    docs_upload_button.click(save_docs, inputs=docs_upload_input, outputs=docs_upload_output)
    docs_process_button.click(process_docs, inputs=None, outputs=docs_process_output)
    docs_search_button.click(search_docs, inputs=docs_search_input, outputs=docs_search_output)
    docs_delete_button.click(delete_docs, inputs=None, outputs=docs_delete_output)

# Enable request queuing, then start the web server.
demo.queue()
demo.launch()