File size: 5,023 Bytes
abc103b
 
 
 
 
 
 
 
 
 
 
 
88faaa1
abc103b
 
88faaa1
abc103b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88faaa1
abc103b
 
88faaa1
abc103b
 
88faaa1
abc103b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88faaa1
abc103b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88faaa1
abc103b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88faaa1
 
abc103b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655eb44
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165


import os
import openai

# Presumably set to silence the HuggingFace tokenizers fork/parallelism warning — confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Bare lookup: fails fast with KeyError at import time if OPENAI_API_KEY is not set.
os.environ["OPENAI_API_KEY"]
def save_docs(docs):
    """Copy each uploaded file into the app's shared document directory.

    Args:
        docs: iterable of uploaded file objects; each must expose a ``.name``
            attribute holding its on-disk path (Gradio file objects do).

    Returns:
        A status string for display in the UI.
    """
    import os
    import shutil

    # Original had two variables (destination_dir / output_dir) for the
    # same path; a single constant is used here.
    destination_dir = "/home/user/app/docs/"
    os.makedirs(destination_dir, exist_ok=True)

    for doc in docs:
        shutil.copy(doc.name, destination_dir)

    return "File(s) saved successfully!"

def process_docs():
    """Build the searchable index from everything in the docs directory.

    Loads every PDF, TXT and DOCX file from ``/home/user/app/docs/``, splits
    the text into overlapping chunks, embeds the chunks with OpenAI
    embeddings, and persists a FAISS index to ``/home/user/app/docs_db/``.

    Returns:
        A status string for display in the UI.
    """
    from langchain.document_loaders import (
        DirectoryLoader,
        Docx2txtLoader,
        PyPDFLoader,
        TextLoader,
    )
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.vectorstores import FAISS

    source_dir = "/home/user/app/docs/"

    # One (glob, loader class) pair per supported file type — replaces three
    # copy-pasted loader stanzas.
    loader_specs = [
        ("./*.pdf", PyPDFLoader),
        ("./*.txt", TextLoader),
        ("./*.docx", Docx2txtLoader),
    ]

    documents = []
    for glob, loader_cls in loader_specs:
        documents.extend(
            DirectoryLoader(source_dir, glob=glob, loader_cls=loader_cls).load()
        )

    # FAISS.from_documents raises on an empty corpus; report it cleanly.
    if not documents:
        return "No documents found to process!"

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,    # characters per chunk
        chunk_overlap=200,  # overlap keeps context across chunk borders
        length_function=len,
    )
    chunks = text_splitter.split_documents(documents)

    docs_db = FAISS.from_documents(chunks, OpenAIEmbeddings())
    docs_db.save_local("/home/user/app/docs_db/")

    return "File(s) processed successfully!"

def formatted_response(docs, response):
    """Append a source listing to an LLM answer.

    Args:
        docs: retrieved documents; each must have a ``.metadata`` dict that
            may contain ``'source'`` (a file path) and ``'page'`` keys.
        response: the raw answer text from the QA chain.

    Returns:
        The answer followed by a "Sources" section with one line per
        document — the bare file name, plus the page number when present.
    """
    # Build the lines in a list and join once; the original concatenated
    # with += (quadratic) and carried an unused enumerate() index.
    lines = [response + "\n\nSources"]

    for doc in docs:
        source_info = doc.metadata.get('source', 'Unknown source')
        page_info = doc.metadata.get('page')

        # Keep only the file name, not the full path.
        doc_name = source_info.split('/')[-1].strip()

        if page_info is not None:
            lines.append(f"{doc_name}\tpage no {page_info}")
        else:
            lines.append(doc_name)

    return "\n".join(lines)

def search_docs(question):
    """Answer *question* from the persisted FAISS index.

    Loads the saved vector store, retrieves the documents most similar to
    the question, runs them through a gpt-3.5-turbo "stuff" QA chain, and
    returns the answer annotated with its sources.
    """
    from langchain.callbacks import get_openai_callback
    from langchain.chains.question_answering import load_qa_chain
    from langchain.chat_models import ChatOpenAI
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.vectorstores import FAISS

    store = FAISS.load_local("/home/user/app/docs_db/", OpenAIEmbeddings())
    relevant_docs = store.similarity_search(question)

    qa_chain = load_qa_chain(
        ChatOpenAI(model_name='gpt-3.5-turbo'), chain_type="stuff"
    )

    # Track token usage / cost for this call and print it to stdout.
    with get_openai_callback() as cb:
        answer = qa_chain.run(input_documents=relevant_docs, question=question)
        print(cb)

    return formatted_response(relevant_docs, answer)

def delete_docs():
    """Remove the uploaded documents and the FAISS index from disk.

    Each directory is removed independently: the original aborted on the
    first missing path (so a missing docs dir left the index behind) and
    used a bare ``except:`` that swallowed every exception.

    Returns:
        "Deleted Successfully" if at least one directory was removed,
        "Already Deleted" if neither existed.
    """
    import shutil

    paths = ["/home/user/app/docs/", "/home/user/app/docs_db/"]

    deleted_any = False
    for path in paths:
        try:
            shutil.rmtree(path)
            deleted_any = True
        except FileNotFoundError:
            # Never created or already removed — move on to the next path.
            pass

    return "Deleted Successfully" if deleted_any else "Already Deleted"

import gradio as gr

# Centre the main column and cap it at half the viewport width.
css = """
.col{
    max-width: 50%;
    margin: 0 auto;
    display: flex;
    flex-direction: column;
    justify-content: center;
    align-items: center;
}
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown("## <center>Lawyer GPT</center>")

    with gr.Tab("Your AI Legal Assistant"):
      with gr.Column(elem_classes="col"):

        # Tab 1: upload raw files, then build the FAISS index from them.
        with gr.Tab("Upload and Process Documents"):
          with gr.Column():

            docs_upload_input = gr.Files(label="Upload File(s)")
            docs_upload_button = gr.Button("Upload")
            docs_upload_output = gr.Textbox(label="Output")

            docs_process_button = gr.Button("Process")
            docs_process_output = gr.Textbox(label="Output")

            gr.ClearButton([docs_upload_input, docs_upload_output, docs_process_output])

        # Tab 2: query the indexed documents, or delete everything.
        with gr.Tab("Query Documents"):
          with gr.Column():

            docs_search_input = gr.Textbox(label="Enter Question")
            docs_search_button = gr.Button("Search")
            docs_search_output = gr.Textbox(label="Output")

            docs_delete_button = gr.Button("Delete")
            docs_delete_output = gr.Textbox(label="Output")

            gr.ClearButton([docs_search_input, docs_search_output, docs_delete_output])

    #########################################################################################################
    # Wire each button to its backend function defined earlier in this file.
    docs_upload_button.click(save_docs, inputs=docs_upload_input, outputs=docs_upload_output)
    docs_process_button.click(process_docs, inputs=None, outputs=docs_process_output)

    docs_search_button.click(search_docs, inputs=docs_search_input, outputs=docs_search_output)

    docs_delete_button.click(delete_docs, inputs=None, outputs=docs_delete_output)
    #########################################################################################################

# Queue requests so long-running handlers don't block concurrent users.
demo.queue()
demo.launch()