srinuksv committed on
Commit
fada25c
1 Parent(s): e0f195b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -16
app.py CHANGED
@@ -1,18 +1,100 @@
1
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
# Define the paths for the working directories: the vector-store DB and the data inbox.
db_dir = 'db'
data_dir = 'data'

# Create each directory if it is missing, reporting what happened either way.
for _dir in (db_dir, data_dir):
    if not os.path.exists(_dir):
        os.makedirs(_dir)
        print(f"Directory '{_dir}' created successfully.")
    else:
        print(f"Directory '{_dir}' already exists.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from dotenv import load_dotenv
import gradio as gr
# Single consolidated llama-index import (the original imported the same names twice).
from llama_index.core import (
    StorageContext,
    load_index_from_storage,
    VectorStoreIndex,
    SimpleDirectoryReader,
    ChatPromptTemplate,
    Settings,
)
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# NOTE(review): SentenceTransformer is never referenced below — confirm before removing.
from sentence_transformers import SentenceTransformer

# Load HF_TOKEN (and any other secrets) from a local .env file into the environment.
load_dotenv()

# Configure the global llama-index settings: a remote HF Inference API LLM
# plus a local embedding model used for indexing and retrieval.
Settings.llm = HuggingFaceInferenceAPI(
    model_name="google/gemma-1.1-7b-it",
    tokenizer_name="google/gemma-1.1-7b-it",
    context_window=3000,          # max prompt tokens sent to the model
    token=os.getenv("HF_TOKEN"),  # HF API token read from the environment
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},  # low temperature -> near-deterministic answers
)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

# Define the directory for persistent storage and data:
# PERSIST_DIR holds the persisted vector index, PDF_DIRECTORY the source documents.
PERSIST_DIR = "db"
PDF_DIRECTORY = 'data'  # Changed to the directory containing PDFs

# Ensure both directories exist before ingestion/persistence runs.
os.makedirs(PDF_DIRECTORY, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)
30
+
31
def data_ingestion_from_directory():
    """Build a vector index from all documents in PDF_DIRECTORY and persist it to PERSIST_DIR."""
    # Read every file in the data directory into llama-index Document objects.
    documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
    # Embed and index the documents (uses the globally configured Settings.embed_model).
    # NOTE: the original also created an unused StorageContext.from_defaults() here;
    # VectorStoreIndex.from_documents never received it, so the dead local is removed.
    index = VectorStoreIndex.from_documents(documents)
    # Persist the index to disk so handle_query() can reload it on each call.
    index.storage_context.persist(persist_dir=PERSIST_DIR)
37
+
38
def handle_query(query):
    """Answer *query* against the persisted index and return the response text."""
    # Prompt template: the retrieved context and the user's question are injected
    # into {context_str} / {query_str} by the query engine.
    qa_messages = [
        (
            "user",
            """
            You are a Q&A assistant named RedfernsTech, created by the RedfernsTech team. You have been designed to provide accurate answers based on the context provided.
            Context:
            {context_str}
            Question:
            {query_str}
            """
        )
    ]
    qa_template = ChatPromptTemplate.from_messages(qa_messages)

    # Reload the persisted index from disk for this query.
    storage = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage)

    engine = index.as_query_engine(text_qa_template=qa_template)
    result = engine.query(query)

    # Normalize the possible return shapes to plain text (guard-clause style).
    if hasattr(result, 'response'):
        return result.response
    if isinstance(result, dict) and 'response' in result:
        return result['response']
    return "Sorry, I couldn't find an answer."
66
+
67
# --- Example usage -----------------------------------------------------------

# Ingest every PDF under PDF_DIRECTORY and persist the resulting index.
print("Processing PDF ingestion from directory:", PDF_DIRECTORY)
data_ingestion_from_directory()

# Run one sample question through the freshly built index as a smoke test.
query = "How do I use the RedfernsTech Q&A assistant?"
print("Query:", query)
response = handle_query(query)
print("Answer:", response)

# --- Gradio chatbot UI -------------------------------------------------------

# Free-form question box: no label, just a placeholder hint.
input_component = gr.Textbox(
    show_label=False,
    placeholder="Ask me anything about the document..."
)

# Plain text box for the model's answer.
output_component = gr.Textbox()

# Wire the query handler into a simple one-input/one-output web interface.
interface = gr.Interface(
    fn=handle_query,
    inputs=input_component,
    outputs=output_component,
    title="RedfernsTech Q&A Chatbot",
    description="Ask me anything about the uploaded document."
)

# Serve the UI; share=True also exposes a temporary public gradio.live URL.
interface.launch(server_port=7861, share=True)