andreped committed on
Commit
a0bbff7
1 Parent(s): 91aba60

Added more verbose output to data setup.

Browse files
Files changed (1) hide show
  1. chatbot/data.py +7 -0
chatbot/data.py CHANGED
@@ -23,6 +23,8 @@ def download_test_data():
23
  def load_data():
24
  with st.spinner(text="Loading and indexing the provided dataset – hang tight! This may take a few seconds."):
25
  documents = SimpleDirectoryReader(input_dir="./data", recursive=True).load_data()
 
 
26
  llm = AzureOpenAI(
27
  model="gpt-3.5-turbo",
28
  engine=st.secrets["ENGINE"],
@@ -36,6 +38,8 @@ def load_data():
36
  "André's research. Keep your answers technical and based on facts;"
37
  " do not hallucinate features.",
38
  )
 
 
39
  # You need to deploy your own embedding model as well as your own chat completion model
40
  embed_model = OpenAIEmbedding(
41
  model="text-embedding-ada-002",
@@ -44,7 +48,10 @@ def load_data():
44
  api_base=st.secrets["OPENAI_API_BASE"],
45
  api_type="azure",
46
  api_version=st.secrets["OPENAI_API_VERSION"],
 
47
  )
 
 
48
  service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
49
  set_global_service_context(service_context)
50
  index = VectorStoreIndex.from_documents(documents) # , service_context=service_context)
 
23
  def load_data():
24
  with st.spinner(text="Loading and indexing the provided dataset – hang tight! This may take a few seconds."):
25
  documents = SimpleDirectoryReader(input_dir="./data", recursive=True).load_data()
26
+
27
+ with st.spinner(text="Setting up Azure OpenAI..."):
28
  llm = AzureOpenAI(
29
  model="gpt-3.5-turbo",
30
  engine=st.secrets["ENGINE"],
 
38
  "André's research. Keep your answers technical and based on facts;"
39
  " do not hallucinate features.",
40
  )
41
+
42
+ with st.spinner(text="Setting up OpenAI Embedding..."):
43
  # You need to deploy your own embedding model as well as your own chat completion model
44
  embed_model = OpenAIEmbedding(
45
  model="text-embedding-ada-002",
 
48
  api_base=st.secrets["OPENAI_API_BASE"],
49
  api_type="azure",
50
  api_version=st.secrets["OPENAI_API_VERSION"],
51
+ embed_batch_size=10, # lower (e.g. to 1) to reduce rate limiting -> may degrade response runtime
52
  )
53
+
54
+ with st.spinner(text="Setting up Vector Store Index..."):
55
  service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
56
  set_global_service_context(service_context)
57
  index = VectorStoreIndex.from_documents(documents) # , service_context=service_context)