File size: 2,797 Bytes
a22f65f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298864d
612f625
59fea76
a22f65f
 
612f625
a22f65f
298864d
a22f65f
 
612f625
a0bbff7
a22f65f
 
298864d
a22f65f
298864d
 
a22f65f
298864d
a22f65f
 
a632443
 
a22f65f
a0bbff7
 
a22f65f
 
 
298864d
 
 
a22f65f
298864d
612f625
a22f65f
612f625
a0bbff7
612f625
a22f65f
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import os

import streamlit as st
from gdown import download_folder
from llama_index import ServiceContext
from llama_index import SimpleDirectoryReader
from llama_index import VectorStoreIndex
from llama_index import set_global_service_context
from llama_index.embeddings import OpenAIEmbedding
from llama_index.llms import AzureOpenAI


@st.cache_resource(show_spinner=False)
def download_test_data():
    """Download the shared Google Drive test-data folder into ``./data/``.

    The function is wrapped in ``st.cache_resource`` with Streamlit's default
    spinner disabled; a custom spinner message is shown while
    ``gdown.download_folder`` runs. Nothing is returned.
    """
    folder_url = "https://drive.google.com/drive/folders/1uDSAWtLvp1YPzfXUsK_v6DeWta16pq6y"
    spinner_message = "Downloading test data. This might take a minute."

    with st.spinner(text=spinner_message):
        # TODO: replace the gdown solution with a custom one compatible with
        # GitHub, and use st.progress to report download progress in detail.
        download_folder(
            url=folder_url,
            quiet=False,
            use_cookies=False,
            output="./data/",
        )


@st.cache_resource(show_spinner=False)
def load_data():
    """Build a vector index over the files in ``./data`` using Azure OpenAI.

    Steps, each shown to the user with its own spinner:

    1. Read every document under ``./data`` (recursively).
    2. Configure the Azure OpenAI chat model.
    3. Configure the Azure OpenAI embedding model.
    4. Register both in a global llama_index service context and index the
       documents.

    The API key is read from the ``OPENAI_API_KEY`` environment variable; the
    deployment names, API base and API version are read from ``st.secrets``
    (``ENGINE``, ``ENGINE_EMBEDDING``, ``OPENAI_API_BASE``,
    ``OPENAI_API_VERSION``).

    Returns:
        The ``VectorStoreIndex`` built from the loaded documents.

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is not set in the environment.
    """
    with st.spinner(text="Loading and indexing the provided dataset – hang tight! This may take a few seconds."):
        documents = SimpleDirectoryReader(input_dir="./data", recursive=True).load_data()

    # Connection settings shared by the chat model and the embedding model.
    azure_kwargs = {
        "api_key": os.environ["OPENAI_API_KEY"],
        "api_base": st.secrets["OPENAI_API_BASE"],
        "api_type": "azure",
        "api_version": st.secrets["OPENAI_API_VERSION"],
    }

    # Adjacent string literals are concatenated verbatim, so every fragment
    # except the last must end with an explicit space.
    system_prompt = (
        "You are an expert on André's research and your job is to answer "
        "technical questions. Assume that all questions are related to "
        "André's research. Keep your answers technical and based on facts; "
        "do not hallucinate features."
    )

    with st.spinner(text="Setting up Azure OpenAI..."):
        llm = AzureOpenAI(
            model="gpt-3.5-turbo",
            engine=st.secrets["ENGINE"],
            temperature=0.5,
            system_prompt=system_prompt,
            **azure_kwargs,
        )

    with st.spinner(text="Setting up OpenAI Embedding..."):
        # You need to deploy your own embedding model as well as your own chat completion model
        embed_model = OpenAIEmbedding(
            model="text-embedding-ada-002",
            deployment_name=st.secrets["ENGINE_EMBEDDING"],
            embed_batch_size=10,  # set to low value to reduce rate limit -> may degrade response runtime
            **azure_kwargs,
        )

    with st.spinner(text="Setting up Vector Store Index..."):
        service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
        # Registered globally so from_documents() picks up the LLM and the
        # embedding model without an explicit service_context argument.
        set_global_service_context(service_context)
        index = VectorStoreIndex.from_documents(documents)

    return index