NickNYU committed on
Commit 45b3942
1 Parent(s): b5d3f34

Update app.py

Files changed (1)
  1. app.py +59 -34
app.py CHANGED
@@ -1,13 +1,11 @@
-import os
-from llama_index import SimpleDirectoryReader
-from llama_index.node_parser import SimpleNodeParser
-from llama_index.data_structs.node import Node, DocumentRelationship
-from llama_index import VectorStoreIndex
-from llama_index import LLMPredictor, VectorStoreIndex, ServiceContext
+from llama_hub.github_repo import GithubRepositoryReader, GithubClient
+from llama_index import download_loader, GPTVectorStoreIndex
+from llama_index import LLMPredictor, ServiceContext, LangchainEmbedding
 from langchain.llms import AzureOpenAI
 from langchain.embeddings.openai import OpenAIEmbeddings
-from llama_index import LangchainEmbedding, ServiceContext
-from llama_index import StorageContext, load_index_from_storage
+import os
+import pickle
+import streamlit as st
 
 import logging
 import sys
@@ -18,24 +16,32 @@ logging.basicConfig(
 )  # logging.DEBUG for more verbose output
 logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
 
+# Sidebar contents
+with st.sidebar:
+    st.title("🤗💬 LLM Chat App")
+    st.markdown(
+        """
+    ## About
+    This app is an LLM-powered chatbot built using:
+    - [Streamlit](https://streamlit.io/)
+    - [LangChain](https://python.langchain.com/)
+    - [X-Pipe](https://github.com/ctripcorp/x-pipe)
+    """
+    )
+    # add_vertical_space(5)
+    st.write("Made by Nick")
+
 
-def main() -> None:
-    documents = SimpleDirectoryReader("./data").load_data()
-
-    # index = VectorStoreIndex.from_documents(documents)
-
-    # parser = SimpleNodeParser()
-    # nodes = parser.get_nodes_from_documents(documents)
-    # index = VectorStoreIndex(nodes)
-
+def main() -> None:
+    st.header("X-Pipe Wiki 机器人 💬")
     # define embedding
     embedding = LangchainEmbedding(OpenAIEmbeddings(client=None, chunk_size=1))
     # define LLM
     llm_predictor = LLMPredictor(
         llm=AzureOpenAI(
-            client=None,
             deployment_name="text-davinci-003",
             model="text-davinci-003",
+            client=None,
         )
     )
 
@@ -43,26 +49,45 @@ def main() -> None:
     service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor, embed_model=embedding
     )
-
-    # build index
-    index = VectorStoreIndex.from_documents(
-        documents,
-        service_context=service_context,
-    )
+    download_loader("GithubRepositoryReader")
+    docs = None
+    if os.path.exists("docs/docs.pkl"):
+        with open("docs/docs.pkl", "rb") as f:
+            docs = pickle.load(f)
 
-    index.storage_context.persist(persist_dir="./dataset")
-    storage_context = StorageContext.from_defaults(persist_dir="./dataset")
-    index = load_index_from_storage(
-        storage_context=storage_context, service_context=service_context
-    )
+    if docs is None:
+        github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
+        loader = GithubRepositoryReader(
+            github_client,
+            owner="ctripcorp",
+            repo="x-pipe",
+            filter_directories=(
+                [".", "doc"],
+                GithubRepositoryReader.FilterType.INCLUDE,
+            ),
+            filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
+            verbose=True,
+            concurrent_requests=10,
+        )
 
-    # index.vector_store.persist("./dataset")
-    # query with embed_model specified
-    query_engine = index.as_query_engine(
-        retriever_mode="embedding", verbose=True, service_context=service_context
-    )
-    response = query_engine.query("请帮忙推荐一杯咖啡给我,我喜欢咖啡因")
-    print(response)
+        docs = loader.load_data(branch="master")
+
+        with open("docs/docs.pkl", "wb") as f:
+            pickle.dump(docs, f)
+
+    index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
+
+    query_engine = index.as_query_engine(service_context=service_context)
+
+    query = st.text_input("X-Pipe Wiki 问题:")
+    if query:
+        index = GPTVectorStoreIndex.from_documents(
+            docs, service_context=service_context
+        )
+
+        query_engine = index.as_query_engine(service_context=service_context)
+        response = query_engine.query(query)
+        st.write(response)
 
 
 if __name__ == "__main__":
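The new sidebar keeps add_vertical_space(5) commented out; the name matches the helper shipped in the streamlit-extras package, which this commit does not declare as a dependency. A minimal sketch of enabling it under that assumption:

import streamlit as st
# Assumption: requires the streamlit-extras package, not declared by this commit.
from streamlit_extras.add_vertical_space import add_vertical_space

with st.sidebar:
    st.title("🤗💬 LLM Chat App")
    add_vertical_space(5)  # inserts five blank lines before the footer text
    st.write("Made by Nick")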
 
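Both versions construct AzureOpenAI with client=None and pass no credentials; LangChain releases of this period resolve Azure settings from the standard OpenAI environment variables. A sketch of the expected environment with placeholder values; the exact variable set and API version are assumptions about this LangChain vintage:

import os

# Placeholders only; substitute the settings of your Azure OpenAI resource.
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_BASE"] = "https://<your-resource>.openai.azure.com/"
os.environ["OPENAI_API_KEY"] = "<azure-openai-key>"
os.environ["OPENAI_API_VERSION"] = "2023-05-15"  # assumption: any version the deployment supports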
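GithubClient(os.getenv("GITHUB_TOKEN")) quietly receives None when the variable is unset and is likely to fail only later, inside the GitHub API calls. A small guard that surfaces the problem in the UI instead; st.error and st.stop are standard Streamlit calls, and the message text is illustrative:

import os
import streamlit as st

if os.getenv("GITHUB_TOKEN") is None:
    # Fail fast with a visible message instead of an opaque GitHub API error.
    st.error("GITHUB_TOKEN is not set; cannot fetch the X-Pipe wiki from GitHub.")
    st.stop()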
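On a first run with no cache, the write path opens docs/docs.pkl before the docs/ directory exists, which raises FileNotFoundError. The same caching step with the directory created first; save_docs_cache is a hypothetical helper name:

import os
import pickle

def save_docs_cache(docs, path: str = "docs/docs.pkl") -> None:
    # Hypothetical helper: ensure the parent directory exists before pickling.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, "wb") as f:
        pickle.dump(docs, f)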
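main() builds the index and query engine at the top and then rebuilds both inside if query:, and Streamlit reruns the whole script on every interaction, so each question pays the full embedding cost twice. A sketch of building the index once per process; it assumes the docs/docs.pkl cache written above already exists, st.cache_resource requires Streamlit 1.18 or newer, and the leading underscore tells Streamlit not to hash the service context:

import pickle
import streamlit as st
from llama_index import GPTVectorStoreIndex

@st.cache_resource(show_spinner=False)
def build_index(_service_context):
    # Built once per process; later Streamlit reruns reuse the cached object.
    with open("docs/docs.pkl", "rb") as f:  # assumes the cache from the diff above
        docs = pickle.load(f)
    return GPTVectorStoreIndex.from_documents(docs, service_context=_service_context)

Both construction sites then collapse to build_index(service_context).as_query_engine(service_context=service_context).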