import os

import openai
import pinecone
import streamlit as st
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

# The OpenAI key is expected in the OPENAI_API_KEY environment variable;
# LangChain's OpenAI wrappers read it automatically.
openai.api_key = os.getenv("OPENAI_API_KEY")

st.header("Document Question Answering")

# Path to the folder of documents to index, e.g. '/content/data' on Colab.
directory = st.text_input("Directory containing your documents")

def load_docs(directory):
    """Load every file in `directory` into LangChain Document objects."""
    loader = DirectoryLoader(directory)
    return loader.load()


def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split documents into overlapping chunks suitable for embedding."""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return text_splitter.split_documents(documents)


if directory:
    documents = load_docs(directory)
    st.write(f"Loaded {len(documents)} documents")

    docs = split_docs(documents)
    st.write(f"Split into {len(docs)} chunks")

    # OpenAIEmbeddings defaults to OpenAI's text-embedding-ada-002 model.
    embeddings = OpenAIEmbeddings()

    # Quick sanity check: embed a test string and show the embedding dimensionality.
    query_result = embeddings.embed_query("Hello world")
    st.write(f"Embedding dimension: {len(query_result)}")
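    # --- Hedged sketch of how the remaining imports could be wired up. ---
    # The pinecone / Pinecone / OpenAI / load_qa_chain imports above are not used in
    # the code shown; the block below is one plausible continuation, not the author's
    # exact code. The index name and Pinecone environment are placeholder assumptions,
    # and the Pinecone index is assumed to already exist in the project.
    pinecone.init(
        api_key=os.getenv("PINECONE_API_KEY"),
        environment=os.getenv("PINECONE_ENV", "us-west1-gcp"),  # assumed region
    )
    index_name = "document-qa"  # placeholder index name

    # Embed the chunks and upsert them into the Pinecone index.
    index = Pinecone.from_documents(docs, embeddings, index_name=index_name)

    # Answer questions by stuffing the most similar chunks into an OpenAI prompt.
    chain = load_qa_chain(OpenAI(), chain_type="stuff")

    query = st.text_input("Ask a question about your documents")
    if query:
        similar_docs = index.similarity_search(query)
        answer = chain.run(input_documents=similar_docs, question=query)
        st.write(answer)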