Spaces:
Runtime error
Runtime error
File size: 1,111 Bytes
ae9c1ea ed0be22 1256a85 ae9c1ea ab5688d ae9c1ea 0d3f45a ae9c1ea ce7b644 ae9c1ea ab5688d b1dfc1e ae9c1ea b1dfc1e d2f7d16 ae9c1ea 22f90c0 ae9c1ea 62eea5b ae9c1ea 22f90c0 ae9c1ea 4f75d26 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import os
import openai
import pinecone
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
import streamlit as st
# Page title followed by a free-text box in which the user types the path of
# the directory to index (for example '/content/data').  The widget's label is
# intentionally the empty string, exactly as in the original layout.
st.header("Document Question Answering")
directory = st.text_area(label="")
def load_docs(directory):
    """Load the documents found under ``directory``.

    Args:
        directory: Path passed straight to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader(directory).load()`` returns
        (presumably a list of LangChain documents -- confirm against the
        installed LangChain version).
    """
    return DirectoryLoader(directory).load()
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split ``documents`` into smaller chunks.

    Args:
        documents: Documents to split, as produced by ``load_docs``.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap``.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents``.
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
    }
    chunker = RecursiveCharacterTextSplitter(**splitter_options)
    return chunker.split_documents(documents)
# Run the load -> split -> embed pipeline once the user has entered a path.
#
# The path comes from a free-text widget, so it may carry stray whitespace or
# newlines and may not exist at all.  Normalise it and verify it is a real
# directory first, so a typo produces a readable message in the page instead
# of an unhandled exception from the loader.
data_dir = directory.strip() if directory else ""
if data_dir:
    if not os.path.isdir(data_dir):
        st.error(f"Directory not found: {data_dir}")
    else:
        documents = load_docs(data_dir)
        # Number of documents loaded, shown in the page.
        st.write(len(documents))

        docs = split_docs(documents)
        # Number of chunks; this goes to the server's stdout, not the page.
        print(len(docs))

        # NOTE(review): whether ``model_name`` is an accepted keyword depends
        # on the installed LangChain version -- confirm against the pinned
        # version (other releases may expect ``model=...`` instead).
        embeddings = OpenAIEmbeddings(model_name="ada")

        # Embed a fixed probe string and display the embedding's length.
        query_result = embeddings.embed_query("Hello world")
        st.write(len(query_result))
|