import os

import git
import streamlit as st
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

# Google Generative AI embedding model and chat model (both need GOOGLE_API_KEY set in the environment)
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=os.environ["GOOGLE_API_KEY"],
    task_type="retrieval_query",
)

model = ChatGoogleGenerativeAI(
    model="gemini-pro",
    google_api_key=os.environ["GOOGLE_API_KEY"],
    temperature=0.2,
    convert_system_message_to_human=True,
)

def get_folder_paths(directory="githubCode"):
    """Return every subdirectory path under `directory`, skipping .git folders.

    Note that the top-level directory itself is not included in the result.
    """
    folder_paths = []
    for root, dirs, files in os.walk(directory):
        if ".git" in dirs:
            # Prune .git so os.walk does not descend into repository metadata
            dirs.remove(".git")
        for dir_name in dirs:
            folder_paths.append(os.path.join(root, dir_name))
    return folder_paths

directory_paths = get_folder_paths()
directory_paths.append("Code")
print("directory_paths: ", directory_paths)

with open("Code.txt", "w", encoding='utf-8') as output:
    for directory_path in directory_paths:
        for filename in os.listdir(directory_path):
            if filename.endswith((".py",".ipynb",".js", ".ts")):
                filepath = os.path.join(directory_path, filename)
                with open(filepath, "r", encoding='utf-8') as file:
                    code = file.read()
                    output.write(f"Filepath: {filepath}:\n\n")
                    output.write(code + "\n\n")

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader

loader = TextLoader("Code.txt", encoding="utf-8")
pages = loader.load_and_split()

# Split data into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
context = "\n\n".join(str(p.page_content) for p in pages)
texts = text_splitter.split_text(context)

# Embed the chunks into Chroma and expose the store as a retriever that returns the top 5 matches
vector_index = Chroma.from_texts(texts, embeddings).as_retriever(search_kwargs={"k": 5})
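
# Optional sanity check (illustrative sketch; the query string is just an example): uncomment to
# inspect which chunks the retriever pulls back for a sample question before building the chain.
# sample_docs = vector_index.get_relevant_documents("Where is the embedding model configured?")
# for doc in sample_docs:
#     print(doc.page_content[:120], "...")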

qa_chain = RetrievalQA.from_chain_type(
    model,
    retriever=vector_index,
    return_source_documents=True,
)

# Run the retrieval QA chain on a question and return just the generated answer text
def generate_assistant_response(question):
    answer = qa_chain({"query": question})
    return answer["result"]

# print(generate_assistant_response("Tell me about the instructor_embeddings function."))
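
# A minimal Streamlit chat loop around generate_assistant_response (a sketch of one possible
# way to use the imported `st`); run the script with `streamlit run` to see the UI.
st.title("Chat with your codebase")

if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the conversation so far on each rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Take a new question, answer it with the retrieval chain, and store both turns
if prompt := st.chat_input("Ask about the code"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    response = generate_assistant_response(prompt)
    with st.chat_message("assistant"):
        st.markdown(response)
    st.session_state.messages.append({"role": "assistant", "content": response})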