S0ham075 committed on
Commit
b7064d3
1 Parent(s): 9e30ce6
Files changed (9) hide show
  1. .gitignore +1 -0
  2. Dockerfile +11 -0
  3. LICENSE +21 -0
  4. agent.py +127 -0
  5. app.py +58 -0
  6. compose.yml +29 -0
  7. main.py +110 -0
  8. query.py +65 -0
  9. requirements.txt +11 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.8

WORKDIR /app

# Keep pip current before resolving the requirement set.
RUN pip install --upgrade pip

# Install dependencies in their own layer so Docker's cache survives
# source-only edits.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Serve the Streamlit UI (app.py is the entry point).
CMD ["streamlit", "run", "app.py"]
# Alternative headless entry point:
# CMD ["python", "agent.py"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 s0ham075
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
agent.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain_together import Together
from main import get_repo_name
import os
import requests
import json

# SECURITY FIX: the Together API key was hard-coded here in source control.
# It is now read from the environment (see compose.yml / .env); the leaked
# key must be revoked and rotated.
llm = Together(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    temperature=0,
    max_tokens=1024,
    together_api_key=os.getenv("TOGETHER_API_KEY"),
)


# --- tools ---------------------------------------------------------------
from langchain.agents import Tool
from langchain.tools import BaseTool
from langchain_community.utilities import SerpAPIWrapper

# Web search; SerpAPIWrapper reads SERPAPI_API_KEY from the environment.
search = SerpAPIWrapper()
search_tool = Tool(
    name="search",
    func=search.run,
    description="useful for when you need to answer questions about current events. You should ask targeted questions",
)

from langchain_community.utilities import StackExchangeAPIWrapper

# StackExchange lookup for programming-error questions.
stackexchange = StackExchangeAPIWrapper()
stackexchange_tool = Tool(
    name="error-search",
    func=stackexchange.run,
    description="useful for when you need information regarding a programming error. You should pass the error directly",
)

from langchain_community.vectorstores import Qdrant
import qdrant_client
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings

# Embedding model must match the one used for indexing in main.py.
embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")

# Qdrant connection details come from the environment (compose.yml / .env).
client = qdrant_client.QdrantClient(
    os.getenv("QDRANT_HOST"),
    api_key=os.getenv("QDRANT_API_KEY"),
)


# Imports needed by the tool definitions below.
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools import tool
from query import answer_query
50
@tool
def get_repo_issues(repo_url : str)->str:
    """ Use this tool to get the issues about the repo , it is very important that you pass the repo url directly in the input and nothing else"""
    # BUG FIX: the original used repo_url.rstrip('.git'), which strips ANY
    # run of trailing '.', 'g', 'i', 't' characters (e.g. '.../commit' loses
    # its 't' and 'i'), not the literal '.git' suffix. Strip the suffix
    # explicitly instead.
    url = repo_url.strip()
    if url.endswith(".git"):
        url = url[:-4]
    # Extract owner and repo names from the URL path.
    parts = url.rstrip("/").split("/")
    owner, repo = parts[-2], parts[-1]

    # GitHub REST API endpoint listing the repository's issues.
    api_url = f'https://api.github.com/repos/{owner}/{repo}/issues'

    try:
        # Unauthenticated GET — subject to GitHub's anonymous rate limits.
        response = requests.get(api_url)

        if response.status_code == 200:
            # Return the issue list as a JSON string for the agent to read.
            result = response.json()
            return json.dumps(result)
        else:
            return f"Error: Unable to fetch issues. Status code: {response.status_code}"
    except Exception as e:
        # Network failures etc. are reported back to the agent as text.
        return f"Error: {str(e)}"
74
@tool
def retrieve_repo(question: str)->str:
    """" use this to get code from the repository or the project.You should look for file or Folder name or code snippets regarding the query.The input you give to this tool should be detailed
    if the question is a general question regarding the project for ex - "what is the repo about " then try to find the readme file"""
    # Delegate to the RAG pipeline in query.py. The active repository's
    # collection identifier is stashed in the environment by app.py
    # (os.environ["collection_name"]).
    # NOTE(review): the docstring above opens with four quotes, so the tool
    # description the LLM sees begins with a stray '"' — confirm and fix
    # alongside a prompt review, since the description is runtime behavior.
    result = answer_query(question, os.getenv("collection_name"))
    return result
86
from langchain import hub
from langchain.agents import initialize_agent,create_react_agent,AgentExecutor

# Tools exposed to the ReAct agent; names are listed in the prompt below.
tools = [retrieve_repo,stackexchange_tool,search_tool]


# Start from the stock ReAct prompt, then replace its template with
# repo-QA-specific instructions (placeholders {tools}, {tool_names},
# {input}, {agent_scratchpad} are required by create_react_agent).
prompt = hub.pull("hwchase17/react")
prompt.template = """You are Coding assistant , who answers questions based on the github repo or project,
Answer the following questions as best you can , However dont make up anything on your own, always try to look for relevant documents in the repo
. You have access to the following tools:
{tools}
Use the following format:
Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]( if you are going to use retrieve repo tool -
IT IS VERY IMPORTANT TO ASK A DETAILED AND LENGTHY QUESTION TO GET QUALITY RESPONSE)
Action Input: the input to the action
Observation: the result of the action,(here it is mandatory to check wether the observation is related to the question or not, if not repeat the process untill you are satisfied)
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer,
Final Answer:(in case of retrieve repo tool , the observation from the tool should be your final answer directly) the final answer to the original input question,You should first explain the concept in a clear and concise manner and You should try to provide code snippets for better understanding
Begin!
Question: {input}
Thought:{agent_scratchpad}"""

# NOTE(review): this memory object is constructed but never wired into the
# agent/executor below, so conversation history is currently unused —
# confirm whether it should be passed to AgentExecutor.
memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    k=3,
    return_messages=True
)

conversational_agent = create_react_agent(
    tools=tools,
    llm=llm,
    prompt=prompt
)
# handle_parsing_errors lets the executor recover when the LLM output does
# not match the ReAct format instead of raising.
agent_executor = AgentExecutor(agent=conversational_agent, tools=tools, handle_parsing_errors=True, verbose=True)
124
def agent_query(query):
    """Run *query* through the ReAct agent executor and return its final answer."""
    outcome = agent_executor.invoke({"input": query})
    print(outcome)
    return outcome["output"]
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import streamlit as st
import os
from query import answer_query
from main import repository_loader,get_repo_name
from agent import agent_query

st.title("Git Bot - v 0.02")

# 'flag' is True until a repository has been embedded in this session.
if 'flag' not in st.session_state:
    st.session_state['flag'] = True

url = st.sidebar.text_input("Github url")
if url and st.session_state.flag:
    with st.spinner('Embedding your Repository...'):
        # The repo URL doubles as the vector-store collection identifier;
        # agent.py reads it back via os.getenv("collection_name").
        os.environ["collection_name"] = url
        repository_loader(url)
        st.session_state.flag = False
    st.success('Done!')

# Initialize chat history for this session.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay prior messages on every Streamlit rerun.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Handle a new user prompt.
if prompt := st.chat_input("What is up?"):
    # Echo the user message and record it in the history.
    with st.chat_message("user"):
        st.markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    if not url:
        st.warning('Please enter your Github Link!', icon='⚠')
    if url and os.getenv("collection_name"):
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            assistant_response = agent_query(prompt)
            message_placeholder.markdown(assistant_response)
        # Record the assistant response in the history.
        st.session_state.messages.append({"role": "assistant", "content": assistant_response})
compose.yml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
version: "3.8"
services:
  server:
    build:
      context: .
      dockerfile: Dockerfile

    ports:
      - 6333:6333
      - 8501:8501

    volumes:
      - .:/app

    # SECURITY FIX: API keys and the Qdrant host were committed here in
    # plaintext. They are now interpolated from the local .env file (already
    # listed in .gitignore). The previously committed keys must be revoked
    # and rotated — they are in the repository history.
    environment:
      - QDRANT_API_KEY=${QDRANT_API_KEY}
      - QDRANT_HOST=${QDRANT_HOST}
      - TOGETHER_API_KEY=${TOGETHER_API_KEY}
      - SERPAPI_API_KEY=${SERPAPI_API_KEY}

    develop:
      watch:
        - path: .
          action: rebuild

    deploy:
      resources:
        limits:
          memory: 4096M # Adjust the memory limit as needed
main.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import git
4
+ from urllib.parse import urlparse
5
+
6
+ local_dir = os.getcwd()
7
+ branch = None
8
+
9
+ # Function to extract repository name from URL
10
def get_repo_name(url):
    """Return the repository name extracted from a Git URL.

    e.g. 'https://github.com/user/project.git' -> 'project'
         'https://github.com/user/project'     -> 'project'
    """
    parsed_url = urlparse(url)
    # The last path component is (usually) the repository name.
    repo_name = os.path.basename(parsed_url.path)
    # BUG FIX: the original did repo_name[:-4] unconditionally, chopping four
    # characters off any name that does not end in '.git' ('project' -> 'pro').
    # Only strip the suffix when it is actually present.
    if repo_name.endswith(".git"):
        repo_name = repo_name[:-4]
    return repo_name
17
+
18
+ # Function to clone a Git repository
19
def clone_repo(url):
    """Clone the repository at *url* into ./staging/<repo-name>.

    Returns True when a fresh clone was made, False when the repository was
    already present locally or cloning failed.
    """
    try:
        path = os.path.join(local_dir,"staging",get_repo_name(url))
        # An existing staging directory means the repo was already processed.
        if os.path.exists(path):
            print(f"{get_repo_name(url)} already added in db")
            return False

        repo = git.Repo.clone_from(url,path)
        # Remember the checked-out branch so load_repo() can hand it to GitLoader.
        global branch
        branch = repo.head.reference
        # (message typo "succesfully" fixed)
        print(f"{get_repo_name(url)} cloned successfully")
        return True
    except Exception as e :
        print(f"Error cloning the git repository: {e}")
        return False
35
+
36
def delete_cloned_repo(url):
    """Remove the local staging clone for *url*, if one exists."""
    target = os.path.join(local_dir, "staging", get_repo_name(url))
    try:
        if not os.path.exists(target):
            print(f"Repository at {target} does not exist.")
        else:
            # ignore_errors: best-effort cleanup, e.g. read-only .git files.
            shutil.rmtree(target, ignore_errors=True)
            print(f"Repository at {target} successfully deleted.")
    except Exception as e:
        print(f"Error deleting repository: {e}")
48
+
49
from langchain_community.document_loaders import GitLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Qdrant
import qdrant_client

# Chunking configuration applied to source files before embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
)

# Qdrant connection; host and key are supplied via the environment
# (see compose.yml / .env).
client = qdrant_client.QdrantClient(
    os.getenv("QDRANT_HOST"),
    api_key=os.getenv("QDRANT_API_KEY"),
)

from langchain_community.embeddings.fastembed import FastEmbedEmbeddings

# 384-dimensional embeddings; must match the collection size used in load_repo().
embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")
vectorstore = None
70
+
71
def load_repo(url):
    """(Re)create the Qdrant collection for *url* and index the staged clone.

    Returns True on success, False when loading or indexing fails.
    """
    # Vector parameters must match the embedding model's output dimension
    # (384 for bge-small; 768 for instructor-xl, 1536 for OpenAI).
    collection_config = qdrant_client.http.models.VectorParams(
        size=384,
        distance=qdrant_client.http.models.Distance.COSINE,
    )

    # NOTE: recreate_collection drops any existing collection of this name.
    client.recreate_collection(
        collection_name=get_repo_name(url),
        vectors_config=collection_config,
    )
    store = Qdrant(
        client=client,
        collection_name=get_repo_name(url),
        embeddings=embeddings,
    )
    print("collection created")
    try:
        loader = GitLoader(
            repo_path=os.path.join(local_dir, "staging", get_repo_name(url)),
            branch=branch,
            # Lockfiles are huge and carry no semantic content — skip them.
            file_filter=lambda file_path: not file_path.endswith("package-lock.json"),
        )
        chunks = text_splitter.split_documents(loader.load())
        print("chunks created")
        store.add_documents(chunks)
        return True
    except Exception as e:
        print(f"Error loading and indexing repository: {e}")
        return False
97
+
98
def repository_loader(url):
    """Clone, index, and clean up the repository at *url*.

    Returns True when the repository was freshly cloned and indexed.
    """
    result = False
    if clone_repo(url):
        result = load_repo(url)
        # BUG FIX: the clone was previously removed only when indexing
        # succeeded. A failed load_repo() left a stale clone behind, and every
        # later attempt then short-circuited in clone_repo() ("already added
        # in db") without ever indexing the repo. Clean up unconditionally so
        # a retry starts fresh.
        delete_cloned_repo(url)
    return result


print('HELLO FROM CONTAINER')
query.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_together import Together
from langchain_community.vectorstores import Qdrant
from main import get_repo_name
import qdrant_client
import os

from langchain_community.embeddings.fastembed import FastEmbedEmbeddings

# Same embedding model as main.py so stored vectors and query vectors agree.
embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")

# Qdrant connection details are supplied via the environment.
client = qdrant_client.QdrantClient(
    os.getenv("QDRANT_HOST"),
    api_key=os.getenv("QDRANT_API_KEY"),
)
16
+
17
+
18
# Llama-2 chat formatting delimiters.
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

def get_prompt(instruction, new_system_prompt ):
    """Wrap *new_system_prompt* and *instruction* in Llama-2 chat markup."""
    system_block = B_SYS + new_system_prompt + E_SYS
    return B_INST + system_block + instruction + E_INST
25
+
26
# System + instruction prompts for the retrieval QA chain. The {context} and
# {question} placeholders are filled in by PromptTemplate below.
sys_prompt = """You are a helpful, smart and intelligent coding assistant. Always answer as helpfully as possible using the context code provided. Your answers should only answer the question once, you can provide code snippets but make sure you explain them thoroughly

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. """

# NOTE(review): the '/n' sequences below look like typos for '\n' newlines —
# confirm before changing, since this string is part of the live prompt.
instruction = """CONTEXT CODE:/n/n {context}/n

Question: {question}"""


prompt_template = get_prompt(instruction, sys_prompt)

llama_prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

# SECURITY FIX: the Together API key was committed here in plaintext. It is
# now read from the environment (see compose.yml / .env); the leaked key must
# be revoked and rotated.
llama2_llm = Together(
    model="togethercomputer/llama-2-70b-chat",
    temperature=0.7,
    max_tokens=1024,
    together_api_key=os.getenv("TOGETHER_API_KEY"),
)
47
+
48
+
49
def process_llm_response(llm_response):
    """Format a RetrievalQA result as the answer text followed by a source list.

    Expects a mapping with 'result' (str) and 'source_documents' (documents
    whose .metadata carries a 'source' path).
    """
    parts = [" ", llm_response['result'], "\n\nSources\n"]
    for doc in llm_response['source_documents']:
        parts.append("Source - " + doc.metadata['source'] + "\n")
    return "".join(parts)
56
+
57
def answer_query(query,url):
    """Answer *query* against the Qdrant collection for the repo at *url*."""
    store = Qdrant(
        client=client,
        collection_name=get_repo_name(url),
        embeddings=embeddings,
    )
    # 'stuff' chain: retrieved chunks are packed directly into llama_prompt.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llama2_llm,
        chain_type="stuff",
        chain_type_kwargs={"prompt": llama_prompt},
        retriever=store.as_retriever(),
        return_source_documents=True,
    )
    return process_llm_response(qa_chain(query))
65
+
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ qdrant_client
3
+ langchain-community
4
+ langchain-together
5
+ langchainhub
6
+ fastembed
7
+ streamlit
8
+ GitPython
9
+ stackapi
10
+ google-search-results
11
+ requests