#!/usr/bin/env python3
from dotenv import load_dotenv
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores.chroma import Chroma
from langchain.llms.ollama import Ollama
from langchain.memory import ConversationBufferMemory
import chromadb
import os
import time
from flask import Flask, jsonify, Blueprint, request
from constants import CHROMA_SETTINGS

from prompt_verified import create_prompt_template

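# Configuration comes from the .env file loaded below; EMBEDDINGS_MODEL_NAME,
# PERSIST_DIRECTORY, MODEL_TYPE and MODEL_PATH are read further down.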
if not load_dotenv(".env"):
    print("Could not load .env file or it is empty. Please check that it exists and is readable.")
    exit(1)

embeddings_model_name = os.environ.get("EMBEDDINGS_MODEL_NAME")
persist_directory = os.environ.get("PERSIST_DIRECTORY")

model_type = os.environ.get("MODEL_TYPE")
model_path = os.environ.get("MODEL_PATH")
model_n_ctx = os.environ.get("MODEL_N_CTX")  # read but unused by the Ollama path below
model_n_batch = int(os.environ.get("MODEL_N_BATCH", 8))  # read but unused by the Ollama path below
target_source_chunks = int(os.environ.get("TARGET_SOURCE_CHUNKS", 4))

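# Blueprint grouping the chatbot routes; registered on the Flask app at the
# bottom of this file.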
chat = Blueprint('chat', __name__)



@chat.route("/home", methods=["GET"])
@chat.route("/")
def base():
    # Simple health-check / welcome endpoint.
    return jsonify(
        {
            "status": "success",
            "message": "Welcome to the chatbot system",
            "responseCode": 200
        }
    ), 200

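# Conversation memory shared by every request. The memory_key / input_key /
# output_key values must match what ConversationalRetrievalChain reads and
# writes. Note that this single module-level buffer is shared across all
# callers; per-user memory (e.g. keyed by userID) would be needed to isolate
# sessions.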
memory = ConversationBufferMemory(
    memory_key="chat_history",
    input_key="question",
    output_key='answer',
    return_messages=True,
    # human_prefix = "John Doe",
    # ai_prefix = "AFEX-trade-bot",
)

def load_qa_chain(memory, prompt):
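    """Build the question-answering chain: Ollama embeddings over the
    persisted Chroma store, feeding a ConversationalRetrievalChain on the
    configured Ollama model."""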

    embeddings = OllamaEmbeddings(model=embeddings_model_name)
    chroma_client = chromadb.PersistentClient(
        settings=CHROMA_SETTINGS,
        path=persist_directory
    )
    db = Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
        client_settings=CHROMA_SETTINGS,
        client=chroma_client
    )
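    # Expose the store as a retriever that returns the TARGET_SOURCE_CHUNKS
    # most similar chunks for each question.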
    retriever = db.as_retriever(
        search_kwargs={
            "k": target_source_chunks
        }
    )

    # Prepare the LLM
    match model_type:
        case "ollama":
            llm = Ollama(
                model=model_path,
                temperature=0.2
            )
        case _:
            raise ValueError(
                f"Model type {model_type} is not supported. "
                "Set MODEL_TYPE=ollama in the .env file."
            )


    qa = ConversationalRetrievalChain.from_llm(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        memory=memory,
        return_source_documents=True,
        combine_docs_chain_kwargs={
            'prompt': prompt,
        },
        verbose=True,
    )
    return qa

@chat.route("/chat-bot", methods=["POST"])
def main():
    global memory
    # try:
    # request.
    # -------------- TO-DO ------------------ #
    # Add a constraint to raise an error if   #
    # the userID is not passed in the request #
    # -------------- TO-DO ------------------ #

    userID = str(request.args.get('userID'))
    customer_name = str(request.args.get('customerName'))
    request_data = request.get_json()
    # print(request_data['query'])
    query = request_data['query']

    
    
    # Interactive questions and answers

    while True:
        if query.strip() == "":
            continue

        start_time = time.time()
        prompt = create_prompt_template(customerName=customer_name)
        qa = load_qa_chain(prompt=prompt, memory=memory)
        response = qa(
            {
                "question": query,
            }
        )
        end_time = time.time()
        time_taken = round(end_time - start_time, 2)
        # print(time_taken)

        answer = str(response['answer'])
        
        docs = response['source_documents']
        print(response)

        # Print the relevant sources used for the answer
        for document in docs:
            print("\n> " + document.metadata["source"] + ":")
    #         print(document.page_content)
        
    #     return jsonify(res['result'])

        return jsonify(
            {
                "Query": query,
                "UserID":userID,
                "Time_taken": time_taken,
                "reply": answer,
                # "chain_response": response,
                "customer_name": customer_name,
                "responseCode": 200
            }
        ), 200
    # except Exception as e:
    #     print(e)
    #     return jsonify(
    #         {
    #             "Status": "An error occured",
    #             # "error": e,
    #             "responseCode": 201
    #         }
    #     ), 201

# Flask App setup
app = Flask(__name__)
app.register_blueprint(chat)

if __name__ == "__main__":
    app.run(debug=True, host='0.0.0.0', port=8088)
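
# Example request (hypothetical values; assumes the server is running locally
# on port 8088 as configured above):
#
#   curl -X POST "http://localhost:8088/chat-bot?userID=42&customerName=Jane" \
#        -H "Content-Type: application/json" \
#        -d '{"query": "What products do you offer?"}'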