import os

import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from huggingface_hub import login
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.vectorstores import Chroma
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Load environment variables from a local .env file
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

# The Hugging Face login is optional for this script; skip it when no token is set
hf_key = os.getenv("huggingface")
if hf_key:
    login(hf_key)
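
# A minimal .env file for this script might look like (key names taken from
# the os.getenv calls above; values are placeholders):
#
#   OPENAI_API_KEY=sk-...
#   huggingface=hf_...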

# Load product data from CSV
product_data_path = "./db/catalog_chatbot_2024-07-08.csv"
df = pd.read_csv(product_data_path, encoding='ISO-8859-1', sep=';')
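
# The catalog CSV is expected to provide (at least) these semicolon-separated
# columns, inferred from the accesses below: Référence interne, Nom du produit,
# Catégorie par défaut, Caractéristiques, Prix de vente TTC,
# Description sans HTML, URL Produit.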

# Define the embedding model
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Directory where ChromaDB persists its index
persistent_directory = os.path.join("db", "chroma_open_ai")

# Build the vector store only if it does not already exist on disk
if not os.path.exists(os.path.join(persistent_directory, 'chroma.sqlite3')):
    db = Chroma(persist_directory=persistent_directory, embedding_function=embeddings)
    # Build one text per catalog row and add them in a single batched call,
    # which avoids making one embedding request per product
    texts, metadatas = [], []
    for _, row in df.iterrows():
        texts.append(
            f"Nom du produit: {row['Nom du produit']} - "
            f"Catégorie: {row['Catégorie par défaut']} - "
            f"Caractéristiques: {row['Caractéristiques']} - "
            f"Prix: {row['Prix de vente TTC']} - "
            f"Description: {row['Description sans HTML']}"
        )
        metadatas.append({
            "reference": row['Référence interne'],
            "name": row['Nom du produit'],
            "price": row['Prix de vente TTC'],
            "product_url": row['URL Produit'],
        })
    db.add_texts(texts=texts, metadatas=metadatas)
    # Explicit persist is only needed on chromadb < 0.4; newer versions
    # persist automatically
    db.persist()
else:
    # Reuse the existing store instead of re-embedding the whole catalog
    db = Chroma(persist_directory=persistent_directory, embedding_function=embeddings)

# Create a retriever
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 10})
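
# Quick sanity check (hypothetical query): retriever.invoke("vin blanc fruité")
# should return up to 10 Document objects carrying the metadata stored above.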

# Create a ChatOpenAI model
llm = ChatOpenAI(model="gpt-4o")

# Format retrieved documents as a markdown product list, deduplicated by name
def format_retrieved_products(retrieved_docs):
    recommendations = []
    seen_products = set()
    for doc in retrieved_docs:
        metadata = doc.metadata
        product_name = metadata.get("name", "Produit inconnu")
        price = metadata.get("price", "Prix non disponible")
        product_url = metadata.get("product_url", "#")

        if product_name not in seen_products:
            # Two trailing spaces force a markdown line break before the link
            recommendation = f"**{product_name}** - {price} €  \n[Voir produit]({product_url})"
            recommendations.append(recommendation)
            seen_products.add(product_name)

    # Blank line between items so each renders as its own paragraph
    return "\n\n".join(recommendations)

# System prompt for the question-answering chain
qa_system_prompt = (
    "You are a sales assistant helping customers purchase wine. "
    "Use the retrieved context to answer the question. "
    "Recommend 3 different items and include the Calais Vins product URLs."
)

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
        # create_stuff_documents_chain fills {context} with the retrieved docs
        ("system", "{context}"),
    ]
)

question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

# Define a retrieval chain: fetch documents, then answer with the LLM chain
def create_custom_retrieval_chain(retriever, llm_chain):
    def invoke(inputs):
        query = inputs["input"]
        retrieved_docs = retriever.invoke(query)
        # Let the LLM draft the reply from the retrieved documents;
        # previously llm_chain was never called and the LLM was unused
        answer = llm_chain.invoke({
            "input": query,
            "chat_history": inputs.get("chat_history", []),
            "context": retrieved_docs,
        })
        return {
            "answer": answer,
            "recommendations": format_retrieved_products(retrieved_docs),
        }

    return invoke

rag_chain = create_custom_retrieval_chain(retriever, question_answer_chain)
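
# Example call (shape assumed by run_streamlit_chatbot below):
#   result = rag_chain({"input": "je recherche un vin blanc fruité",
#                       "chat_history": []})
#   result["answer"]          -> LLM-generated reply as a string
#   result["recommendations"] -> deduplicated markdown product list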

# Streamlit App Interface
def run_streamlit_chatbot():
    st.title("Wine Sales Assistant")

    # Streamlit reruns the whole script on every interaction, so the chat
    # history must live in session state rather than a local variable
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    # User input area
    user_query = st.text_input("Posez une question au chatbot (e.g., je recherche un vin blanc fruité):")

    if user_query:
        result = rag_chain({"input": user_query, "chat_history": st.session_state.chat_history})

        # Record the exchange so follow-up questions keep their context
        st.session_state.chat_history.append(HumanMessage(content=user_query))
        st.session_state.chat_history.append(AIMessage(content=result["answer"]))

        # Display chatbot response
        st.write("### Chatbot's Recommendations:")
        st.write(result["answer"])

        # Show the retrieved product list in a collapsible panel
        with st.expander("Voir les recommandations"):
            st.write(result["recommendations"])

# Main function to run the Streamlit app
if __name__ == "__main__":
    run_streamlit_chatbot()
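
# To launch the app from a terminal (script filename assumed):
#   streamlit run wine_chatbot.py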