{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.14","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[],"dockerImageVersionId":30762,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":" %pip install newspaper3k","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"pip install chromadb langchain-chroma","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":" %pip install lxml_html_clean","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":" %pip install langgraph","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"#pip install selenium","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"pip install -U wikipedia","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":" %pip install -U langchain tavily-python matplotlib langchain_community scikit-learn langchainhub sentence-transformers langchain-exa","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":" %pip install unstructured langchain_groq","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"!rm -rf /kaggle/working/*","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"!rm -rf /kaggle/working/docs/chroma","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{"trusted":true},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"import os\nfrom typing_extensions import TypedDict, List\nfrom IPython.display import Image, display\nfrom langgraph.graph import START, END, StateGraph\nfrom langchain_community.llms import llamacpp, LlamaCpp\nfrom langchain_core.output_parsers import StrOutputParser\nfrom langchain_core.output_parsers import JsonOutputParser\nimport uuid \nfrom langchain_community.chat_models.llamacpp import ChatLlamaCpp\nfrom langchain.text_splitter import RecursiveCharacterTextSplitter\nfrom langchain_community.document_loaders import WebBaseLoader\nfrom langchain_community.vectorstores import SKLearnVectorStore\nfrom langchain_community.embeddings import HuggingFaceEmbeddings\nfrom langchain_community.chat_models.llamacpp import ChatLlamaCpp\nfrom sentence_transformers import SentenceTransformer\nfrom langchain.vectorstores import Chroma\nfrom langchain_community.document_loaders import UnstructuredURLLoader, NewsURLLoader\n#from langchain_community.document_loaders.url_selenium import SeleniumURLLoader\nfrom langchain_community.document_loaders import NewsURLLoader\nfrom langchain_community.retrievers.wikipedia import WikipediaRetriever\nfrom langchain_community.tools.tavily_search import TavilySearchResults\nfrom langchain_exa import ExaSearchRetriever, TextContentsOptions\nfrom langchain_groq import ChatGroq\nfrom langchain_community.utilities import GoogleSerperAPIWrapper\nfrom langchain_chroma import Chroma\n","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:04:13.656974Z","iopub.execute_input":"2024-09-10T13:04:13.657321Z","iopub.status.idle":"2024-09-10T13:04:47.528110Z","shell.execute_reply.started":"2024-09-10T13:04:13.657287Z","shell.execute_reply":"2024-09-10T13:04:47.527289Z"},"trusted":true},"execution_count":2,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n from tqdm.autonotebook import tqdm, trange\n","output_type":"stream"}]},{"cell_type":"code","source":"os.environ[\"EXA_API_KEY\"] = exa_apiKey\nos.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\nos.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.langchain.plus\"\nos.environ[\"LANGCHAIN_API_KEY\"] = lang_api_key\nos.environ[\"LANGCHAIN_PROJECT\"] = \"gemma2_9b_CRAG_SKLearn_Wiki+Serper_Async_SIM\"\nos.environ[\"GROQ_API_KEY\"] = groq_api_key\nos.environ[\"SERPER_API_KEY\"] = SERPER_API_KEY\n\n\n\n","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:04:47.529603Z","iopub.execute_input":"2024-09-10T13:04:47.530258Z","iopub.status.idle":"2024-09-10T13:04:47.536052Z","shell.execute_reply.started":"2024-09-10T13:04:47.530223Z","shell.execute_reply":"2024-09-10T13:04:47.534722Z"},"trusted":true},"execution_count":3,"outputs":[]},{"cell_type":"code","source":"# List of URLs to load documents from\nurls = [\n \n\"https://github.com/zedr/clean-code-python\",\n\"https://tenthousandmeters.com/blog/python-behind-the-scenes-10-how-python-dictionaries-work/\",\n\"https://realpython.com/python-testing/\",\n\"https://docs.python-guide.org/writing/license/\",\n \"https://blogs.nvidia.com/blog/what-is-a-transformer-model/\",\n \"https://research.google/blog/transformer-a-novel-neural-network-architecture-for-language-understanding/\",\n\"https://realpython.com/python-pep8/\",\n\"https://towardsdatascience.com/ideal-python-environment-setup-for-data-science-cdb03a447de8\",\n\"https://realpython.com/python3-object-oriented-programming/\",\n\"https://realpython.com/python-functional-programming/\",\n\"https://fivethirtyeight.com/features/science-isnt-broken/\",\n\"https://github.com/renatofillinich/ab_test_guide_in_python/blob/master/AB%20testing%20with%20Python.ipynb\",\n\"https://towardsdatascience.com/why-is-data-science-failing-to-solve-the-right-problems-7b5b6121e3b4\",\n\"https://medium.com/@srowen/common-probability-distributions-347e6b945ce4\",\n\"https://github.com/renatofillinich/ab_test_guide_in_python/blob/master/AB%20testing%20with%20Python.ipynb\",\n\"https://scikit-learn.org/stable/modules/compose.html\",\n\"https://machinelearningmastery.com/light-gradient-boosted-machine-lightgbm-ensemble/\",\n\"https://neptune.ai/blog/xgboost-vs-lightgbm\",\n\"https://towardsdatascience.com/interpretable-machine-learning-with-xgboost-9ec80d148d27\",\n\"https://www.cio.com/article/247005/what-are-containers-and-why-do-you-need-them.html\",\n\"https://mitsloan.mit.edu/ideas-made-to-matter/machine-learning-explained\",\n\"https://towardsdatascience.com/making-friends-with-machine-learning-5e28d5205a29\",\n\"https://towardsdatascience.com/handling-imbalanced-datasets-in-machine-learning-7a0e84220f28\",\n\"https://machinelearningmastery.com/multi-class-imbalanced-classification/\",\n\"https://imbalanced-learn.org/stable/auto_examples/applications/plot_impact_imbalanced_classes.html\",\n\"https://docs.ray.io/en/master/tune/examples/tune-sklearn.html\",\n\"https://www.kaggle.com/code/ldfreeman3/a-data-science-framework-to-achieve-99-accuracy\",\n\"https://cs231n.github.io/optimization-2/\",\n\"https://alexander-schiendorfer.github.io/2020/02/24/a-worked-example-of-backprop.html\",\n\"https://www.analyticsvidhya.com/blog/2020/01/fundamentals-deep-learning-activation-functions-when-to-use-them/\",\n\"https://ml-cheatsheet.readthedocs.io/en/latest/activation_functions.html\",\n\"https://d2l.ai/chapter_multilayer-perceptrons/mlp.html\",\n\"https://d2l.ai/chapter_linear-classification/softmax-regression.html#loss-function\",\n\"https://d2l.ai/chapter_optimization/\",\n \"https://www.investopedia.com/terms/s/statistical-significance.asp\",\n\"https://d2l.ai/chapter_linear-classification/softmax-regression.html#loss-function\",\n\"https://d2l.ai/chapter_convolutional-neural-networks/why-conv.html\",\n\"https://d2l.ai/chapter_convolutional-modern/alexnet.html\",\n\"https://d2l.ai/chapter_convolutional-modern/vgg.html\",\n\"https://d2l.ai/chapter_convolutional-modern/nin.html\",\n\"https://d2l.ai/chapter_convolutional-modern/googlenet.html\",\n 'https://python.langchain.com/v0.1/docs/guides/productionization/evaluation/',\n 'https://python.langchain.com/v0.1/docs/guides/productionization/evaluation/string/',\n 'https://python.langchain.com/v0.1/docs/guides/productionization/evaluation/comparison/',\n 'https://python.langchain.com/v0.1/docs/guides/productionization/evaluation/trajectory/',\n \"https://langchain-ai.github.io/langgraph/concepts/high_level/#why-langgraph\",\n 'https://langchain-ai.github.io/langgraph/concepts/low_level/#only-stream-tokens-from-specific-nodesllms',\n \"https://langchain-ai.github.io/langgraph/concepts/agentic_concepts/#reflection\",\n \"https://langchain-ai.github.io/langgraph/concepts/faq/\",\n \"https://www.geeksforgeeks.org/python-oops-concepts/\",\n \"https://www.mckinsey.com/featured-insights/mckinsey-explainers/what-is-fintech\",\n \"https://datascientest.com/en/adversarial-attack-definition-and-protection-against-this-threat\",\n \"https://datascientest.com/en/all-about-dspy\",\n \"https://datascientest.com/en/arithmetic-and-data-science\",\n \"https://datascientest.com/en/all-about-machine-learning-metrics\",\n \"https://datascientest.com/en/all-about-procedural-programming\",\n \"https://datascientest.com/en/all-about-cryptography\",\n \"https://datascientest.com/en/all-about-predictive-coding\",\n \"https://datascientest.com/en/all-about-network-convergence\",\n \"https://datascientest.com/en/all-about-forensic-analysis\",\n \"https://datascientest.com/en/all-about-chatgpt-jailbreak\",\n \"https://datascientest.com/en/all-about-pentest\",\n \"https://datascientest.com/en/all-about-embedded-systems\",\n \"https://datascientest.com/en/all-about-network-operating-system\",\n \"https://datascientest.com/en/all-about-ai-and-cybersecurity\",\n \"https://datascientest.com/en/all-about-cybernetics\",\n \"https://datascientest.com/en/all-about-seo\",\n \"https://datascientest.com/en/all-about-expert-system\",\n \"https://datascientest.com/en/all-about-telecommunications\",\n \"https://datascientest.com/en/all-about-smart-cities\",\n \"https://datascientest.com/en/all-about-artificial-intelligence-and-finance-sector\",\n \"https://datascientest.com/en/all-about-generated-pre-trained-transformers\",\n \"https://datascientest.com/en/all-about-iso-27001\",\n \"https://datascientest.com/en/all-about-smart-sensors\",\n \"https://datascientest.com/en/all-about-virtual-networks\",\n \"https://datascientest.com/en/all-about-ethical-ai\",\n \"https://datascientest.com/en/all-about-saio\",\n \"https://datascientest.com/en/all-about-recommendation-algorithm\",\n \"https://www.geeksforgeeks.org/activation-functions-neural-networks/\",\n \"https://www.geeksforgeeks.org/activation-functions-in-neural-networks-set2/?ref=oin_asr1\",\n \"https://www.geeksforgeeks.org/choosing-the-right-activation-function-for-your-neural-network/?ref=oin_asr3\",\n \"https://www.geeksforgeeks.org/difference-between-feed-forward-neural-networks-and-recurrent-neural-networks/?ref=oin_asr2\",\n \"https://www.geeksforgeeks.org/recurrent-neural-networks-explanation/?ref=oin_asr11\",\n \"https://www.geeksforgeeks.org/deeppose-human-pose-estimation-via-deep-neural-networks/?ref=oin_asr13\",\n \"https://www.geeksforgeeks.org/auto-associative-neural-networks/?ref=oin_asr18\",\n \"https://www.geeksforgeeks.org/what-are-graph-neural-networks/?ref=oin_asr30\",\n \"https://hdsr.mitpress.mit.edu/pub/la3vitqm/release/2\",\n \"https://datasciencedojo.com/blog/a-guide-to-large-language-models/\",\n \"https://datasciencedojo.com/blog/bootstrap-sampling/\",\n \"https://datasciencedojo.com/blog/top-statistical-concepts/\",\n \"https://datasciencedojo.com/blog/probability-for-data-science/\",\n \"https://datasciencedojo.com/blog/top-statistical-techniques/\",\n \"https://datasciencedojo.com/blog/statistical-distributions/\",\n \"https://datasciencedojo.com/blog/data-science-in-finance/\",\n \"https://datasciencedojo.com/blog/random-forest-algorithm/\",\n \"https://datasciencedojo.com/blog/gini-index-and-entropy/\",\n \"https://datasciencedojo.com/blog/boosting-algorithms-in-machine-learning/\",\n \"https://datasciencedojo.com/blog/ensemble-methods-in-machine-learning/\",\n \"https://datasciencedojo.com/blog/langgraph-tutorial/\",\n \"https://datasciencedojo.com/blog/data-driven-marketing-in-2024/\",\n \"https://datasciencedojo.com/blog/on-device-ai/\",\n \"https://www.analyticsvidhya.com/blog/2023/10/a-comprehensive-guide-to-using-chains-in-langchain/\",\n \"https://blog.langchain.dev/langgraph/\",\n \"https://www.analyticsvidhya.com/blog/2024/09/passive-aggressive-algorithms/\",\n \"https://www.analyticsvidhya.com/blog/2017/09/common-machine-learning-algorithms/\",\n 'https://www.analyticsvidhya.com/blog/2023/09/retrieval-augmented-generation-rag-in-ai/',\n \"https://dl.acm.org/doi/full/10.1145/3547330\",\n \"https://www.datacamp.com/blog/what-is-natural-language-processing\",\n \"https://www.datacamp.com/blog/attention-mechanism-in-llms-intuition\",\n \"https://www.datacamp.com/tutorial/tutorial-natural-language-processing\",\n \"https://www.datacamp.com/blog/how-nlp-is-changing-the-future-of-data-science\",\n \"https://www.datacamp.com/blog/what-is-named-entity-recognition-ner\",\n \"https://www.datacamp.com/blog/ai-cybersecurity\",\n \"https://www.datacamp.com/blog/natural-language-understanding-nlu\",\n \"https://www.datacamp.com/blog/rlaif-reinforcement-learning-from-ai-feedback\",\n \"https://www.datacamp.com/blog/what-is-transfer-learning-in-ai-an-introductory-guide\",\n \"https://www.datacamp.com/blog/what-is-few-shot-learning\",\n \"https://www.datacamp.com/blog/what-is-continuous-learning\",\n \"https://www.datacamp.com/blog/what-is-tokenization\",\n \"https://www.datacamp.com/blog/curse-of-dimensionality-machine-learning\",\n \"https://www.datacamp.com/blog/what-is-overfitting\",\n \"https://www.datacamp.com/blog/what-is-an-algorithm\",\n \"https://www.datacamp.com/blog/what-is-a-generative-model\",\n \"https://www.datacamp.com/blog/what-is-online-machine-learning\",\n \"https://www.datacamp.com/blog/what-is-feature-learning\",\n \"https://www.datacamp.com/blog/what-is-cognitive-computing\",\n \"https://www.datacamp.com/blog/what-is-synthetic-data\",\n \"https://www.datacamp.com/blog/what-is-machine-perception\",\n \"https://www.datacamp.com/blog/introduction-to-unsupervised-learning\",\n \"https://www.datacamp.com/blog/what-is-machine-learning-inference\",\n \"https://www.datacamp.com/blog/machine-learning-models-explained\",\n \"https://www.datacamp.com/blog/top-machine-learning-interview-questions\",\n \"https://www.datacamp.com/blog/a-beginner-s-guide-to-the-machine-learning-workflow\",\n \"https://www.ibm.com/topics/artificial-intelligence-finance\",\n \"https://www.datacamp.com/blog/ai-in-finance\",\n \"https://www.oracle.com/erp/financials/ai-finance/\",\n \"https://www.geeksforgeeks.org/applications-of-ai/\",\n \"https://www.geeksforgeeks.org/super-intelligence-vs-artificial-intelligence/?ref=oin_asr2\",\n \"https://www.simplilearn.com/tutorials/artificial-intelligence-tutorial/artificial-intelligence-applications\",\n \"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9505413/\",\n \"https://www.nature.com/articles/s41592-024-02391-7\",\n \"https://www.nature.com/collections/ahhdhbhgha\",\n \"https://www.nationalacademies.org/news/2023/11/how-ai-is-shaping-scientific-discovery\",\n \"https://www.nationalacademies.org/news/2024/08/exploring-the-implications-of-ai-for-climate\",\n \"https://www.analyticsvidhya.com/blog/2017/09/common-machine-learning-algorithms/\",\n \"https://books.google.lt/books?id=JOOkDwAAQBAJ&printsec=frontcover&hl=lt#v=onepage&q&f=false\",\n \"https://www.geeksforgeeks.org/software-development-life-cycle-sdlc/\",\n \"https://www.geeksforgeeks.org/real-world-applications-of-sdlc-software-development-life-cycle/?ref=oin_asr3\",\n \"https://www.geeksforgeeks.org/software-engineering-comparison-of-different-life-cycle-models/?ref=oin_asr16\",\n \"https://www.geeksforgeeks.org/software-engineering-information-system-life-cycle/?ref=oin_asr2\",\n \"https://www.geeksforgeeks.org/difference-between-information-retrieval-and-information-extraction/?ref=oin_asr16\",\n \"https://www.geeksforgeeks.org/information-assurance-vs-information-security/?ref=oin_asr3\",\n \"https://www.geeksforgeeks.org/difference-between-software-security-and-cyber-security/?ref=oin_asr12\",\n \"https://www.geeksforgeeks.org/information-security-and-cyber-laws/?ref=oin_asr16\",\n \"https://www.geeksforgeeks.org/how-security-system-should-evolve-to-handle-cyber-security-threats-and-vulnerabilities/?ref=oin_asr4\",\n \"https://www.geeksforgeeks.org/a-simple-understanding-of-computer/\",\n \"https://www.geeksforgeeks.org/ethics-in-data-science-and-proper-privacy-and-usage-of-data/?ref=oin_asr12\",\n \"https://www.geeksforgeeks.org/data-visualization-interview-questions/\",\n 'https://www.geeksforgeeks.org/data-analyst-interview-questions-and-answers/',\n \"https://www.geeksforgeeks.org/python-for-machine-learning/\",\n \"https://www.geeksforgeeks.org/machine-learning-interview-questions/\",\n \"https://www.geeksforgeeks.org/image-processing-in-python/\",\n \"https://www.geeksforgeeks.org/text-preprocessing-in-python-set-1/\",\n \"https://www.geeksforgeeks.org/audio-processing-with-transformer/\",\n \"https://www.geeksforgeeks.org/nlp-interview-questions/\",\n 'https://www.geeksforgeeks.org/how-should-a-machine-learning-beginner-get-started-on-kaggle/?ref=oin_asr2',\n \"https://www.geeksforgeeks.org/getting-started-with-chatgpt-a-complete-guide-with-examples/?ref=oin_asr4\",\n \"https://www.geeksforgeeks.org/getting-started-with-classification/?ref=oin_asr7\",\n \"https://www.geeksforgeeks.org/getting-started-machine-learning/?ref=oin_asr7\",\n \"https://www.geeksforgeeks.org/bias-in-machine-learning-identifying-mitigating-and-preventing-discrimination/?ref=oin_asr6\",\n \"https://www.geeksforgeeks.org/understanding-hypothesis-testing/\",\n \"https://www.geeksforgeeks.org/mathematics-covariance-and-correlation/\",\n \"https://www.geeksforgeeks.org/confidence-interval/\",\n \"https://www.geeksforgeeks.org/normal-probability-plot/\",\n \"https://www.geeksforgeeks.org/mathematics-probability/\",\n \"https://www.geeksforgeeks.org/bayess-theorem-for-conditional-probability/\",\n \"https://www.geeksforgeeks.org/mathematics-probability-distributions-set-3-normal-distribution/?ref=rp\",\n \"https://blog.langchain.dev/langgraph-multi-agent-workflows/\",\n \"https://blog.langchain.dev/few-shot-prompting-to-improve-tool-calling-performance/\",\n \"https://www.microsoft.com/en-us/research/blog/graphrag-unlocking-llm-discovery-on-narrative-private-data/\",\n \"https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/graphrag-costs-explained-what-you-need-to-know/ba-p/4207978\",\n \"https://www.mongodb.com/resources/basics/vector-stores\",\n 'https://www.pinecone.io/learn/vector-database/',\n \"https://www.pinecone.io/learn/vectors-and-graphs-better-together/\",\n \"https://blog.langchain.dev/semi-structured-multi-modal-rag/\",\n \"https://www.pinecone.io/learn/refine-with-rerank/\",\n \"https://www.datacamp.com/blog/what-is-text-embedding-ai\",\n \"https://www.datacamp.com/blog/what-is-bert-an-intro-to-bert-models\",\n \"https://www.datacamp.com/blog/what-is-an-llm-a-guide-on-large-language-models\"\n \n \n\n \n \n \n]","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:04:47.537404Z","iopub.execute_input":"2024-09-10T13:04:47.537710Z","iopub.status.idle":"2024-09-10T13:04:47.563850Z","shell.execute_reply.started":"2024-09-10T13:04:47.537677Z","shell.execute_reply":"2024-09-10T13:04:47.563011Z"},"trusted":true},"execution_count":4,"outputs":[]},{"cell_type":"code","source":"def extract_sentences_from_web(links, chunk_size=500, chunk_overlap=30):\n data = []\n for link in links:\n loader = NewsURLLoader(urls=[link])\n data += loader.load()\n return data \n\n# Call the function\ndocs = extract_sentences_from_web(links=urls)","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:04:47.566268Z","iopub.execute_input":"2024-09-10T13:04:47.566600Z","iopub.status.idle":"2024-09-10T13:07:34.650749Z","shell.execute_reply.started":"2024-09-10T13:04:47.566568Z","shell.execute_reply":"2024-09-10T13:07:34.649711Z"},"trusted":true},"execution_count":5,"outputs":[]},{"cell_type":"code","source":"docs_list = [item for sublist in docs for item in sublist]","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:07:34.652425Z","iopub.execute_input":"2024-09-10T13:07:34.653762Z","iopub.status.idle":"2024-09-10T13:07:34.658562Z","shell.execute_reply.started":"2024-09-10T13:07:34.653711Z","shell.execute_reply":"2024-09-10T13:07:34.657565Z"},"trusted":true},"execution_count":6,"outputs":[]},{"cell_type":"code","source":"\nfrom langchain_community.vectorstores.utils import filter_complex_metadata\n\n\n\n\n\n\ntext_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(\n chunk_size=550,\n chunk_overlap=40,\n separators=[\n r'\\n\\n\\n', # Separate large sections\n r'\\n\\s*\\n', # Blank lines separating paragraphs\n r'\\n\\n', # Double newlines\n r'\\.\\s*\\n', # Sentence endings followed by a newline\n r'(?<=\\n)(?=\\w)', # Newline followed by a word\n r'(?<=\\w)\\n', # Word followed by a newline\n r'\\s{2,}', # Multiple spaces\n r'(?<=:)\\s*(?=\\w)', # Colon followed by a space and word\n r'(?<=\\])\\s*(?=\\w)', # Closing bracket followed by a space and word\n r'(?<=})\\s*(?=\\w)', # Closing brace followed by a space and word\n r'(?<=\\d)\\.\\s+', # Number followed by a period and space\n r'\\n{2,}', # Multiple newlines\n r'⬆\\s*back\\s*to\\s*top', # \"⬆ back to top\" as a section separator\n \n ],\n is_separator_regex=True\n)\n\n\ndoc_splits = text_splitter.split_documents(docs)\nfiltered_documents = filter_complex_metadata(doc_splits)\n\nmodel_name = \"Alibaba-NLP/gte-base-en-v1.5\"\nmodel_kwargs = {'device': 'cpu',\n \"trust_remote_code\" : 'False'}\nencode_kwargs = {'normalize_embeddings': True}\nembeddings = HuggingFaceEmbeddings(\n model_name=model_name,\n model_kwargs=model_kwargs,\n encode_kwargs=encode_kwargs\n )\n\n\n\n\n","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:07:34.659807Z","iopub.execute_input":"2024-09-10T13:07:34.660157Z","iopub.status.idle":"2024-09-10T13:07:43.383167Z","shell.execute_reply.started":"2024-09-10T13:07:34.660122Z","shell.execute_reply":"2024-09-10T13:07:43.382425Z"},"trusted":true},"execution_count":7,"outputs":[{"name":"stderr","text":"/tmp/ipykernel_203/1276892896.py:39: LangChainDeprecationWarning: The class `HuggingFaceEmbeddings` was deprecated in LangChain 0.2.2 and will be removed in 1.0. An updated version of the class exists in the langchain-huggingface package and should be used instead. To use it run `pip install -U langchain-huggingface` and import as `from langchain_huggingface import HuggingFaceEmbeddings`.\n embeddings = HuggingFaceEmbeddings(\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"modules.json: 0%| | 0.00/229 [00:00 Dict [ str , str ]: return {} def get_record ( self ) -> Union [ Record , None ]: return Record ()\\n\\n⬆ back to top\\n\\nUse searchable names\\n\\nWe will read more code than we will ever write. It\\'s important that the code we do write is readable and searchable. By not naming variables that end up being meaningful for understanding our program, we hurt our readers. Make your names searchable.\\n\\nBad:\\n\\nimport time # What is the number 86400 for again? time . sleep ( 86400 )\\n\\nGood:')"},"metadata":{}}]},{"cell_type":"code","source":"filtered_documents[0]","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:20.332769Z","iopub.execute_input":"2024-09-10T13:24:20.333079Z","iopub.status.idle":"2024-09-10T13:24:20.342824Z","shell.execute_reply.started":"2024-09-10T13:24:20.333048Z","shell.execute_reply":"2024-09-10T13:24:20.341984Z"},"trusted":true},"execution_count":12,"outputs":[{"execution_count":12,"output_type":"execute_result","data":{"text/plain":"Document(metadata={'title': 'zedr/clean-code-python: :bathtub: Clean Code concepts adapted for Python', 'link': 'https://github.com/zedr/clean-code-python', 'language': 'en', 'description': ':bathtub: Clean Code concepts adapted for Python. Contribute to zedr/clean-code-python development by creating an account on GitHub.'}, page_content='Table of Contents\\n\\nIntroduction\\n\\nSoftware engineering principles, from Robert C. Martin\\'s book Clean Code , adapted for Python. This is not a style guide. It\\'s a guide to producing readable, reusable, and refactorable software in Python.\\n\\nNot every principle herein has to be strictly followed, and even fewer will be universally agreed upon. These are guidelines and nothing more, but they are ones codified over many years of collective experience by the authors of Clean Code.\\n\\nAdapted from clean-code-javascript\\n\\nTargets Python3.7+\\n\\nVariables\\n\\nUse meaningful and pronounceable variable names\\n\\nBad:\\n\\nimport datetime ymdstr = datetime . date . today (). strftime ( \"%y-%m-%d\" )\\n\\nAdditionally, there\\'s no need to add the type of the variable (str) to its name.\\n\\nGood:\\n\\nimport datetime current_date : str = datetime . date . today (). strftime ( \"%y-%m-%d\" )\\n\\n⬆ back to top\\n\\nUse the same vocabulary for the same type of variable\\n\\nBad: Here we use three different names for the same underlying entity:\\n\\ndef get_user_info (): pass def get_client_data (): pass def get_customer_record (): pass\\n\\nGood: If the entity is the same, you should be consistent in referring to it in your functions:\\n\\ndef get_user_info (): pass def get_user_data (): pass def get_user_record (): pass\\n\\nEven better Python is (also) an object oriented programming language. If it makes sense, package the functions together with the concrete implementation of the entity in your code, as instance attributes, property methods, or methods:\\n\\nfrom typing import Union , Dict class Record : pass class User : info : str @ property def data ( self ) -> Dict [ str , str ]: return {} def get_record ( self ) -> Union [ Record , None ]: return Record ()\\n\\n⬆ back to top\\n\\nUse searchable names\\n\\nWe will read more code than we will ever write. It\\'s important that the code we do write is readable and searchable. By not naming variables that end up being meaningful for understanding our program, we hurt our readers. Make your names searchable.\\n\\nBad:\\n\\nimport time # What is the number 86400 for again? time . sleep ( 86400 )\\n\\nGood:')"},"metadata":{}}]},{"cell_type":"code","source":"filtered_documents[2]","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:20.343999Z","iopub.execute_input":"2024-09-10T13:24:20.344538Z","iopub.status.idle":"2024-09-10T13:24:20.354087Z","shell.execute_reply.started":"2024-09-10T13:24:20.344495Z","shell.execute_reply":"2024-09-10T13:24:20.353229Z"},"trusted":true},"execution_count":13,"outputs":[{"execution_count":13,"output_type":"execute_result","data":{"text/plain":"Document(metadata={'title': 'zedr/clean-code-python: :bathtub: Clean Code concepts adapted for Python', 'link': 'https://github.com/zedr/clean-code-python', 'language': 'en', 'description': ':bathtub: Clean Code concepts adapted for Python. Contribute to zedr/clean-code-python development by creating an account on GitHub.'}, page_content='Good:\\n\\nlocations = ( \"Austin\" , \"New York\" , \"San Francisco\" ) for location in locations : # do_stuff() # do_some_other_stuff() # ... print ( location )\\n\\n⬆ back to top\\n\\nDon\\'t add unneeded context\\n\\nIf your class/object name tells you something, don\\'t repeat that in your variable name.\\n\\nBad:\\n\\nclass Car : car_make : str car_model : str car_color : str\\n\\nGood:\\n\\nclass Car : make : str model : str color : str\\n\\n⬆ back to top\\n\\nUse default arguments instead of short circuiting or conditionals\\n\\nTricky\\n\\nWhy write:\\n\\nimport hashlib def create_micro_brewery ( name ): name = \"Hipster Brew Co.\" if name is None else name slug = hashlib . sha1 ( name . encode ()). hexdigest () # etc.\\n\\n... when you can specify a default argument instead? This also makes it clear that you are expecting a string as the argument.\\n\\nGood:\\n\\nimport hashlib def create_micro_brewery ( name : str = \"Hipster Brew Co.\" ): slug = hashlib . sha1 ( name . encode ()). hexdigest () # etc.\\n\\n⬆ back to top\\n\\nFunctions\\n\\nFunctions should do one thing\\n\\nThis is by far the most important rule in software engineering. When functions do more than one thing, they are harder to compose, test, and reason about. When you can isolate a function to just one action, they can be refactored easily and your code will read much cleaner. If you take nothing else away from this guide other than this, you\\'ll be ahead of many developers.\\n\\nBad:\\n\\nfrom typing import List class Client : active : bool def email ( client : Client ) -> None : pass def email_clients ( clients : List [ Client ]) -> None : \"\"\"Filter active clients and send them an email. \"\"\" for client in clients : if client . active : email ( client )\\n\\nGood:')"},"metadata":{}}]},{"cell_type":"code","source":"filtered_documents[22]","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:20.355278Z","iopub.execute_input":"2024-09-10T13:24:20.355657Z","iopub.status.idle":"2024-09-10T13:24:20.369850Z","shell.execute_reply.started":"2024-09-10T13:24:20.355616Z","shell.execute_reply":"2024-09-10T13:24:20.369001Z"},"trusted":true},"execution_count":14,"outputs":[{"execution_count":14,"output_type":"execute_result","data":{"text/plain":"Document(metadata={'title': 'Python behind the scenes #10: how Python dictionaries work', 'link': 'https://tenthousandmeters.com/blog/python-behind-the-scenes-10-how-python-dictionaries-work/', 'language': 'en', 'description': \"Python dictionaries are an extremely important part of Python. Of course they are important because programmers use them a lot, but that's not the...\"}, page_content='where \\\\(base\\\\) is the size of the alphabet.\\n\\nWith this approach, different keys may map to the same bucket. In fact, if the number of possible keys is larger than the number of buckets, then some key will always map to the same bucket no matter what hash function we choose. So we have to find a way to handle hash collisions. One popular method to do that is called chaining. The idea of chaining is to associate an additional data structure with each bucket and store all the items that hash to the same bucket in that data structure. The following picture shows a hash table that uses linked lists for chaining:')"},"metadata":{}}]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"from langchain.schema import Document\nfrom langchain_community.tools.tavily_search import TavilySearchResults\n\nweb_search_tool = ExaSearchRetriever(\n k=5, text_contents_options=TextContentsOptions(max_length=300))","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:20.370904Z","iopub.execute_input":"2024-09-10T13:24:20.371211Z","iopub.status.idle":"2024-09-10T13:24:20.388274Z","shell.execute_reply.started":"2024-09-10T13:24:20.371180Z","shell.execute_reply":"2024-09-10T13:24:20.387386Z"},"trusted":true},"execution_count":15,"outputs":[]},{"cell_type":"code","source":"llm = ChatGroq(\n model=\"gemma2-9b-it\", \n temperature=0.0,\n max_tokens=400,\n max_retries=3\n)","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:20.389380Z","iopub.execute_input":"2024-09-10T13:24:20.389663Z","iopub.status.idle":"2024-09-10T13:24:20.489668Z","shell.execute_reply.started":"2024-09-10T13:24:20.389633Z","shell.execute_reply":"2024-09-10T13:24:20.488996Z"},"trusted":true},"execution_count":16,"outputs":[]},{"cell_type":"code","source":"\nfrom langchain.prompts import PromptTemplate\nfrom langchain_core.output_parsers import StrOutputParser\n\nprompt = PromptTemplate(\n template=\"\"\"You are an assistant for question-answering tasks. \n \n Use the following pieces of retrieved documents to answer the question. If you don't know the answer, just say that you don't know.\n Do not repeat yourself!\n Be informative and consise.\n\n Question: {question} \n\n documents : {documents} \n\n Answer:\n \"\"\",\n input_variables=[\"question\", \"documents\"],\n)\n\n\n\nrag_chain = prompt | llm | StrOutputParser()","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:20.490598Z","iopub.execute_input":"2024-09-10T13:24:20.490869Z","iopub.status.idle":"2024-09-10T13:24:20.497168Z","shell.execute_reply.started":"2024-09-10T13:24:20.490839Z","shell.execute_reply":"2024-09-10T13:24:20.496178Z"},"trusted":true},"execution_count":17,"outputs":[]},{"cell_type":"code","source":"grammar = r\"\"\"\nroot ::= RagGrade\nGrade ::= \"{\" ws \"\\\"value\\\":\" ws string \"}\"\nGradelist ::= \"[]\" | \"[\" ws Grade (\",\" ws Grade)* \"]\"\nRagGrade ::= \"{\" ws \"\\\"grade\\\":\" ws Grade \"}\"\nRagGradelist ::= \"[]\" | \"[\" ws RagGrade (\",\" ws RagGrade)* \"]\"\nstring ::= \"\\\"\" ([^\"]*) \"\\\"\"\nboolean ::= \"true\" | \"false\"\nws ::= [ \\t\\n]*\nnumber ::= [0-9]+ \".\"? [0-9]*\nstringlist ::= \"[\" ws \"]\" | \"[\" ws string (\",\" ws string)* ws \"]\"\nnumberlist ::= \"[\" ws \"]\" | \"[\" ws string (\",\" ws number)* ws \"]\"\n\"\"\"","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:20.497982Z","iopub.execute_input":"2024-09-10T13:24:20.498278Z","iopub.status.idle":"2024-09-10T13:24:20.512462Z","shell.execute_reply.started":"2024-09-10T13:24:20.498247Z","shell.execute_reply":"2024-09-10T13:24:20.511731Z"},"trusted":true},"execution_count":18,"outputs":[]},{"cell_type":"code","source":"from langchain_core.pydantic_v1 import BaseModel, Field\n\n\n# Data model for the output\nclass GradeDocuments(BaseModel):\n \"\"\"Binary score for relevance check on retrieved documents.\"\"\"\n\n binary_score: str = Field(\n description=\"Documents are relevant to the question, 'yes' or 'no'\"\n )\n\n\n# LLM with tool call\nstructured_llm_grader = llm.with_structured_output(GradeDocuments)\n\nprompt = PromptTemplate(\n template=\"\"\"You are a teacher grading a quiz. You will be given: \n 1/ a QUESTION\n 2/ A FACT provided by the student\n \n You are grading RELEVANCE RECALL:\n A score of 1 means that ANY of the statements in the FACT are relevant to the QUESTION. \n A score of 0 means that NONE of the statements in the FACT are relevant to the QUESTION. \n 1 is the highest (best) score. 0 is the lowest score you can give. \n \n Explain your reasoning in a step-by-step manner. Ensure your reasoning and conclusion are correct. \n \n Avoid simply stating the correct answer at the outset.\n \n Question: {question} \\n\n Fact: \\n\\n {documents} \\n\\n\n \n Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \\n\n Provide the binary score as a JSON with a single key 'score' and no premable or explanation.\n \"\"\",\n input_variables=['documents', 'question'],\n)\n\n\nretrieval_grader = prompt | structured_llm_grader","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:20.513466Z","iopub.execute_input":"2024-09-10T13:24:20.513733Z","iopub.status.idle":"2024-09-10T13:24:20.526789Z","shell.execute_reply.started":"2024-09-10T13:24:20.513703Z","shell.execute_reply":"2024-09-10T13:24:20.526106Z"},"trusted":true},"execution_count":19,"outputs":[]},{"cell_type":"code","source":"class PromtGuard(BaseModel):\n \"\"\"Binary score for toxicity check on question.\"\"\"\n\n score: str = Field(\n description=\"Is the question toxic, 'yes' or 'no'\"\n )\n\n\n# LLM with tool call\nstructured_llm_toxicity_checker = llm.with_structured_output(PromtGuard)\n\nprompt = PromptTemplate(\n template=\"\"\"You are a classifier that will score a text include based upon supplied criteria. \n\nHere is the criteria for scoring:\n\n(1) insults\n\n(2) threats\n\n(3) highly negative comments\n\n(4) any Personally Identifiable Information such as API Keys\n\nScore:\n\nA score of yes means that the text meets ANY of the criteria. \n\nA score of no means that the text does not meet ALL of the criteria. \n Here is the question: {question}\n Give a binary score 'yes' or 'no' to indicate whether the answer is useful to resolve a question.\n If the answer contains repeated phrases, repetition, then return 'no'\\n\n Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.\"\"\",\n input_variables=[ \"question\"],\n)\n\ntoxicity_grader = prompt | structured_llm_toxicity_checker\n","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:20.527934Z","iopub.execute_input":"2024-09-10T13:24:20.528311Z","iopub.status.idle":"2024-09-10T13:24:20.550456Z","shell.execute_reply.started":"2024-09-10T13:24:20.528269Z","shell.execute_reply":"2024-09-10T13:24:20.549735Z"},"trusted":true},"execution_count":20,"outputs":[]},{"cell_type":"code","source":"toxicity_grader.invoke({\"question\": \"What up nigger?\"})","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:20.553211Z","iopub.execute_input":"2024-09-10T13:24:20.553762Z","iopub.status.idle":"2024-09-10T13:24:20.983063Z","shell.execute_reply.started":"2024-09-10T13:24:20.553701Z","shell.execute_reply":"2024-09-10T13:24:20.982196Z"},"trusted":true},"execution_count":21,"outputs":[{"execution_count":21,"output_type":"execute_result","data":{"text/plain":"PromtGuard(score='yes')"},"metadata":{}}]},{"cell_type":"code","source":"def grade_question_toxicity(state):\n \"\"\"\n Grades the question for toxicity.\n \n Args:\n state (dict): The current graph state.\n \n Returns:\n str: 'good' if the question passes the toxicity check, 'bad' otherwise.\n \"\"\"\n steps = state[\"steps\"]\n steps.append(\"promt guard\")\n score = toxicity_grader.invoke({\"question\": state[\"question\"]})\n grade = getattr(score, 'score', None)\n \n if grade == \"yes\":\n return \"bad\" \n else:\n return \"good\"\n ","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:20.984298Z","iopub.execute_input":"2024-09-10T13:24:20.984674Z","iopub.status.idle":"2024-09-10T13:24:20.990873Z","shell.execute_reply.started":"2024-09-10T13:24:20.984628Z","shell.execute_reply":"2024-09-10T13:24:20.990010Z"},"trusted":true},"execution_count":22,"outputs":[]},{"cell_type":"code","source":"class GradeHelpfulness(BaseModel):\n \"\"\"Binary score for Helpfulness check on answer.\"\"\"\n\n score: str = Field(\n description=\"Is the answer helpfulness, 'yes' or 'no'\"\n )\n\n\n# LLM with tool call\nstructured_llm_helpfulness_checker = llm.with_structured_output(GradeHelpfulness)\n\nprompt = PromptTemplate(\n template=\"\"\"You will be given a QUESTION and a STUDENT ANSWER. \n\n Here is the grade criteria to follow:\n\n (1) Ensure the STUDENT ANSWER is concise and relevant to the QUESTION\n\n (2) Ensure the STUDENT ANSWER helps to answer the QUESTION\n\n Score:\n\n A score of yes means that the student's answer meets all of the criteria. This is the highest (best) score. \n\n A score of no means that the student's answer does not meet all of the criteria. This is the lowest possible score you can give.\n\nExplain your reasoning in a step-by-step manner to ensure your reasoning and conclusion are correct. \n\nAvoid simply stating the correct answer at the outset.\n \n If the answer contains repeated phrases, repetition, then return 'no'\\n\n Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.\"\"\",\n input_variables=[\"generation\", \"question\"],\n)\n\nanswer_grader = prompt | structured_llm_helpfulness_checker\n","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:20.992091Z","iopub.execute_input":"2024-09-10T13:24:20.992387Z","iopub.status.idle":"2024-09-10T13:24:21.007690Z","shell.execute_reply.started":"2024-09-10T13:24:20.992354Z","shell.execute_reply":"2024-09-10T13:24:21.006836Z"},"trusted":true},"execution_count":23,"outputs":[]},{"cell_type":"code","source":"answer_grader.invoke({\"question\": \"Who won the NBA finals in 2024\" , \"generation\": \"The Champions of NBA in 2024 was Boston Celtics\"})","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:21.008768Z","iopub.execute_input":"2024-09-10T13:24:21.009165Z","iopub.status.idle":"2024-09-10T13:24:21.339498Z","shell.execute_reply.started":"2024-09-10T13:24:21.009131Z","shell.execute_reply":"2024-09-10T13:24:21.338667Z"},"trusted":true},"execution_count":24,"outputs":[{"execution_count":24,"output_type":"execute_result","data":{"text/plain":"GradeHelpfulness(score='yes')"},"metadata":{}}]},{"cell_type":"code","source":"def grade_document_relevance(question: str, document: str):\n input_data = {\"documents\": documents,\"question\": question, }\n try:\n result = retrieval_grader.invoke(input_data)\n return result\n except Exception as e:\n print(f\"Error parsing result: {e}\")\n return {\"score\": \"no\"} # Default to \"no\" if there is an error\n\nquestion = \"What are the types of agent memory?\"\ndocuments = \"Agents can have various types of memory, such as short-term memory and long-term memory.\"\ngrade = grade_document_relevance(documents,question )\nprint(grade) # Expected output: {'value': 'yes'}","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:21.340654Z","iopub.execute_input":"2024-09-10T13:24:21.341031Z","iopub.status.idle":"2024-09-10T13:24:21.666524Z","shell.execute_reply.started":"2024-09-10T13:24:21.340987Z","shell.execute_reply":"2024-09-10T13:24:21.665720Z"},"trusted":true},"execution_count":25,"outputs":[{"name":"stdout","text":"binary_score='yes'\n","output_type":"stream"}]},{"cell_type":"code","source":"class GradeHaliucinations(BaseModel):\n \"\"\"Binary score for haliucinations check on answer.\"\"\"\n\n score: str = Field(\n description=\"Answer contains haliucinations, 'yes' or 'no'\"\n )\n\n\nstructured_llm_haliucinations_checker = llm.with_structured_output(GradeHaliucinations)\n\nprompt = PromptTemplate(\n template=\"\"\"You are a teacher grading a quiz. \n\nYou will be given FACTS and a STUDENT ANSWER. \n\nYou are grading STUDENT ANSWER of source FACTS. Focus on correctness of the STUDENT ANSWER and detection of any hallucinations.\n\nEnsure that the STUDENT ANSWER meets the following criteria: \n\n(1) it does not contain information outside of the FACTS\n\n(2) the STUDENT ANSWER should be fully grounded in and based upon information in the source documents\n\n\nScore:\n\nA score of yes means that the student's answer meets all of the criteria. This is the highest (best) score. \n\nA score of no means that the student's answer does not meet all of the criteria. This is the lowest possible score you can give.\n\nExplain your reasoning in a step-by-step manner to ensure your reasoning and conclusion are correct. \n\nAvoid simply stating the correct answer at the outset.\nSTUDENT ANSWER: {generation} \\n\n Fact: \\n\\n {documents} \\n\\n\n \n Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \\n\n Provide the binary score as a JSON with a single key 'score' and no premable or explanation.\n\"\"\",\n input_variables=[\"generation\", \"documents\"],\n)\n\nhallucination_grader = prompt | structured_llm_haliucinations_checker\n","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:21.667644Z","iopub.execute_input":"2024-09-10T13:24:21.668023Z","iopub.status.idle":"2024-09-10T13:24:21.676739Z","shell.execute_reply.started":"2024-09-10T13:24:21.667980Z","shell.execute_reply":"2024-09-10T13:24:21.675865Z"},"trusted":true},"execution_count":26,"outputs":[]},{"cell_type":"code","source":"hallucination_grader.invoke({'generation': \"Boston celtics won NBA in 2024\", 'documents' : \"The champions of Nba in 2024 was The Boston Celtics\"})","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:21.677786Z","iopub.execute_input":"2024-09-10T13:24:21.678096Z","iopub.status.idle":"2024-09-10T13:24:22.012286Z","shell.execute_reply.started":"2024-09-10T13:24:21.678065Z","shell.execute_reply":"2024-09-10T13:24:22.011439Z"},"trusted":true},"execution_count":27,"outputs":[{"execution_count":27,"output_type":"execute_result","data":{"text/plain":"GradeHaliucinations(score='no')"},"metadata":{}}]},{"cell_type":"code","source":"re_write_prompt = PromptTemplate(\n template=\"\"\"You are a question re-writer that converts an input question to a better version that is optimized for vector store retrieval.\\n\n Your task is to enhance the question by clarifying the intent, removing any ambiguity, and including specific details to retrieve the most relevant information.. \\n\n I dont' need explanations, only enhanced question.\n Here is the initial question: \\n\\n {question}. Improved question with no preamble: \\n \"\"\",\n input_variables=[\"question\", \"question\"],\n)\n\nquestion_rewriter = re_write_prompt | llm | StrOutputParser()","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:22.013561Z","iopub.execute_input":"2024-09-10T13:24:22.013928Z","iopub.status.idle":"2024-09-10T13:24:22.019085Z","shell.execute_reply.started":"2024-09-10T13:24:22.013886Z","shell.execute_reply":"2024-09-10T13:24:22.018202Z"},"trusted":true},"execution_count":28,"outputs":[]},{"cell_type":"code","source":"def transform_query(state):\n \"\"\"\n Transform the query to produce a better question.\n\n Args:\n state (dict): The current graph state\n\n Returns:\n state (dict): Updates question key with a re-phrased question\n \"\"\"\n\n print(\"---TRANSFORM QUERY---\")\n question = state[\"question\"]\n documents = state[\"documents\"]\n steps = state[\"steps\"]\n steps.append(\"question_transformation\")\n\n # Re-write question\n better_question = question_rewriter.invoke({\"question\": question})\n print(f\" Transformed question: {better_question}\")\n return {\"documents\": documents, \"question\": better_question}\n","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:22.027131Z","iopub.execute_input":"2024-09-10T13:24:22.027822Z","iopub.status.idle":"2024-09-10T13:24:22.035332Z","shell.execute_reply.started":"2024-09-10T13:24:22.027788Z","shell.execute_reply":"2024-09-10T13:24:22.034619Z"},"trusted":true},"execution_count":29,"outputs":[]},{"cell_type":"code","source":"not_supported_count = 0","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:22.036392Z","iopub.execute_input":"2024-09-10T13:24:22.036688Z","iopub.status.idle":"2024-09-10T13:24:22.046986Z","shell.execute_reply.started":"2024-09-10T13:24:22.036657Z","shell.execute_reply":"2024-09-10T13:24:22.046255Z"},"trusted":true},"execution_count":30,"outputs":[]},{"cell_type":"code","source":"from langchain.schema import Document\n\ndef format_google_results(google_results):\n formatted_documents = []\n \n # Loop through each organic result and create a Document for it\n for result in google_results['organic']:\n title = result.get('title', 'No title')\n link = result.get('link', 'No link')\n snippet = result.get('snippet', 'No summary available')\n\n # Create a Document object with similar metadata structure to WikipediaRetriever\n document = Document(\n metadata={\n 'title': title,\n 'summary': snippet,\n 'source': link\n },\n page_content=snippet # Using the snippet as the page content\n )\n \n formatted_documents.append(document)\n \n return formatted_documents","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:22.048004Z","iopub.execute_input":"2024-09-10T13:24:22.048365Z","iopub.status.idle":"2024-09-10T13:24:22.058023Z","shell.execute_reply.started":"2024-09-10T13:24:22.048321Z","shell.execute_reply":"2024-09-10T13:24:22.057141Z"},"trusted":true},"execution_count":31,"outputs":[]},{"cell_type":"code","source":"def grade_generation_v_documents_and_question(state):\n \"\"\"\n Determines whether the generation is grounded in the document and answers the question.\n \"\"\"\n print(\"---CHECK HALLUCINATIONS---\")\n question = state[\"question\"]\n documents = state[\"documents\"]\n generation = state[\"generation\"]\n generation_count = state.get(\"generation_count\") # Use state.get to avoid KeyError\n print(f\" generation number: {generation_count}\")\n \n # Grading hallucinations\n score = hallucination_grader.invoke(\n {\"documents\": documents, \"generation\": generation}\n )\n grade = getattr(score, 'score', None)\n\n # Check hallucination\n if grade == \"yes\":\n print(\"---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\")\n # Check question-answering\n print(\"---GRADE GENERATION vs QUESTION---\")\n score = answer_grader.invoke({\"question\": question, \"generation\": generation})\n grade = getattr(score, 'score', None)\n if grade == \"yes\":\n print(\"---DECISION: GENERATION ADDRESSES QUESTION---\")\n return \"useful\"\n else:\n print(\"---DECISION: GENERATION DOES NOT ADDRESS QUESTION---\")\n return \"not useful\"\n else:\n if generation_count > 1:\n print(\"---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, TRANSFORM QUERY---\")\n # Reset count if it exceeds limit\n return \"not useful\"\n else:\n print(\"---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---\")\n # Increment correctly here\n print(f\" generation number after increment: {state['generation_count']}\")\n return \"not supported\"","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:22.059331Z","iopub.execute_input":"2024-09-10T13:24:22.060097Z","iopub.status.idle":"2024-09-10T13:24:22.074870Z","shell.execute_reply.started":"2024-09-10T13:24:22.060062Z","shell.execute_reply":"2024-09-10T13:24:22.074165Z"},"trusted":true},"execution_count":32,"outputs":[]},{"cell_type":"code","source":"from typing_extensions import TypedDict, List\nfrom IPython.display import Image, display\nfrom langgraph.graph import START, END, StateGraph\nfrom langchain.schema import Document\n\nclass GraphState(TypedDict):\n \"\"\"\n Represents the state of our graph.\n\n Attributes:\n question: question\n generation: LLM generation\n search: whether to add search\n documents: list of documents\n generations_count : generations count\n \"\"\"\n\n question: str\n generation: str\n search: str\n documents: List[str]\n steps: List[str]\n generation_count: int\n\ndef ask_question(state):\n \"\"\"\n Initialize question\n\n Args:\n state (dict): The current graph state\n\n Returns:\n state (dict): Question\n \"\"\"\n steps = state[\"steps\"]\n question = state[\"question\"]\n generations_count = state.get(\"generations_count\", 0) \n documents = retriever.invoke(question)\n \n steps.append(\"question_asked\")\n return {\"question\": question, \"steps\": steps,\"generation_count\": generations_count}\n \n \ndef retrieve(state):\n \"\"\"\n Retrieve documents\n\n Args:\n state (dict): The current graph state\n\n Returns:\n state (dict): New key added to state, documents, that contains retrieved documents\n \"\"\"\n steps = state[\"steps\"]\n question = state[\"question\"]\n \n documents = retriever.invoke(question)\n \n steps.append(\"retrieve_documents\")\n return {\"documents\": documents, \"question\": question, \"steps\": steps}\n\n\ndef generate(state):\n \"\"\"\n Generate answer\n \"\"\"\n question = state[\"question\"]\n documents = state[\"documents\"]\n generation = rag_chain.invoke({\"documents\": documents, \"question\": question})\n steps = state[\"steps\"]\n steps.append(\"generate_answer\")\n generation_count = state[\"generation_count\"]\n \n generation_count += 1\n \n return {\n \"documents\": documents,\n \"question\": question,\n \"generation\": generation,\n \"steps\": steps,\n \"generation_count\": generation_count # Include generation_count in return\n }\n\n\ndef grade_documents(state):\n question = state[\"question\"]\n documents = state[\"documents\"]\n steps = state[\"steps\"]\n steps.append(\"grade_document_retrieval\")\n \n filtered_docs = []\n web_results_list = []\n search = \"No\"\n \n for d in documents:\n # Call the grading function\n score = retrieval_grader.invoke({\"question\": question, \"documents\": d.page_content})\n print(f\"Grader output for document: {score}\") # Detailed debugging output\n \n # Extract the grade\n grade = getattr(score, 'binary_score', None)\n if grade and grade.lower() in [\"yes\", \"true\", \"1\"]:\n filtered_docs.append(d)\n elif len(filtered_docs) < 4: \n search = \"Yes\"\n \n # Check the decision-making process\n print(f\"Final decision - Perform web search: {search}\")\n print(f\"Filtered documents count: {len(filtered_docs)}\")\n \n return {\n \"documents\": filtered_docs,\n \"question\": question,\n \"search\": search,\n \"steps\": steps,\n }\n\ndef web_search(state):\n question = state[\"question\"]\n documents = state.get(\"documents\")\n steps = state[\"steps\"]\n steps.append(\"web_search\")\n k = 4 - len(documents)\n good_wiki_splits = []\n good_exa_splits = []\n web_results_list = []\n\n wiki_results = WikipediaRetriever( lang = 'en',top_k_results = 1,doc_content_chars_max = 1000).invoke(question)\n \n \n if k<1:\n combined_documents = documents + wiki_results \n else:\n web_results = GoogleSerperAPIWrapper(k = k).results(question)\n formatted_documents = format_google_results(web_results)\n for doc in formatted_documents:\n web_results_list.append(doc)\n \n \n combined_documents = documents + wiki_results + web_results_list\n\n return {\"documents\": combined_documents, \"question\": question, \"steps\": steps}\n\ndef decide_to_generate(state):\n \"\"\"\n Determines whether to generate an answer, or re-generate a question.\n\n Args:\n state (dict): The current graph state\n\n Returns:\n str: Binary decision for next node to call\n \"\"\"\n search = state[\"search\"]\n if search == \"Yes\":\n return \"search\"\n else:\n return \"generate\"\n\n\n# Graph\nworkflow = StateGraph(GraphState)\n\n# Define the nodes\nworkflow.add_node(\"ask_question\", ask_question)\nworkflow.add_node(\"retrieve\", retrieve) # retrieve\nworkflow.add_node(\"grade_documents\", grade_documents) # grade documents\nworkflow.add_node(\"generate\", generate) # generatae\nworkflow.add_node(\"web_search\", web_search) # web search\nworkflow.add_node(\"transform_query\", transform_query)\n\n\n# Build graph\nworkflow.set_entry_point(\"ask_question\")\nworkflow.add_conditional_edges(\n \"ask_question\",\n grade_question_toxicity,\n \n {\n \"good\": \"retrieve\",\n 'bad': END,\n \n },\n)\n\nworkflow.add_edge(\"retrieve\", \"grade_documents\")\nworkflow.add_conditional_edges(\n \"grade_documents\",\n decide_to_generate,\n {\n \"search\": \"web_search\",\n \"generate\": \"generate\",\n \n },\n)\nworkflow.add_edge(\"web_search\", \"generate\")\nworkflow.add_conditional_edges(\n \"generate\",\n grade_generation_v_documents_and_question,\n {\n \"not supported\": \"generate\",\n \"useful\": END,\n \"not useful\": \"transform_query\",\n },\n)\n\nworkflow.add_edge(\"transform_query\", \"retrieve\")\n\ncustom_graph = workflow.compile()\n\ndisplay(Image(custom_graph.get_graph(xray=True).draw_mermaid_png()))","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:22.076066Z","iopub.execute_input":"2024-09-10T13:24:22.076343Z","iopub.status.idle":"2024-09-10T13:24:23.223047Z","shell.execute_reply.started":"2024-09-10T13:24:22.076313Z","shell.execute_reply":"2024-09-10T13:24:23.221908Z"},"trusted":true},"execution_count":33,"outputs":[{"output_type":"display_data","data":{"image/jpeg":"","text/plain":""},"metadata":{}}]},{"cell_type":"code","source":"WikipediaRetriever( lang = 'en',top_k_results = 1,doc_content_chars_max = 1000).invoke(\"Who was the first king of Lithuania\")","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:23.224552Z","iopub.execute_input":"2024-09-10T13:24:23.224986Z","iopub.status.idle":"2024-09-10T13:24:24.529686Z","shell.execute_reply.started":"2024-09-10T13:24:23.224915Z","shell.execute_reply":"2024-09-10T13:24:24.528720Z"},"trusted":true},"execution_count":34,"outputs":[{"execution_count":34,"output_type":"execute_result","data":{"text/plain":"[Document(metadata={'title': 'Kingdom of Lithuania', 'summary': \"The Kingdom of Lithuania was a sovereign state that existed from the 17 July 1251 until the death of the first crowned king of Lithuania, Mindaugas, on 12 September 1263. Mindaugas was the only Lithuanian monarch crowned king with the assent of the Pope and the head of the first catholic Lithuanian state. The formation of the kingdom is widely regarded as a partially successful attempt at unifying all surrounding Baltic tribes, including the Old Prussians, into a single unified state under a common king.\\nOther monarchs of Lithuania were referred to as grand dukes, kings or emperors in extant foreign written sources as the size of the realm and their power expanded or contracted. This practice can be compared to that of British, Japanese and many other monarchs who are known as kings or emperors in spite of not being crowned with the assent of the Pope. Because Lithuania was pagan in the 13th century, Lithuanian monarchs were not granted the title of a Catholic monarch even though extant Christian sources referred to Lithuanian rulers as kings or emperors regardless of their religious affiliation. For instance, Gediminas titled himself King of Lithuania and Rus, and Duke of Semigalia. The Pope also addressed him as King. \\nThe confusion stems from eastern and western European traditions of royal hierarchy and titles. In Eastern Europe, the title of grand duke equalled king and sometimes emperor. In Western Europe, the title of grand duke is reserved to monarchs of small polities and ranks junior to king and emperor.\\nAfter the formal Christianization and especially after the creation of the personal union with Poland, the Kings of Poland–Lithuania retained the separate titles of Grand Dukes of Lithuania and Kings of Poland (similarly to how the Emperors of Austria–Hungary had retained the separate titles of Emperor of Austria and King of Hungary, to some extent).\\nThe Catholic crown was to be received from the Pope or Holy Roman Emperor, but being multireligious and powerful, Lithuania was not subservient to either and with rare exceptions did not pursue the title. For diplomatic reasons three further attempts were made to re-establish the Kingdom status – by Vytautas the Great in 1430, by Švitrigaila who wanted to continue Vytautas' attempts at the coronation, and by the Council of Lithuania in 1918.\", 'source': 'https://en.wikipedia.org/wiki/Kingdom_of_Lithuania'}, page_content='The Kingdom of Lithuania was a sovereign state that existed from the 17 July 1251 until the death of the first crowned king of Lithuania, Mindaugas, on 12 September 1263. Mindaugas was the only Lithuanian monarch crowned king with the assent of the Pope and the head of the first catholic Lithuanian state. The formation of the kingdom is widely regarded as a partially successful attempt at unifying all surrounding Baltic tribes, including the Old Prussians, into a single unified state under a common king.\\nOther monarchs of Lithuania were referred to as grand dukes, kings or emperors in extant foreign written sources as the size of the realm and their power expanded or contracted. This practice can be compared to that of British, Japanese and many other monarchs who are known as kings or emperors in spite of not being crowned with the assent of the Pope. Because Lithuania was pagan in the 13th century, Lithuanian monarchs were not granted the title of a Catholic monarch even though extan')]"},"metadata":{}}]},{"cell_type":"code","source":"GoogleSerperAPIWrapper(k = 4).results(\"Who was the first king of Lithuania?\")","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:24.531046Z","iopub.execute_input":"2024-09-10T13:24:24.531511Z","iopub.status.idle":"2024-09-10T13:24:25.225895Z","shell.execute_reply.started":"2024-09-10T13:24:24.531465Z","shell.execute_reply":"2024-09-10T13:24:25.225002Z"},"trusted":true},"execution_count":35,"outputs":[{"execution_count":35,"output_type":"execute_result","data":{"text/plain":"{'searchParameters': {'q': 'Who was the first king of Lithuania?',\n 'gl': 'us',\n 'hl': 'en',\n 'type': 'search',\n 'num': 4,\n 'engine': 'google'},\n 'organic': [{'title': 'List of Lithuanian monarchs - Wikipedia',\n 'link': 'https://en.wikipedia.org/wiki/List_of_Lithuanian_monarchs',\n 'snippet': 'The hereditary monarchy in Lithuania was first established in the 13th century during the reign of Mindaugas I and officially re-established as a ...',\n 'sitelinks': [{'title': 'Titles',\n 'link': 'https://en.wikipedia.org/wiki/List_of_Lithuanian_monarchs#Titles'},\n {'title': 'Inaugurations of Lithuanian...',\n 'link': 'https://en.wikipedia.org/wiki/List_of_Lithuanian_monarchs#Inaugurations_of_Lithuanian_monarchs'},\n {'title': 'List',\n 'link': 'https://en.wikipedia.org/wiki/List_of_Lithuanian_monarchs#List'},\n {'title': 'Union of Lublin',\n 'link': 'https://en.wikipedia.org/wiki/List_of_Lithuanian_monarchs#Union_of_Lublin'}],\n 'position': 1},\n {'title': 'Kingdom of Lithuania - Wikipedia',\n 'link': 'https://en.wikipedia.org/wiki/Kingdom_of_Lithuania',\n 'snippet': 'The Kingdom of Lithuania was a sovereign state that existed from the 17 July 1251 until the death of the first crowned king of Lithuania, Mindaugas, on 12 ...',\n 'sitelinks': [{'title': 'King Mindaugas',\n 'link': 'https://en.wikipedia.org/wiki/Kingdom_of_Lithuania#King_Mindaugas'},\n {'title': 'Attempts to re-establish the...',\n 'link': 'https://en.wikipedia.org/wiki/Kingdom_of_Lithuania#Attempts_to_re-establish_the_Kingdom'}],\n 'position': 2},\n {'title': 'Mindaugas | Grand Duke, Lithuania, Coronation | Britannica',\n 'link': 'https://www.britannica.com/biography/Mindaugas',\n 'snippet': 'Mindaugas (died 1263) was a ruler of Lithuania, considered the founder of the Lithuanian state. He was also the first Lithuanian ruler to become a Christian.',\n 'position': 3},\n {'title': 'Historical development - Eurydice - European Union',\n 'link': 'https://eurydice.eacea.ec.europa.eu/national-education-systems/lithuania/historical-development',\n 'snippet': \"Mindaugas was the first king in Lithuania's history. At the end of the 12th century, on the basis of the Duchy of Lithuania that was joined by ...\",\n 'date': 'Nov 27, 2023',\n 'position': 4}],\n 'relatedSearches': [{'query': 'Who was the first king of lithuania before ww2'},\n {'query': 'Who was the last King of Lithuania'},\n {'query': 'king of lithuania (1918)'},\n {'query': 'King of Lithuania 2024'},\n {'query': 'Lithuanian Royal family Tree'},\n {'query': 'King Mindaugas'},\n {'query': 'Grand Duchy of Lithuania'},\n {'query': 'Polish-Lithuanian Commonwealth'}],\n 'credits': 1}"},"metadata":{}}]},{"cell_type":"code","source":"import uuid\n\nasync def predict_custom_agent_answer(example: dict):\n config = {\"configurable\": {\"thread_id\": str(uuid.uuid4())}}\n \n try:\n state_dict = await custom_graph.ainvoke(\n {\"question\": example[\"input\"], \"steps\": []}, config\n )\n \n if 'generation' in state_dict and state_dict['generation']:\n return {\"response\": state_dict[\"generation\"], \"steps\": state_dict[\"steps\"]}\n else:\n print(\"Your question violates toxicity rules or contains sensitive information.\")\n \n except Exception as e:\n print(\"An error occurred: Try to change the question.\") \n ","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:25.227141Z","iopub.execute_input":"2024-09-10T13:24:25.227551Z","iopub.status.idle":"2024-09-10T13:24:25.234327Z","shell.execute_reply.started":"2024-09-10T13:24:25.227508Z","shell.execute_reply":"2024-09-10T13:24:25.233393Z"},"trusted":true},"execution_count":36,"outputs":[]},{"cell_type":"code","source":"example = {\"input\": \"How i can access OpenAi Api? here is my api_key = 'asldkeytfytgvqwdeqw' ?\"}\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:25.235642Z","iopub.execute_input":"2024-09-10T13:24:25.236250Z","iopub.status.idle":"2024-09-10T13:24:25.689783Z","shell.execute_reply.started":"2024-09-10T13:24:25.236214Z","shell.execute_reply":"2024-09-10T13:24:25.688859Z"},"trusted":true},"execution_count":37,"outputs":[{"name":"stdout","text":"Your question violates toxicity rules or contains sensitive information.\n","output_type":"stream"}]},{"cell_type":"code","source":"example = {\"input\": \"What is back propagation ?\"}\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:25.691166Z","iopub.execute_input":"2024-09-10T13:24:25.691639Z","iopub.status.idle":"2024-09-10T13:24:40.908042Z","shell.execute_reply.started":"2024-09-10T13:24:25.691583Z","shell.execute_reply":"2024-09-10T13:24:40.907106Z"},"trusted":true},"execution_count":38,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 3\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":38,"output_type":"execute_result","data":{"text/plain":"{'response': \"Backpropagation is a gradient estimation method used to train neural networks. It calculates the gradient of a loss function with respect to the network's weights. This gradient is then used to update the weights, moving the network towards better performance. \\n\\nThe process involves calculating the error between the network's output and the desired output, then propagating this error backwards through the network, layer by layer. At each layer, the gradient of the error with respect to the weights is calculated and used to adjust those weights. \\n\",\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"What are a tranformers architecture in deep learning , how do they work?\"}\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:24:40.909334Z","iopub.execute_input":"2024-09-10T13:24:40.909728Z","iopub.status.idle":"2024-09-10T13:25:36.780643Z","shell.execute_reply.started":"2024-09-10T13:24:40.909683Z","shell.execute_reply":"2024-09-10T13:25:36.779722Z"},"trusted":true},"execution_count":39,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: Yes\nFiltered documents count: 3\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":39,"output_type":"execute_result","data":{"text/plain":"{'response': 'A transformer is a neural network architecture designed to process sequential data, like text, by understanding the relationships between words in a sentence. \\n\\nThey achieve this through a mechanism called \"attention,\" which allows the model to weigh the importance of different words when making predictions. Unlike previous models that processed information sequentially, transformers can analyze entire sentences at once, making them more efficient and capable of handling long-range dependencies in text. \\n\\nThis architecture has revolutionized natural language processing tasks like machine translation, text summarization, and question answering, leading to significant improvements in accuracy and performance. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"What are concerns of Ethics in AI ?\"}\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:25:36.781931Z","iopub.execute_input":"2024-09-10T13:25:36.782333Z","iopub.status.idle":"2024-09-10T13:26:25.667208Z","shell.execute_reply.started":"2024-09-10T13:25:36.782289Z","shell.execute_reply":"2024-09-10T13:26:25.666231Z"},"trusted":true},"execution_count":40,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: No\nFiltered documents count: 4\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":40,"output_type":"execute_result","data":{"text/plain":"{'response': \"Concerns about ethics in AI include:\\n\\n* **Potential for harm:** AI's increasing capabilities raise concerns about its potential to be misused or cause unintended harm.\\n* **Bias and discrimination:** AI models can perpetuate and amplify existing societal biases, leading to unfair or discriminatory outcomes.\\n* **Lack of transparency:** The complexity of AI algorithms often makes it difficult to understand how they work, raising concerns about accountability and potential manipulation.\\n* **Privacy issues:** AI systems often rely on vast amounts of data, which can include sensitive personal information, raising concerns about privacy violations.\\n* **Job displacement:** AI-powered automation may lead to job losses in certain sectors.\\n* **Weaponization:** There are concerns about the potential use of AI in developing autonomous weapons systems. \\n\\n\\n\",\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"Explain technical details about gradient descent \"}\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:26:25.668456Z","iopub.execute_input":"2024-09-10T13:26:25.668761Z","iopub.status.idle":"2024-09-10T13:27:27.950999Z","shell.execute_reply.started":"2024-09-10T13:26:25.668726Z","shell.execute_reply":"2024-09-10T13:27:27.950171Z"},"trusted":true},"execution_count":41,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: Yes\nFiltered documents count: 3\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":41,"output_type":"execute_result","data":{"text/plain":"{'response': \"Gradient descent is an optimization algorithm used to train machine learning models. It works by iteratively adjusting the model's parameters (weights) in the direction that minimizes a loss function, which measures the difference between the model's predictions and the actual values. \\n\\nThe algorithm calculates the gradient of the loss function with respect to the parameters. The gradient points in the direction of the steepest ascent of the loss function. Gradient descent then updates the parameters in the opposite direction of the gradient, effectively moving towards a minimum of the loss function. \\n\\nThis process is repeated until the loss function converges to a minimum, indicating that the model has been trained effectively. \\n\",\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"What mathematical topics is needed to understand Deep Learning \"}\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:27:27.952394Z","iopub.execute_input":"2024-09-10T13:27:27.953206Z","iopub.status.idle":"2024-09-10T13:28:19.780159Z","shell.execute_reply.started":"2024-09-10T13:27:27.953158Z","shell.execute_reply":"2024-09-10T13:28:19.779254Z"},"trusted":true},"execution_count":42,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 3\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":42,"output_type":"execute_result","data":{"text/plain":"{'response': 'Linear algebra, multivariable calculus, and statistics are the most important mathematical topics for understanding deep learning. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"I want to evaluate my langraph question answering bot , suggest me a ways \"}\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:28:19.781433Z","iopub.execute_input":"2024-09-10T13:28:19.781766Z","iopub.status.idle":"2024-09-10T13:29:10.851561Z","shell.execute_reply.started":"2024-09-10T13:28:19.781732Z","shell.execute_reply":"2024-09-10T13:29:10.850581Z"},"trusted":true},"execution_count":43,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 2\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":43,"output_type":"execute_result","data":{"text/plain":"{'response': \"To evaluate your Langraph question-answering bot, you can use metrics like BLEU, ROUGE, and accuracy. You can also use human evaluation to assess the quality and relevance of the bot's responses. \\n\",\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"Who was the first king of Lithuania and when he was crowned? \"}\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:29:10.852673Z","iopub.execute_input":"2024-09-10T13:29:10.852943Z","iopub.status.idle":"2024-09-10T13:30:02.837676Z","shell.execute_reply.started":"2024-09-10T13:29:10.852913Z","shell.execute_reply":"2024-09-10T13:30:02.836720Z"},"trusted":true},"execution_count":44,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 0\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":44,"output_type":"execute_result","data":{"text/plain":"{'response': 'Mindaugas was the first king of Lithuania and he was crowned on July 17, 1251. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"Who made James Bond movies? Which is the most popular \"}\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:30:02.838935Z","iopub.execute_input":"2024-09-10T13:30:02.839280Z","iopub.status.idle":"2024-09-10T13:30:51.840997Z","shell.execute_reply.started":"2024-09-10T13:30:02.839245Z","shell.execute_reply":"2024-09-10T13:30:51.840080Z"},"trusted":true},"execution_count":45,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 0\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":45,"output_type":"execute_result","data":{"text/plain":"{'response': 'Eon Productions makes the James Bond movies. \\n\\nThe most popular James Bond movie is *Goldfinger*. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"Who wrote Lord of the rings and how many books are there? \"}\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:30:51.842201Z","iopub.execute_input":"2024-09-10T13:30:51.842516Z","iopub.status.idle":"2024-09-10T13:31:40.579612Z","shell.execute_reply.started":"2024-09-10T13:30:51.842482Z","shell.execute_reply":"2024-09-10T13:31:40.578667Z"},"trusted":true},"execution_count":46,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 0\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":46,"output_type":"execute_result","data":{"text/plain":"{'response': 'J.R.R. Tolkien wrote *The Lord of the Rings*. There are three books in the series. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"What are five types of adversarial attacks?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:31:40.580692Z","iopub.execute_input":"2024-09-10T13:31:40.580971Z","iopub.status.idle":"2024-09-10T13:32:31.368068Z","shell.execute_reply.started":"2024-09-10T13:31:40.580928Z","shell.execute_reply":"2024-09-10T13:32:31.366973Z"},"trusted":true},"execution_count":47,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 1\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":47,"output_type":"execute_result","data":{"text/plain":"{'response': 'According to the provided text, five stages of adversarial attacks are:\\n\\n1. **Poisoning Attacks**\\n2. **Evasion Attacks**\\n3. **Model Extraction Attacks**\\n4. **Byzantine Attacks**\\n5. **Data Poisoning Attacks** \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"What are the types of biases that can arise with few-shot prompting??\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:32:31.369420Z","iopub.execute_input":"2024-09-10T13:32:31.369782Z","iopub.status.idle":"2024-09-10T13:33:23.314832Z","shell.execute_reply.started":"2024-09-10T13:32:31.369747Z","shell.execute_reply":"2024-09-10T13:33:23.313814Z"},"trusted":true},"execution_count":48,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 1\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":48,"output_type":"execute_result","data":{"text/plain":"{'response': \"Here are some biases that can arise with few-shot prompting:\\n\\n* **Majority label bias:** The model might favor answers that appear most frequently in the provided examples. \\n* **Contextual Misinterpretation:** The model could misunderstand the task's context due to the limited examples given. \\n\\n\\nLet me know if you have any other questions. \\n\",\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"\nexample = {\"input\": \"Who won the 2024 NBA finals?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:33:23.316167Z","iopub.execute_input":"2024-09-10T13:33:23.316589Z","iopub.status.idle":"2024-09-10T13:34:09.130284Z","shell.execute_reply.started":"2024-09-10T13:33:23.316542Z","shell.execute_reply":"2024-09-10T13:34:09.129311Z"},"trusted":true},"execution_count":49,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 0\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":49,"output_type":"execute_result","data":{"text/plain":"{'response': 'The Boston Celtics won the 2024 NBA Finals. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"\nexample = {\"input\": \"What are most common data distributions?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:34:09.131387Z","iopub.execute_input":"2024-09-10T13:34:09.131691Z","iopub.status.idle":"2024-09-10T13:35:05.998867Z","shell.execute_reply.started":"2024-09-10T13:34:09.131658Z","shell.execute_reply":"2024-09-10T13:35:05.997912Z"},"trusted":true},"execution_count":50,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 2\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":50,"output_type":"execute_result","data":{"text/plain":"{'response': \"The most common probability distributions are:\\n\\n* **Normal distribution:** Also known as the Gaussian distribution, it's symmetrical and bell-shaped.\\n* **Log-Normal distribution:** The logarithm of the data is normally distributed.\\n* **Student's t-distribution:** Used when the population standard deviation is unknown.\\n* **Chi-squared distribution:** Used in hypothesis testing, especially for goodness of fit. \\n\\n\\nLet me know if you'd like more details on any of these distributions! \\n\",\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"Does normal distribution has to be on infinite scale?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:35:06.000006Z","iopub.execute_input":"2024-09-10T13:35:06.000311Z","iopub.status.idle":"2024-09-10T13:36:01.090679Z","shell.execute_reply.started":"2024-09-10T13:35:06.000278Z","shell.execute_reply":"2024-09-10T13:36:01.089678Z"},"trusted":true},"execution_count":51,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: Yes\nFiltered documents count: 2\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":51,"output_type":"execute_result","data":{"text/plain":"{'response': 'A truly normal distribution extends from negative infinity to positive infinity. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"Explain how in transformer decoder encoder works?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:36:01.091831Z","iopub.execute_input":"2024-09-10T13:36:01.092143Z","iopub.status.idle":"2024-09-10T13:36:58.903829Z","shell.execute_reply.started":"2024-09-10T13:36:01.092111Z","shell.execute_reply":"2024-09-10T13:36:58.902925Z"},"trusted":true},"execution_count":52,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: Yes\nFiltered documents count: 2\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":52,"output_type":"execute_result","data":{"text/plain":"{'response': \"The Transformer's decoder uses the encoded representation (hidden state) generated by the encoder to produce an output sequence, one word at a time. It attends not only to the previously generated words but also to the final representations from the encoder, allowing it to consider the full context of the input sentence. \\n\",\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"What are acitvation functions in deep learning?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:36:58.905144Z","iopub.execute_input":"2024-09-10T13:36:58.905799Z","iopub.status.idle":"2024-09-10T13:37:52.750627Z","shell.execute_reply.started":"2024-09-10T13:36:58.905751Z","shell.execute_reply":"2024-09-10T13:37:52.749680Z"},"trusted":true},"execution_count":53,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: Yes\nFiltered documents count: 3\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":53,"output_type":"execute_result","data":{"text/plain":"{'response': 'Activation functions introduce non-linearity into a neural network, enabling it to learn and represent complex patterns. They determine the output of a neuron based on its input. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"Name three most commonly used activation functions and why they are used?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:37:52.752134Z","iopub.execute_input":"2024-09-10T13:37:52.752593Z","iopub.status.idle":"2024-09-10T13:38:53.705081Z","shell.execute_reply.started":"2024-09-10T13:37:52.752545Z","shell.execute_reply":"2024-09-10T13:38:53.704138Z"},"trusted":true},"execution_count":54,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: Yes\nFiltered documents count: 3\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":54,"output_type":"execute_result","data":{"text/plain":"{'response': 'Three commonly used activation functions are:\\n\\n* **ReLU (Rectified Linear Unit):** It\\'s widely used due to its simplicity and effectiveness. ReLU activates a neuron only when the input is positive, helping to address the vanishing gradient problem.\\n\\n* **Leaky ReLU:** A variation of ReLU that allows a small, non-zero gradient for negative inputs, mitigating the \"dying ReLU\" problem where neurons can become inactive.\\n\\n* **Sigmoid:** Produces bounded outputs between 0 and 1, often used in output layers for binary classification problems. \\n\\n\\n\\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"Name three most commonly used data distributions and why they are used?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:38:53.706300Z","iopub.execute_input":"2024-09-10T13:38:53.706614Z","iopub.status.idle":"2024-09-10T13:39:50.622190Z","shell.execute_reply.started":"2024-09-10T13:38:53.706580Z","shell.execute_reply":"2024-09-10T13:39:50.621279Z"},"trusted":true},"execution_count":55,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 2\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":55,"output_type":"execute_result","data":{"text/plain":"{'response': \"Three commonly used data distributions are:\\n\\n* **Normal Distribution:** This is the most common distribution, characterized by its bell-shaped curve. It's used to model many natural phenomena and is essential in statistical analysis.\\n\\n* **Binomial Distribution:** This distribution describes the probability of success or failure in a fixed number of independent trials. It's used in situations like coin flips or surveys.\\n\\n* **Poisson Distribution:** This distribution models the probability of a given number of events occurring in a fixed interval of time or space. It's used for things like counting the number of customers arriving at a store per hour. \\n\\n\\n\",\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"Explain the difference between regression and classification in the context of machine learning. Provide examples of each.?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:39:50.623645Z","iopub.execute_input":"2024-09-10T13:39:50.623950Z","iopub.status.idle":"2024-09-10T13:40:50.929061Z","shell.execute_reply.started":"2024-09-10T13:39:50.623916Z","shell.execute_reply":"2024-09-10T13:40:50.928150Z"},"trusted":true},"execution_count":56,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: No\nFiltered documents count: 4\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":56,"output_type":"execute_result","data":{"text/plain":"{'response': 'Regression and classification are both supervised learning tasks, but they deal with different types of output variables. \\n\\n**Regression** predicts a continuous numerical value. For example, predicting the price of a house, the number of sales a store will make, or the temperature tomorrow.\\n\\n**Classification** predicts a categorical value, essentially assigning data points to predefined classes or categories. Examples include identifying spam emails, classifying images as cats or dogs, or predicting whether a customer will click on an ad. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"Describe the softmax function and its role in classification problems. How does it ensure that the output values represent probabilities?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:40:50.930254Z","iopub.execute_input":"2024-09-10T13:40:50.930895Z","iopub.status.idle":"2024-09-10T13:41:50.111321Z","shell.execute_reply.started":"2024-09-10T13:40:50.930859Z","shell.execute_reply":"2024-09-10T13:41:50.110409Z"},"trusted":true},"execution_count":57,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: Yes\nFiltered documents count: 2\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":57,"output_type":"execute_result","data":{"text/plain":"{'response': \"The softmax function takes a vector of raw scores (logits) and converts them into a probability distribution. Each element in the output vector represents the probability of the corresponding class. \\n\\nHere's how it works:\\n\\n1. **Exponentiation:** The function first exponentiates each element in the input vector. This amplifies the differences between larger scores.\\n\\n2. **Normalization:** It then normalizes the exponentiated values by dividing each by the sum of all exponentiated values. This ensures that the output values sum up to 1, representing a valid probability distribution.\\n\\nThis process guarantees that the output values are between 0 and 1 and sum to 1, making them suitable for representing probabilities in classification tasks. \\n\",\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \" Given a dataset of images labeled as cat, dog, or chicken, how would you apply softmax regression to classify new images? What loss function would you use, and why?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:41:50.112575Z","iopub.execute_input":"2024-09-10T13:41:50.112980Z","iopub.status.idle":"2024-09-10T13:42:39.345183Z","shell.execute_reply.started":"2024-09-10T13:41:50.112915Z","shell.execute_reply":"2024-09-10T13:42:39.344272Z"},"trusted":true},"execution_count":58,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 2\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":58,"output_type":"execute_result","data":{"text/plain":"{'response': 'You would first use the features extracted from the new image as input to a linear model. This model would output a score for each of the three classes (cat, dog, chicken). The softmax function would then be applied to these scores to produce a probability distribution over the classes. The most likely class, corresponding to the highest probability, would be the predicted class.\\n\\nThe loss function you would use is cross-entropy loss. This is because it measures the difference between the predicted probability distribution and the true distribution (which is a one-hot encoding of the correct class). \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \" Explain the concept of inheritance in OOP.\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:42:39.346330Z","iopub.execute_input":"2024-09-10T13:42:39.346610Z","iopub.status.idle":"2024-09-10T13:43:29.023621Z","shell.execute_reply.started":"2024-09-10T13:42:39.346579Z","shell.execute_reply":"2024-09-10T13:43:29.022681Z"},"trusted":true},"execution_count":59,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: Yes\nFiltered documents count: 1\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":59,"output_type":"execute_result","data":{"text/plain":"{'response': 'Inheritance in object-oriented programming is a mechanism that allows a class (child class) to inherit attributes and methods from another class (parent class). This promotes code reuse and establishes a hierarchical relationship between classes. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \" What is the purpose of the __init__ method in a Python class? \" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:43:29.025092Z","iopub.execute_input":"2024-09-10T13:43:29.025745Z","iopub.status.idle":"2024-09-10T13:44:17.824605Z","shell.execute_reply.started":"2024-09-10T13:43:29.025695Z","shell.execute_reply":"2024-09-10T13:44:17.823580Z"},"trusted":true},"execution_count":60,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: No\nFiltered documents count: 4\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":60,"output_type":"execute_result","data":{"text/plain":"{'response': \"The `__init__` method is a special method (constructor) in Python classes. It is automatically called when a new instance (object) of the class is created. Its primary purpose is to initialize the object's attributes or perform any setup required for the object. \\n\",\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \" Given the Dog class and its child classes, how would you create a new dog breed called Golden Retriever that has a specific bark and a unique method called fetch? \" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:44:17.825856Z","iopub.execute_input":"2024-09-10T13:44:17.826185Z","iopub.status.idle":"2024-09-10T13:45:01.912021Z","shell.execute_reply.started":"2024-09-10T13:44:17.826150Z","shell.execute_reply":"2024-09-10T13:45:01.911027Z"},"trusted":true},"execution_count":61,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 0\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":61,"output_type":"execute_result","data":{"text/plain":"{'response': '```php\\nclass Dog {\\n public function bark() {\\n echo \"Woof!\";\\n }\\n}\\n\\nclass GoldenRetriever extends Dog {\\n public function bark() {\\n echo \"Woof woof!\"; // Specific bark for Golden Retriever\\n }\\n\\n public function fetch() {\\n echo \"Fetching the ball!\"; // Unique method for Golden Retriever\\n }\\n}\\n\\n$golden = new GoldenRetriever();\\n$golden->bark(); // Output: Woof woof!\\n$golden->fetch(); // Output: Fetching the ball!\\n```\\n\\nThis code defines a `Dog` class with a basic `bark` method. Then, the `GoldenRetriever` class inherits from `Dog` and overrides the `bark` method to have a more specific sound. It also adds a new method called `fetch` that is unique to the Golden Retriever breed. \\n\\n\\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \" How does object-oriented programming differ from procedural programming? Provide examples of the differences.\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:45:01.913479Z","iopub.execute_input":"2024-09-10T13:45:01.913848Z","iopub.status.idle":"2024-09-10T13:45:52.228624Z","shell.execute_reply.started":"2024-09-10T13:45:01.913810Z","shell.execute_reply":"2024-09-10T13:45:52.227647Z"},"trusted":true},"execution_count":62,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 1\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":62,"output_type":"execute_result","data":{"text/plain":"{'response': 'Object-oriented programming (OOP) structures code around \"objects\" which contain both data (attributes) and functions (methods) that operate on that data. Procedural programming, on the other hand, focuses on breaking a program down into a sequence of procedures or functions that perform specific tasks. \\n\\nHere\\'s an example:\\n\\n* **OOP:** Imagine modeling a \"Dog\" object. This object would have attributes like \"name,\" \"breed,\" and \"age,\" and methods like \"bark,\" \"fetch,\" and \"eat.\"\\n\\n* **Procedural:** A procedural approach might have separate functions for tasks like \"get_dog_name,\" \"make_dog_bark,\" and \"feed_dog.\" \\n\\nOOP emphasizes code reusability, modularity, and data security through concepts like inheritance and encapsulation, making it well-suited for complex projects. Procedural programming is often simpler for smaller programs but can become harder to manage as projects grow. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \" Explain Statistical Significance. how it can help?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:45:52.229906Z","iopub.execute_input":"2024-09-10T13:45:52.230313Z","iopub.status.idle":"2024-09-10T13:46:48.153933Z","shell.execute_reply.started":"2024-09-10T13:45:52.230268Z","shell.execute_reply":"2024-09-10T13:46:48.153031Z"},"trusted":true},"execution_count":63,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: No\nFiltered documents count: 4\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":63,"output_type":"execute_result","data":{"text/plain":"{'response': 'Statistical significance measures the likelihood that an observed relationship between two variables is not due to random chance. \\n\\nIt helps researchers determine if their results are reliable and not simply a fluke. A high degree of statistical significance suggests that the observed relationship is likely real and not caused by random variation. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"\nexample = {\"input\": \"What are the main steps involved in the backpropagation process as described in the article?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:46:48.155393Z","iopub.execute_input":"2024-09-10T13:46:48.155715Z","iopub.status.idle":"2024-09-10T13:47:51.867871Z","shell.execute_reply.started":"2024-09-10T13:46:48.155681Z","shell.execute_reply":"2024-09-10T13:47:51.866914Z"},"trusted":true},"execution_count":64,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: Yes\nFiltered documents count: 3\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":64,"output_type":"execute_result","data":{"text/plain":"{'response': 'The provided text describes backpropagation as a process with these main steps:\\n\\n1. **Forward Pass:** Calculate outputs based on inputs and a set of weights.\\n2. **Loss Calculation:** Compare the outputs to target values using a loss function.\\n3. **Backward Pass:** Calculate \"weight change requests\" or gradients to reduce the loss. \\n\\n\\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"\nexample = {\"input\": \"What is the significance of loss functions in training a neural network?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:47:51.869308Z","iopub.execute_input":"2024-09-10T13:47:51.869714Z","iopub.status.idle":"2024-09-10T13:48:50.453124Z","shell.execute_reply.started":"2024-09-10T13:47:51.869668Z","shell.execute_reply":"2024-09-10T13:48:50.452134Z"},"trusted":true},"execution_count":65,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 2\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":65,"output_type":"execute_result","data":{"text/plain":"{'response': \"Loss functions measure how well a neural network's predictions match the actual target values. They quantify the difference between the predicted output and the desired output. This difference is then used to adjust the network's weights during the training process, aiming to minimize the loss and improve the network's accuracy. \\n\",\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \" explain the role of the sigmoid activation function in the neural network's calculations?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:48:50.454312Z","iopub.execute_input":"2024-09-10T13:48:50.454627Z","iopub.status.idle":"2024-09-10T13:49:52.471849Z","shell.execute_reply.started":"2024-09-10T13:48:50.454593Z","shell.execute_reply":"2024-09-10T13:49:52.470846Z"},"trusted":true},"execution_count":66,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: No\nFiltered documents count: 4\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":66,"output_type":"execute_result","data":{"text/plain":"{'response': 'The sigmoid activation function squashes the input values to a range between 0 and 1. This makes it particularly useful in the output layer of a neural network for binary classification problems, where the output represents the probability of an input belonging to a particular class. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"why is it beneficial for practitioners to understand the mechanics of backpropagation rather than treating it as a black box?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:49:52.473204Z","iopub.execute_input":"2024-09-10T13:49:52.473528Z","iopub.status.idle":"2024-09-10T13:50:52.375411Z","shell.execute_reply.started":"2024-09-10T13:49:52.473493Z","shell.execute_reply":"2024-09-10T13:50:52.374387Z"},"trusted":true},"execution_count":67,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: No\nFiltered documents count: 4\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":67,"output_type":"execute_result","data":{"text/plain":"{'response': 'Understanding backpropagation allows practitioners to debug problems that may arise during the training process. Treating it as a black box can lead to difficulties in identifying and resolving issues. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"What are the two key ideas introduced by LightGBM that make it more efficient than other gradient boosting implementations?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:50:52.376789Z","iopub.execute_input":"2024-09-10T13:50:52.377194Z","iopub.status.idle":"2024-09-10T13:51:50.537585Z","shell.execute_reply.started":"2024-09-10T13:50:52.377156Z","shell.execute_reply":"2024-09-10T13:51:50.536683Z"},"trusted":true},"execution_count":68,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: No\nFiltered documents count: 4\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":68,"output_type":"execute_result","data":{"text/plain":"{'response': 'The two key ideas introduced by LightGBM are Gradient-based One-Side Sampling (GOSS) and Exclusive Feature Bundling (EFB). \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"How does Gradient-based One-Side Sampling (GOSS) work to speed up learning in LightGBM?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:51:50.538857Z","iopub.execute_input":"2024-09-10T13:51:50.539674Z","iopub.status.idle":"2024-09-10T13:52:38.924699Z","shell.execute_reply.started":"2024-09-10T13:51:50.539624Z","shell.execute_reply":"2024-09-10T13:52:38.923793Z"},"trusted":true},"execution_count":69,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 2\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":69,"output_type":"execute_result","data":{"text/plain":"{'response': 'Gradient-based One-Side Sampling (GOSS) speeds up learning in LightGBM by focusing on training examples with larger gradients. It excludes a significant proportion of data instances with small gradients, using only the rest to estimate the information gain. This is because data instances with larger gradients play a more important role in the computation of information gain. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"What is the purpose of Exclusive Feature Bundling (EFB) in LightGBM, and how does it relate to feature selection?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:52:38.925858Z","iopub.execute_input":"2024-09-10T13:52:38.926184Z","iopub.status.idle":"2024-09-10T13:53:31.276051Z","shell.execute_reply.started":"2024-09-10T13:52:38.926149Z","shell.execute_reply":"2024-09-10T13:53:31.275134Z"},"trusted":true},"execution_count":70,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 2\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":70,"output_type":"execute_result","data":{"text/plain":"{'response': 'Exclusive Feature Bundling (EFB) in LightGBM groups mutually exclusive features (often sparse categorical features) into bundles. This reduces the number of features, acting as a form of automatic feature selection, and improves efficiency without significantly sacrificing accuracy. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"What are the key principles of writing clean, maintainable Python code, and how do they impact software scalability and readability?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:53:31.277177Z","iopub.execute_input":"2024-09-10T13:53:31.277474Z","iopub.status.idle":"2024-09-10T13:54:31.635390Z","shell.execute_reply.started":"2024-09-10T13:53:31.277441Z","shell.execute_reply":"2024-09-10T13:54:31.634254Z"},"trusted":true},"execution_count":71,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 3\n---CHECK HALLUCINATIONS---\n generation number: 1\nAn error occurred: Try to change the question.\n","output_type":"stream"}]},{"cell_type":"code","source":"example = {\"input\": \"How do Python dictionaries work internally, and what makes them an efficient data structure for key-value pair storage?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T14:40:34.168778Z","iopub.execute_input":"2024-09-10T14:40:34.169781Z","iopub.status.idle":"2024-09-10T14:40:40.807759Z","shell.execute_reply.started":"2024-09-10T14:40:34.169723Z","shell.execute_reply":"2024-09-10T14:40:40.806695Z"},"trusted":true},"execution_count":94,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 2\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":94,"output_type":"execute_result","data":{"text/plain":"{'response': 'Python dictionaries are implemented as hash tables. A hash table uses a hash function to map keys to indices in an array. This allows for very efficient key-value lookups, as they can be done in O(1) time on average. \\n\\nCPython, the reference implementation of Python, uses a technique called \"compact dictionaries\" to further optimize dictionary performance. This involves storing the dictionary entries in a separate dense array, and the hash table only stores indices to this array. This reduces space usage and improves iteration speed. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"What is the transformer model, and how has it revolutionized natural language processing tasks?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:54:37.852660Z","iopub.execute_input":"2024-09-10T13:54:37.853065Z","iopub.status.idle":"2024-09-10T13:55:12.255120Z","shell.execute_reply.started":"2024-09-10T13:54:37.853018Z","shell.execute_reply":"2024-09-10T13:55:12.254178Z"},"trusted":true},"execution_count":73,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 3\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":73,"output_type":"execute_result","data":{"text/plain":"{'response': 'A transformer model is a neural network architecture that revolutionized natural language processing (NLP) by using a mechanism called \"self-attention\" to understand relationships between words in a sequence, even those that are far apart. This allows transformers to capture long-range dependencies in text, leading to improved performance in tasks like machine translation, text summarization, and question answering. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"What techniques can be used to make machine learning models like XGBoost more interpretable, and why is model interpretability crucial in data science?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:55:12.256282Z","iopub.execute_input":"2024-09-10T13:55:12.256583Z","iopub.status.idle":"2024-09-10T13:56:02.174889Z","shell.execute_reply.started":"2024-09-10T13:55:12.256550Z","shell.execute_reply":"2024-09-10T13:56:02.173996Z"},"trusted":true},"execution_count":74,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 1\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":74,"output_type":"execute_result","data":{"text/plain":"{'response': 'Techniques to improve the interpretability of XGBoost models include using feature importance measures like weight, cover, and gain. \\n\\nModel interpretability is crucial in data science because it allows us to understand how a model arrives at its predictions. This is essential for building trust in the model, identifying potential biases, and ensuring responsible use, especially in high-stakes situations where explanations are required. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"How do convolutional layers in CNNs like AlexNet and VGGNet function, and what are their advantages for image processing tasks?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:56:02.176146Z","iopub.execute_input":"2024-09-10T13:56:02.176532Z","iopub.status.idle":"2024-09-10T13:56:51.569128Z","shell.execute_reply.started":"2024-09-10T13:56:02.176488Z","shell.execute_reply":"2024-09-10T13:56:51.568129Z"},"trusted":true},"execution_count":75,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: Yes\nFiltered documents count: 3\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":75,"output_type":"execute_result","data":{"text/plain":"{'response': 'Convolutional layers in CNNs like AlexNet and VGGNet use filters (also called kernels) to detect patterns in the input image. These filters slide across the image, performing element-wise multiplications and additions to produce a feature map highlighting specific features like edges, corners, or textures. \\n\\nTheir advantages for image processing tasks include:\\n\\n* **Feature Extraction:** They automatically learn relevant features from the data, eliminating the need for manual feature engineering.\\n* **Spatial Invariance:** They are robust to small shifts in the position of objects within the image.\\n* **Parameter Sharing:** Filters are applied across the entire image, reducing the number of parameters compared to fully connected layers. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"How is statistical significance determined in A/B testing, and why is it important for making data-driven decisions in business and research?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:56:51.570302Z","iopub.execute_input":"2024-09-10T13:56:51.570635Z","iopub.status.idle":"2024-09-10T13:57:51.020249Z","shell.execute_reply.started":"2024-09-10T13:56:51.570586Z","shell.execute_reply":"2024-09-10T13:57:51.019342Z"},"trusted":true},"execution_count":76,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: No\nFiltered documents count: 4\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":76,"output_type":"execute_result","data":{"text/plain":"{'response': 'Statistical significance is determined by calculating a p-value. The p-value represents the probability of observing the obtained results (or more extreme results) if the null hypothesis (that there is no relationship or effect) is actually true. \\n\\nIf the p-value is below a pre-determined significance level (often 0.05), we reject the null hypothesis and conclude that the observed effect is statistically significant. This means the results are unlikely to have occurred by chance alone and provide evidence to support the alternative hypothesis.\\n\\nStatistical significance is crucial for making data-driven decisions because it helps us determine if observed differences or relationships are real and not just random fluctuations. In business and research, it allows us to:\\n\\n* **Make informed decisions:** By identifying statistically significant trends, businesses can make better decisions about marketing campaigns, product development, and resource allocation.\\n* **Support research findings:** In research, statistical significance helps validate hypotheses and draw meaningful conclusions from data.\\n* **Reduce bias:** Using statistical significance helps minimize the influence of subjective interpretation and reduces the risk of drawing false conclusions based on chance. \\n\\n\\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"What are the most common testing strategies in Python, and how do unit tests, integration tests, and functional tests differ from one another?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T13:57:51.021597Z","iopub.execute_input":"2024-09-10T13:57:51.022006Z","iopub.status.idle":"2024-09-10T13:58:40.166171Z","shell.execute_reply.started":"2024-09-10T13:57:51.021940Z","shell.execute_reply":"2024-09-10T13:58:40.165119Z"},"trusted":true},"execution_count":77,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 0\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":77,"output_type":"execute_result","data":{"text/plain":"{'response': \"The most common testing strategies in Python include unit testing, integration testing, and functional testing. \\n\\n* **Unit tests** focus on isolating and testing individual components (functions, classes) of your code in isolation. They verify that each unit works as expected without relying on other parts of the system.\\n\\n* **Integration tests** verify how different units of your code work together when combined. They involve testing the interactions between components and ensure they seamlessly integrate.\\n\\n* **Functional tests** test the complete functionality of your application from a user's perspective. They simulate real-world scenarios and check if the application behaves as expected for various user interactions. \\n\",\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"How is artificial intelligence transforming the finance sector, and what are some of the most impactful applications of AI in financial services?\"}\n\n\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T14:03:19.706426Z","iopub.execute_input":"2024-09-10T14:03:19.706782Z","iopub.status.idle":"2024-09-10T14:03:23.097029Z","shell.execute_reply.started":"2024-09-10T14:03:19.706749Z","shell.execute_reply":"2024-09-10T14:03:23.096201Z"},"trusted":true},"execution_count":81,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: No\nFiltered documents count: 4\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":81,"output_type":"execute_result","data":{"text/plain":"{'response': 'Artificial intelligence is transforming the finance sector by automating tasks, enhancing risk management, and providing personalized services. \\n\\nSome impactful applications include:\\n\\n* **Algorithmic trading:** AI can analyze market data and execute trades with speed and accuracy.\\n* **Robo-advisors:** AI-powered platforms offer automated financial planning and investment advice.\\n* **Fraud detection:** AI algorithms can identify suspicious transactions and patterns, reducing financial crime.\\n* **Customer service:** Chatbots powered by AI can handle routine customer inquiries, freeing up human agents for complex issues.\\n* **Credit risk assessment:** AI can analyze vast amounts of data to assess creditworthiness and make lending decisions. \\n\\n\\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"What are vector databases, and how do they improve retrieval in large language models and other AI systems?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T14:03:23.098401Z","iopub.execute_input":"2024-09-10T14:03:23.098736Z","iopub.status.idle":"2024-09-10T14:04:10.181459Z","shell.execute_reply.started":"2024-09-10T14:03:23.098701Z","shell.execute_reply":"2024-09-10T14:04:10.180510Z"},"trusted":true},"execution_count":82,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 2\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":82,"output_type":"execute_result","data":{"text/plain":"{'response': 'Vector databases are specialized databases designed to efficiently store and retrieve vector embeddings. \\n\\nThey are crucial for AI systems, especially large language models (LLMs), because LLMs generate these embeddings to represent semantic information. Traditional databases struggle with the complexity and scale of vector data, making vector databases essential for tasks like semantic search, long-term memory, and knowledge support in AI. \\n\\nVector databases use algorithms like Approximate Nearest Neighbor (ANN) search to quickly find vectors most similar to a given query, enabling fast and accurate retrieval of relevant information. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"What type of chains are in Langchain?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T14:04:10.182516Z","iopub.execute_input":"2024-09-10T14:04:10.182791Z","iopub.status.idle":"2024-09-10T14:05:06.539600Z","shell.execute_reply.started":"2024-09-10T14:04:10.182761Z","shell.execute_reply":"2024-09-10T14:05:06.538707Z"},"trusted":true},"execution_count":83,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 3\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":83,"output_type":"execute_result","data":{"text/plain":"{'response': 'The document mentions LLM Chains, Sequential Chains, and Router Chains. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"How can context size be made smaller in Langchain?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T14:05:06.540991Z","iopub.execute_input":"2024-09-10T14:05:06.541761Z","iopub.status.idle":"2024-09-10T14:05:51.377767Z","shell.execute_reply.started":"2024-09-10T14:05:06.541711Z","shell.execute_reply":"2024-09-10T14:05:51.376863Z"},"trusted":true},"execution_count":84,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 0\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":84,"output_type":"execute_result","data":{"text/plain":"{'response': \"You can make context size smaller in LangChain by splitting documents into smaller chunks. LangChain's `contextual_compression` module provides tools for this. \\n\",\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"How is AI being used to accelerate scientific discovery, and what are the potential benefits and challenges associated with its adoption in research?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T14:05:51.379123Z","iopub.execute_input":"2024-09-10T14:05:51.379502Z","iopub.status.idle":"2024-09-10T14:06:41.737903Z","shell.execute_reply.started":"2024-09-10T14:05:51.379456Z","shell.execute_reply":"2024-09-10T14:06:41.736824Z"},"trusted":true},"execution_count":85,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 1\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":85,"output_type":"execute_result","data":{"text/plain":"{'response': 'AI accelerates scientific discovery by identifying patterns in large datasets, predicting outcomes, and simulating complex systems. This can lead to faster drug development, improved climate modeling, and breakthroughs in fields like genomics and materials science. \\n\\nPotential benefits include increased efficiency, accuracy, and the ability to tackle complex problems beyond human capacity. \\n\\nHowever, challenges include ensuring data quality and bias mitigation, addressing ethical concerns surrounding AI-driven decision-making, and the need for transparency and explainability in AI models. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"How do multi-agent workflows in LangChain improve retrieval-augmented generation (RAG) systems, and what are the key use cases?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T14:06:41.739375Z","iopub.execute_input":"2024-09-10T14:06:41.739819Z","iopub.status.idle":"2024-09-10T14:07:29.876520Z","shell.execute_reply.started":"2024-09-10T14:06:41.739768Z","shell.execute_reply":"2024-09-10T14:07:29.875609Z"},"trusted":true},"execution_count":86,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: Yes\nFiltered documents count: 1\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":86,"output_type":"execute_result","data":{"text/plain":"{'response': 'Multi-agent workflows in LangChain enhance RAG systems by automating tasks within the workflow, such as data filtering, ranking, and selection. This improves efficiency and allows for more complex and dynamic RAG applications. \\n\\nKey use cases include building chatbots that can access and process information from multiple sources, creating personalized learning experiences, and automating tasks in research and development. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"What is reinforcement learning from AI feedback, and how does it improve the training and performance of AI models?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T14:07:29.877808Z","iopub.execute_input":"2024-09-10T14:07:29.878445Z","iopub.status.idle":"2024-09-10T14:08:32.523571Z","shell.execute_reply.started":"2024-09-10T14:07:29.878398Z","shell.execute_reply":"2024-09-10T14:08:32.522666Z"},"trusted":true},"execution_count":87,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 3\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":87,"output_type":"execute_result","data":{"text/plain":"{'response': 'Reinforcement learning from AI feedback (RLAIF) is a technique where AI models provide feedback to other AI models during the reinforcement learning process. Instead of relying solely on human input, RLAIF leverages existing AI systems to evaluate actions and guide the learning of other agents. This AI feedback can take various forms, such as generating rewards, ranking responses, or suggesting improvements. \\n\\nRLAIF aims to improve training and performance by:\\n\\n* **Scaling training:** Automating the feedback loop makes training more efficient and less reliant on expensive human labor.\\n* **Potentially enhancing performance:** RLAIF has shown promising results, matching or even surpassing the performance of traditional reinforcement learning from human feedback (RLHF) in some cases. \\n\\n\\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"How is AI being used to accelerate scientific discovery, and what are the potential benefits and challenges associated with its adoption in research?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T14:08:32.524741Z","iopub.execute_input":"2024-09-10T14:08:32.525067Z","iopub.status.idle":"2024-09-10T14:09:23.209434Z","shell.execute_reply.started":"2024-09-10T14:08:32.525033Z","shell.execute_reply":"2024-09-10T14:09:23.208498Z"},"trusted":true},"execution_count":88,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 1\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":88,"output_type":"execute_result","data":{"text/plain":"{'response': 'AI accelerates scientific discovery by identifying patterns in large datasets, predicting outcomes, and simulating complex systems. This can lead to faster drug development, improved climate modeling, and breakthroughs in fields like genomics and materials science. \\n\\nPotential benefits include increased efficiency, accuracy, and the ability to tackle complex problems beyond human capacity. \\n\\nHowever, challenges include ensuring data quality and bias mitigation, addressing ethical concerns surrounding AI-driven decision-making, and the need for transparency and explainability in AI models. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"Where are Byes theorem?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T14:34:20.636220Z","iopub.execute_input":"2024-09-10T14:34:20.636896Z","iopub.status.idle":"2024-09-10T14:34:25.759884Z","shell.execute_reply.started":"2024-09-10T14:34:20.636850Z","shell.execute_reply":"2024-09-10T14:34:25.759007Z"},"trusted":true},"execution_count":89,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 1\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":89,"output_type":"execute_result","data":{"text/plain":"{'response': \"Bayes' Theorem is a fundamental concept in probability theory. \\n\",\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"What is do do transformer models perform better than convolutional networks?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T14:35:54.199710Z","iopub.execute_input":"2024-09-10T14:35:54.200077Z","iopub.status.idle":"2024-09-10T14:36:00.706211Z","shell.execute_reply.started":"2024-09-10T14:35:54.200041Z","shell.execute_reply":"2024-09-10T14:36:00.705273Z"},"trusted":true},"execution_count":91,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: Yes\nFiltered documents count: 3\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":91,"output_type":"execute_result","data":{"text/plain":"{'response': \"Transformer models are able to handle long-range dependencies in sequences more effectively than convolutional networks. This is because the transformer's attention mechanism allows it to weigh the importance of different words in a sequence, regardless of their distance from each other. \\n\",\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"transformer?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T14:36:19.636451Z","iopub.execute_input":"2024-09-10T14:36:19.637303Z","iopub.status.idle":"2024-09-10T14:36:49.448854Z","shell.execute_reply.started":"2024-09-10T14:36:19.637247Z","shell.execute_reply":"2024-09-10T14:36:49.448010Z"},"trusted":true},"execution_count":92,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nGrader output for document: binary_score='yes'\nFinal decision - Perform web search: No\nFiltered documents count: 4\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":92,"output_type":"execute_result","data":{"text/plain":"{'response': 'A transformer model is a neural network that learns context and meaning by tracking relationships in sequential data, like words in a sentence. They use a mechanism called \"self-attention\" to weigh the importance of different words when processing information. \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"example = {\"input\": \"Kaunas - Vilnius in km?\" }\nresponse = await predict_custom_agent_answer(example)\nresponse","metadata":{"execution":{"iopub.status.busy":"2024-09-10T14:41:31.475142Z","iopub.execute_input":"2024-09-10T14:41:31.476127Z","iopub.status.idle":"2024-09-10T14:42:03.845702Z","shell.execute_reply.started":"2024-09-10T14:41:31.476078Z","shell.execute_reply":"2024-09-10T14:42:03.844864Z"},"trusted":true},"execution_count":96,"outputs":[{"name":"stdout","text":"Grader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nGrader output for document: binary_score='no'\nFinal decision - Perform web search: Yes\nFiltered documents count: 0\n---CHECK HALLUCINATIONS---\n generation number: 1\n---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---\n---GRADE GENERATION vs QUESTION---\n---DECISION: GENERATION ADDRESSES QUESTION---\n","output_type":"stream"},{"execution_count":96,"output_type":"execute_result","data":{"text/plain":"{'response': '92 kilometers \\n',\n 'steps': ['question_asked',\n 'promt guard',\n 'retrieve_documents',\n 'grade_document_retrieval',\n 'web_search',\n 'generate_answer']}"},"metadata":{}}]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}