{ "cells": [ { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/pedro/.local/lib/python3.10/site-packages/urllib3/connectionpool.py:1045: InsecureRequestWarning: Unverified HTTPS request is being made to host 'www.vodafone.pt'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings\n", " warnings.warn(\n", "Fetching pages: 9%|8 | 120/1361 [00:26<03:06, 6.64it/s]Error fetching https://www.vodafone.pt/ajuda/artigos/servicos-apps/seguranca-informatica/o-que-e-um-csirt.html with attempt 1/3: Server disconnected. Retrying...\n", "Fetching pages: 100%|##########| 1361/1361 [03:41<00:00, 6.16it/s]\n" ] } ], "source": [ "import nest_asyncio\n", "\n", "nest_asyncio.apply()\n", "\n", "from langchain.document_loaders import SitemapLoader\n", "\n", "from bs4 import BeautifulSoup\n", "\n", "import re\n", "\n", "# Wrap the function in a list\n", "filter_urls = ['^http(s)?://(www\\.)?vodafone.(pt)/(ajuda)/(artigos).*$'] #apenas https://www.vodafone.pt/ajuda/artigos/...\n", "\n", "\n", "sitemap_loader = SitemapLoader(\n", " web_path=\"https://www.vodafone.pt/sitemap.xml\",\n", " filter_urls=filter_urls,\n", ")\n", "\n", "sitemap_loader.requests_per_second = 10\n", "# Optional: avoid `[SSL: CERTIFICATE_VERIFY_FAILED]` issue\n", "sitemap_loader.requests_kwargs = {\"verify\": False}\n", "\n", "docs = sitemap_loader.load()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'docs' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m/home/pedro/llama2_local/testbhe.ipynb Cell 2\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m docs[\u001b[39m0\u001b[39m]\n", "\u001b[0;31mNameError\u001b[0m: name 'docs' is not defined" ] } ], "source": [ "docs[0]" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nComo ativo ou desativo os alertas Vodafone?\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nPlease click here if you are not redirected within a few seconds.\\nSkip to content\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\xa0\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nCarrinho de compras\\n\\n{{counter ? counter : itemsNumber}}\\nitems\\n\\n\\n\\n\\n\\n\\nCarrinho\\n\\n\\n\\n\\n\\n\\nCarrinho de compras\\n\\nproduto\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{i18nLabels.basketNotEmpty}}\\n\\n\\n{{i18nLabels.reminderDescription}}\\n\\n{{i18nLabels.goToCart}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nPesquisar\\n\\n\\n\\n\\nPesquisa\\n\\n\\n\\n\\nMy Vodafone Menu\\n\\n\\n\\n\\nLogin\\n\\n{{i18nLabels.loginButton}}\\n{{i18nLabels.notOpenAgainButton}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nClose dialog\\n\\n\\n\\n\\n\\n\\n{{searchOverlayHeading}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nProcurar\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{login_close_dialog_span}}\\n\\n\\n\\n\\n{{login_page_title}}\\n\\n\\n\\n\\n{{login_navigation_title}}\\n\\n{{register_navigation_title}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{forgot_password_question}}\\n\\n\\n\\n{{login_name_reset_number}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{forgot_password_cancel}}\\n\\n\\n\\n\\n\\n\\n\\n\\n{{forgot_password_second_title}}\\n\\n{{forgot_password_second_text_sms}}\\n{{forgot_password_second_text_email}}\\n\\n{{forgot_password_next}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{login_label}}\\n\\n\\n\\n\\n\\n\\n\\n\\n{{login_form_name_label}}\\n\\n\\n\\n\\n{{login_form_password_label}}\\n\\n\\n\\n\\n\\n\\n{{login_form_remember_me}}\\n\\n\\n\\n\\n\\n\\n\\n{{login_form_recover_button}}\\n\\n\\n\\n\\n\\n\\n\\n{{login_form_back_to_login_button}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{login_form_activation_back_to_login_button}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{login_recover_password}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{login_form_create_user_name}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{login_form_create_user_email}}\\n\\n\\n{{login_form_create_email_login_text}}\\n\\n\\n{{login_form_create_user_email_confirmation}}\\n\\n\\n\\n\\n{{login_form_create_user_phone_number}}\\n\\n\\n\\n\\n\\n\\n{{login_form_create_user_password}}\\n\\n\\n\\n\\n\\n\\n{{login_form_create_user_confirm_password}}\\n\\n\\n\\n\\n{{login_form_create_user_confirm_label}}\\n\\n{{login_form_create_user_terms_conditions}}\\n\\n{{login_form_create_user_vigentes}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{login_form_activation_back_to_login_button}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{login_form_create_continue_button}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{login_close_dialog_span}}\\n\\n\\n\\n{{login_form_reset_title}}\\n\\n\\n\\n\\n\\n\\n{{login_form_reset_first_field}}\\n\\n\\n\\n\\n\\n{{login_form_reset_second_field}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{login_form_reset_error_validate_field_invalid_key_recover_button}}\\n\\n\\n\\n{{login_form_reset_error_validate_field_invalid_key_to_login_button}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{i18n.closeDialog}}\\n\\n\\n\\n\\n\\n{{i18n.youHaveTwoCarts}}\\n\\n\\n{{i18n.chooseOne}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{item.quantity}}{{CONSTANTS.TIMES}}\\n\\n\\n\\n\\n{{product.name}}\\n\\n\\n{{i18n.color}} {{CONSTANTS.SPACE}} {{product.color}}\\n{{i18n.storage}} {{CONSTANTS.SPACE}} {{product.storage}}\\n\\n\\n{{i18n.included}}\\n\\n\\n\\n{{formatPrice(getItemEURValue(item))}}\\n{{CONSTANTS.SPACE}} {{CONSTANTS.PLUS}} {{CONSTANTS.SPACE}} {{getItemPOINTSValue(item)}} {{CONSTANTS.SPACE}}\\n{{CONSTANTS.SPACE}}{{i18n.points}}\\n\\n\\n{{checkIfFreePrice(getItemEURValue(item),item.eshopProductDTO.variants[0].packProducts[0].productType)}}\\n\\n\\n\\n\\n\\n{{item.quantity}}{{CONSTANTS.TIMES}}\\n\\n\\n\\n\\n{{i18n.color}} {{CONSTANTS.SPACE}} {{item.eshopProductDTO.variants[0].color}}\\n\\n\\n{{i18n.storage}} {{CONSTANTS.SPACE}} {{item.eshopProductDTO.variants[0].storage}}\\n\\n\\n\\n{{formatPrice(getItemEURValue(item))}}\\n{{CONSTANTS.SPACE}} {{CONSTANTS.PLUS}} {{CONSTANTS.SPACE}} {{getItemPOINTSValue(item)}} {{CONSTANTS.SPACE}}\\n{{CONSTANTS.SPACE}}{{i18n.points}}\\n\\n\\n{{checkIfFreePrice(getItemEURValue(item),item.eshopProductDTO.productType)}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{i18n.afterSelectionDesc}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nComo ativo ou desativo os alertas Vodafone?\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nOs alertas podem ser ativados ou desativados via Portal Móvel Vodafone, Portal WAP Vodafone ou SMS.\\nConsulte a informação dos alertas específicos na área Apps e Serviços do site.\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nFollow us\\nSocial\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nContacte-nos\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nTOBi WhatsApp\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nTOBi Chat\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nFale connosco\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nSite map\\n\\n\\n\\n\\n\\nProdutos e Serviços\\n\\n\\n\\n\\nPacotes fibra\\n\\n\\n\\n\\nTarifários móveis\\n\\n\\n\\n\\nTarifa Social de Internet\\n\\n\\n\\n\\nSmartphones\\n\\n\\n\\n\\nInternet Móvel\\n\\n\\n\\n\\nVodafone Travellers\\n\\n\\n\\n\\nVodafone TV PLAY\\n\\n\\n\\n\\n\\n\\n\\n\\nDestaques\\n\\n\\n\\n\\nBlack Friday 2023\\n\\n\\n\\n\\nVodafone Fiber to the Room\\n\\n\\n\\n\\nNovidades e Promoções\\n\\n\\n\\n\\nTv em todos os ecrãs\\n\\n\\n\\n\\nPortabilidade\\n\\n\\n\\n\\nSegunda Casa\\n\\n\\n\\n\\nVodafone Solutions\\n\\n\\n\\n\\nClube Viva\\n\\n\\n\\n\\n\\n\\n\\n\\nSobre a Vodafone\\n\\n\\n\\n\\nA Vodafone\\n\\n\\n\\n\\nRede 5G\\n\\n\\n\\n\\nPress Releases\\n\\n\\n\\n\\nResponsabilidade Social\\n\\n\\n\\n\\nProjetos Financiados\\n\\n\\n\\n\\nFundação Vodafone\\n\\n\\n\\n\\nTrabalhar na Vodafone\\n\\n\\n\\n\\nVodafone Power Lab\\n\\n\\n\\n\\nDescontinuação 3G\\n\\n\\n\\n\\n\\n\\n\\n\\nSuporte\\n\\n\\n\\n\\nAjuda\\n\\n\\n\\n\\nVer Fatura\\n\\n\\n\\n\\nLojas\\n\\n\\n\\n\\nContactos\\n\\n\\n\\n\\nDownload Centre\\n\\n\\n\\n\\nComunidade Vodafone\\n\\n\\n\\n\\nEstado da encomenda\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nProdutos e Serviços\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nPacotes fibra\\n\\n\\n\\n\\nTarifários móveis\\n\\n\\n\\n\\nTarifa Social de Internet\\n\\n\\n\\n\\nSmartphones\\n\\n\\n\\n\\nInternet Móvel\\n\\n\\n\\n\\nVodafone Travellers\\n\\n\\n\\n\\nVodafone TV PLAY\\n\\n\\n\\n\\n\\n\\n\\n\\nDestaques\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nBlack Friday 2023\\n\\n\\n\\n\\nVodafone Fiber to the Room\\n\\n\\n\\n\\nNovidades e Promoções\\n\\n\\n\\n\\nTv em todos os ecrãs\\n\\n\\n\\n\\nPortabilidade\\n\\n\\n\\n\\nSegunda Casa\\n\\n\\n\\n\\nVodafone Solutions\\n\\n\\n\\n\\nClube Viva\\n\\n\\n\\n\\n\\n\\n\\n\\nSuporte\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nAjuda\\n\\n\\n\\n\\nVer Fatura\\n\\n\\n\\n\\nLojas\\n\\n\\n\\n\\nContactos\\n\\n\\n\\n\\nDownload Centre\\n\\n\\n\\n\\nComunidade Vodafone\\n\\n\\n\\n\\nEstado da encomenda\\n\\n\\n\\n\\n\\n\\n\\n\\nSobre a Vodafone\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nA Vodafone\\n\\n\\n\\n\\nRede 5G\\n\\n\\n\\n\\nPress Releases\\n\\n\\n\\n\\nResponsabilidade Social\\n\\n\\n\\n\\nProjetos Financiados\\n\\n\\n\\n\\nFundação Vodafone\\n\\n\\n\\n\\nTrabalhar na Vodafone\\n\\n\\n\\n\\nVodafone Power Lab\\n\\n\\n\\n\\nDescontinuação 3G\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\nPrivacidade\\n\\n\\n\\n\\n\\n\\nPolítica Anticorrupção\\n\\n\\n\\n\\n\\n\\nConfiguração de Cookies\\n\\n\\n\\n\\n\\n\\nFeedback\\n\\n\\n\\n\\n\\n\\nOfertas Wholesale\\n\\n\\n\\n\\n\\n\\nEnglish\\n\\n\\n\\n\\n© {{year}} {{footerText}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n{{title}}\\n\\n\\n\\n\\n{{title}}\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n\\n'" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "bhe = pd.DataFrame(docs[0])\n", "bhe[1][0]" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "import requests\n", "from bs4 import BeautifulSoup\n", "\n", "import re\n", "\n", "from sklearn.feature_extraction.text import CountVectorizer\n", "from sklearn.feature_extraction.text import TfidfTransformer\n", "from sklearn.naive_bayes import MultinomialNB\n" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "def extrair_informacoes(url):\n", " response = requests.get(url)\n", " soup = BeautifulSoup(response.content, 'html.parser')\n", " texto = soup.get_text()\n", " return texto\n", "\n", "def preprocessar_texto(lista_textos):\n", " # Result list to store preprocessed texts\n", " resultados = []\n", "\n", " for texto in lista_textos:\n", " # Remover HTML\n", " texto = re.sub('<.*?>', '', texto)\n", " # Converter para minúsculas\n", " texto = texto.lower()\n", " # Remover pontuação\n", " texto = re.sub('[^a-zA-Z0-9 \\n\\.]', '', texto)\n", " # Append the preprocessed text to the result list\n", " resultados.append(texto)\n", "\n", " return resultados\n", "\n", "def treinar_modelo(texto):\n", "\n", " count_vect = CountVectorizer()\n", " X_train_counts = count_vect.fit_transform(texto)\n", "\n", " tfidf_transformer = TfidfTransformer()\n", " X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)\n", "\n", " clf = MultinomialNB().fit(X_train_tfidf, texto)\n", " return clf\n", "\n", "# 5. Implementação do Chatbot\n", "def chatbot(pergunta, modelo):\n", " # Pré-processar a pergunta\n", " pergunta = preprocessar_texto(pergunta)\n", " # Usar o modelo para gerar uma resposta\n", " resposta = modelo.predict([pergunta])\n", " return resposta" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "um = list()\n", "dois = list()\n", "ext1 = extrair_informacoes('https://www.vodafone.pt/campanhas/tv-net-voz.html?c_id=Vodafone_VF-Exact-FTTH_Fixo_Google_AO_AO_Performance_Geral_na_na&c_name=bf23fixo&c_source=Google&c_medium=cpc&c_type=textads&c_phase=PF&c_goal=LE&c_product=FIXO&c_subproduct=Geral&c_vtype=na&c_term=na&gad_source=1&gclid=CjwKCAiAx_GqBhBQEiwAlDNAZmD1xHxEoAXzEfF9DiLYsXvRROkMihHD6d-uMozTUFhn5z44GS0g1BoCqm4QAvD_BwE')\n", "ext2 = extrair_informacoes('https://www.vodafone.pt')\n", "um.insert(0,ext1)\n", "um.insert(1,ext1)\n", "dois = preprocessar_texto(um)\n", "\n", "tres = treinar_modelo(dois)\n" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "ename": "ValueError", "evalue": "dtype='numeric' is not compatible with arrays of bytes/strings.Convert your data to numeric values explicitly instead.", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m/home/pedro/llama2_local/testbhe.ipynb Cell 7\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m chatbot(\u001b[39m'\u001b[39;49m\u001b[39mA vodafone é fixe?\u001b[39;49m\u001b[39m'\u001b[39;49m, tres)\n", "\u001b[1;32m/home/pedro/llama2_local/testbhe.ipynb Cell 7\u001b[0m line \u001b[0;36m3\n\u001b[1;32m 37\u001b[0m pergunta \u001b[39m=\u001b[39m preprocessar_texto(pergunta)\n\u001b[1;32m 38\u001b[0m \u001b[39m# Usar o modelo para gerar uma resposta\u001b[39;00m\n\u001b[0;32m---> 39\u001b[0m resposta \u001b[39m=\u001b[39m modelo\u001b[39m.\u001b[39;49mpredict([pergunta])\n\u001b[1;32m 40\u001b[0m \u001b[39mreturn\u001b[39;00m resposta\n", "File \u001b[0;32m~/.local/lib/python3.10/site-packages/sklearn/naive_bayes.py:101\u001b[0m, in \u001b[0;36m_BaseNB.predict\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 88\u001b[0m \u001b[39mPerform classification on an array of test vectors X.\u001b[39;00m\n\u001b[1;32m 89\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[39m Predicted target values for X.\u001b[39;00m\n\u001b[1;32m 99\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 100\u001b[0m check_is_fitted(\u001b[39mself\u001b[39m)\n\u001b[0;32m--> 101\u001b[0m X \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_check_X(X)\n\u001b[1;32m 102\u001b[0m jll \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_joint_log_likelihood(X)\n\u001b[1;32m 103\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclasses_[np\u001b[39m.\u001b[39margmax(jll, axis\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m)]\n", "File \u001b[0;32m~/.local/lib/python3.10/site-packages/sklearn/naive_bayes.py:574\u001b[0m, in \u001b[0;36m_BaseDiscreteNB._check_X\u001b[0;34m(self, X)\u001b[0m\n\u001b[1;32m 572\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_check_X\u001b[39m(\u001b[39mself\u001b[39m, X):\n\u001b[1;32m 573\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Validate X, used only in predict* methods.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 574\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_validate_data(X, accept_sparse\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mcsr\u001b[39;49m\u001b[39m\"\u001b[39;49m, reset\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m)\n", "File \u001b[0;32m~/.local/lib/python3.10/site-packages/sklearn/base.py:605\u001b[0m, in \u001b[0;36mBaseEstimator._validate_data\u001b[0;34m(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params)\u001b[0m\n\u001b[1;32m 603\u001b[0m out \u001b[39m=\u001b[39m X, y\n\u001b[1;32m 604\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mnot\u001b[39;00m no_val_X \u001b[39mand\u001b[39;00m no_val_y:\n\u001b[0;32m--> 605\u001b[0m out \u001b[39m=\u001b[39m check_array(X, input_name\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mX\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mcheck_params)\n\u001b[1;32m 606\u001b[0m \u001b[39melif\u001b[39;00m no_val_X \u001b[39mand\u001b[39;00m \u001b[39mnot\u001b[39;00m no_val_y:\n\u001b[1;32m 607\u001b[0m out \u001b[39m=\u001b[39m _check_y(y, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mcheck_params)\n", "File \u001b[0;32m~/.local/lib/python3.10/site-packages/sklearn/utils/validation.py:946\u001b[0m, in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)\u001b[0m\n\u001b[1;32m 938\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 939\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mExpected 2D array, got 1D array instead:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39marray=\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\n\u001b[1;32m 940\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mReshape your data either using array.reshape(-1, 1) if \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 941\u001b[0m \u001b[39m\"\u001b[39m\u001b[39myour data has a single feature or array.reshape(1, -1) \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 942\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mif it contains a single sample.\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(array)\n\u001b[1;32m 943\u001b[0m )\n\u001b[1;32m 945\u001b[0m \u001b[39mif\u001b[39;00m dtype_numeric \u001b[39mand\u001b[39;00m \u001b[39mhasattr\u001b[39m(array\u001b[39m.\u001b[39mdtype, \u001b[39m\"\u001b[39m\u001b[39mkind\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mand\u001b[39;00m array\u001b[39m.\u001b[39mdtype\u001b[39m.\u001b[39mkind \u001b[39min\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mUSV\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[0;32m--> 946\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 947\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mdtype=\u001b[39m\u001b[39m'\u001b[39m\u001b[39mnumeric\u001b[39m\u001b[39m'\u001b[39m\u001b[39m is not compatible with arrays of bytes/strings.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 948\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mConvert your data to numeric values explicitly instead.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 949\u001b[0m )\n\u001b[1;32m 950\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m allow_nd \u001b[39mand\u001b[39;00m array\u001b[39m.\u001b[39mndim \u001b[39m>\u001b[39m\u001b[39m=\u001b[39m \u001b[39m3\u001b[39m:\n\u001b[1;32m 951\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 952\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mFound array with dim \u001b[39m\u001b[39m%d\u001b[39;00m\u001b[39m. \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m expected <= 2.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 953\u001b[0m \u001b[39m%\u001b[39m (array\u001b[39m.\u001b[39mndim, estimator_name)\n\u001b[1;32m 954\u001b[0m )\n", "\u001b[0;31mValueError\u001b[0m: dtype='numeric' is not compatible with arrays of bytes/strings.Convert your data to numeric values explicitly instead." ] } ], "source": [ "chatbot('A vodafone é fixe?', tres)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 2 }