{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Export result tables to per-length CSV files\n",
    "\n",
    "Each table below is pasted as tab-separated text: the first field of a row is the model name, the remaining fields are percentages written with a decimal comma, one per entry of `lens`. Blank fields mean there is no score for that combination.\n",
    "\n",
    "Every non-blank score is written to `results/<model>/<task>/<len>.csv` as a one-row CSV with a single `result` column holding the score divided by 100."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import re\n",
    "import shutil\n",
    "from typing import List, Optional, Tuple\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root directory that receives <model>/<task>/<len>.csv files.\n",
    "out_path = 'results/'\n",
    "\n",
    "# Column headers of the pasted tables, left to right; also used as CSV file names.\n",
    "lens = [0, 4000, 8000, 16000, 32000, 64000, 128000, 500000, 1000000, 10000000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start from an empty output directory so files from earlier runs cannot survive.\n",
    "# Portable replacement for `!rm -r results/*`, which reports an error when the\n",
    "# directory does not exist yet. Unlike the shell glob, this also removes dot-files.\n",
    "shutil.rmtree(out_path, ignore_errors=True)\n",
    "os.makedirs(out_path, exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def parse_score(cell: str) -> Optional[float]:\n",
    "    \"\"\"Convert one table cell into a fraction, or None when the cell is blank.\n",
    "\n",
    "    Cells hold percentages written with a decimal comma, e.g. '99,9' becomes\n",
    "    99.9 / 100. Blank (empty or whitespace-only) cells yield None.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if a non-blank cell is not a number, so that typos in a\n",
    "            pasted table are reported instead of being dropped silently.\n",
    "    \"\"\"\n",
    "    text = cell.strip()\n",
    "    if not text:\n",
    "        return None\n",
    "    try:\n",
    "        return float(text.replace(',', '.')) / 100\n",
    "    except ValueError:\n",
    "        raise ValueError(f'cannot parse score {cell!r}') from None\n",
    "\n",
    "\n",
    "def parse_results_table(table: str) -> List[Tuple[str, List[Optional[float]]]]:\n",
    "    \"\"\"Split a pasted tab-separated table into (model name, scores) rows.\n",
    "\n",
    "    The first field of every line is the model name; each remaining field is\n",
    "    converted with parse_score. Lines containing only whitespace are skipped.\n",
    "    Rows are returned in table order.\n",
    "    \"\"\"\n",
    "    rows = []\n",
    "    for line in table.split('\\n'):\n",
    "        if not line.strip():\n",
    "            continue\n",
    "        model_name, *cells = line.split('\\t')\n",
    "        rows.append((model_name, [parse_score(cell) for cell in cells]))\n",
    "    return rows\n",
    "\n",
    "\n",
    "def write_task_results(task_name: str, table: str, lens: List[int], out_path: str) -> int:\n",
    "    \"\"\"Write every score of one task to <out_path>/<model>/<task_name>/<len>.csv.\n",
    "\n",
    "    The k-th score of a row belongs to lens[k]. Each file is a one-row CSV with\n",
    "    a single 'result' column. Blank scores produce no file, but the model/task\n",
    "    directory is created even when a row has no scores at all.\n",
    "\n",
    "    Args:\n",
    "        task_name: sub-directory name for this table (e.g. 'qa1').\n",
    "        table: tab-separated rows as accepted by parse_results_table.\n",
    "        lens: column headers; one per score column, used as file names.\n",
    "        out_path: root output directory.\n",
    "\n",
    "    Returns:\n",
    "        Number of CSV files written.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if a row has more scores than lens has entries (zip would\n",
    "            otherwise drop the extra columns silently), or a score is malformed.\n",
    "    \"\"\"\n",
    "    written = 0\n",
    "    for model_name, scores in parse_results_table(table):\n",
    "        if len(scores) > len(lens):\n",
    "            raise ValueError(\n",
    "                f'{task_name}/{model_name}: {len(scores)} scores for {len(lens)} lengths'\n",
    "            )\n",
    "\n",
    "        model_task_dir = os.path.join(out_path, model_name, task_name)\n",
    "        os.makedirs(model_task_dir, exist_ok=True)\n",
    "\n",
    "        for length, score in zip(lens, scores):\n",
    "            if score is None:\n",
    "                continue\n",
    "            len_file = os.path.join(model_task_dir, f'{length}.csv')\n",
    "            pd.DataFrame({'result': score}, index=[0]).to_csv(len_file, index=False)\n",
    "            written += 1\n",
    "    return written"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## qa1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "qa1_results = '''RMT\t100,0\t100,0\t99,9\t100,0\t100,0\t99,6\t99,1\t96,4\t94,2\t76,4\n",
    "RMT-Retrieval\t100,0\t99,9\t99,8\t99,9\t99,9\t99,7\t99,5\t97,5\t97,4\t86,0\n",
    "GPT4\t100,0\t97,0\t93,0\t66,0\t43,0\t30,0\t24,0\t\t\t\n",
    "GPT4 + RAG by sentences\t\t61,5\t59,0\t55,5\t55,5\t55,0\t55,5\t51,0\t51,0\t19,5\n",
    "GPT4 + Retrieve sentences (new 100 samples)\t\t63,0\t61,0\t60,0\t60,0\t56,0\t55,0\t55,0\t52,0\t28,0\n",
    "GPT4 + RAG by segments\t\t70,0\t58,0\t54,0\t42,0\t24,0\t16,0\t12,0\t12,0\t4,0\n",
    "GPT-3.5\t\t88,0\t44,0\t24,0\t\t\t\t\t\t\n",
    "GPT-3.5 fine-tuned (trained on 100 samples)\t\t84,0\t72,0\t64,0\t\t\t\t\t\t\n",
    "GPT-3.5 fine-tuned (trained on 1000 samples)\t\t94,0\t96,0\t95,0\t\t\t\t\t\t\n",
    "ARMT\t\t99,9\t99,9\t99,9\t100,0\t100,0\t100,0\t99,9\t99,4\t97,4\n",
    "Mistral medium (xxB)\t\t73,0\t75,0\t58,0\t33,0\t\t\t\t\t'''\n",
    "\n",
    "write_task_results('qa1', qa1_results, lens, out_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## qa2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "qa2_results = '''RMT\t97,7\t98,9\t98,4\t96,1\t87,4\t72,7\t56,3\t32\t25,5\t16,2\n",
    "RMT-Retrieval\t97,7\t98,0\t97,2\t93,4\t85,6\t71,6\t54,9\t31,8\t26,3\t13,0\n",
    "GPT4\t84,0\t72,0\t60,0\t52,0\t24,0\t4,0\t8,0\t\t\t\n",
    "ARMT\t\t99,8\t100,0\t100,0\t100,0\t100,0\t100,0\t99,7\t99,6\t81,7'''\n",
    "\n",
    "write_task_results('qa2', qa2_results, lens, out_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## qa3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "qa3_results = '''RMT\t94,4\t83,6\t73,8\t70,2\t61,8\t51,9\t42,9\t25,9\t24,8\t21\n",
    "RMT-Retrieval\t94,4\t83,8\t76,0\t72,0\t62,5\t52,9\t41,9\t25,5\t22,2\t16,4\n",
    "GPT4\t56,0\t32,0\t24,0\t28,0\t28,0\t12,0\t4,0\t\t\t\n",
    "ARMT\t\t90,9\t92,0\t92,7\t90,7\t88,3\t80,4\t67,9\t56,4\t27,5'''\n",
    "\n",
    "write_task_results('qa3', qa3_results, lens, out_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## qa4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "qa4_results = '''RMT\t99,8\t82,3\t81,9\t79,2\t70,5\t51,2\t40\t29,4\t27,3\t17,2\n",
    "RMT-Retrieval\t99,8\t82,50\t79,70\t76,40\t72,20\t58,80\t50,10\t32,10\t26,00\t14,00\n",
    "GPT4\t100,0\t72,0\t60,0\t72,0\t64,0\t20,0\t36,0\t\t\t\n",
    "ARMT\t\t100,0\t100,0\t100,0\t100,0\t100,0\t100,0\t100,0\t99,8\t93,2'''\n",
    "\n",
    "write_task_results('qa4', qa4_results, lens, out_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## qa5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "qa5_results = '''RMT\t98,4\t99,3\t99,1\t97,4\t95,5\t88,5\t78,1\t56,4\t48\t27,3\n",
    "RMT-Retrieval\t98,4\t98,80\t98,90\t98,20\t93,60\t86,20\t77,40\t55,90\t49,90\t35,00\n",
    "GPT4\t96,0\t100,0\t84,0\t68,0\t52,0\t64,0\t48,0\t\t\t\n",
    "ARMT\t\t99,5\t99,3\t99,4\t98,9\t98,9\t98,8\t98,2\t97,8\t87,0'''\n",
    "\n",
    "write_task_results('qa5', qa5_results, lens, out_path)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}