From 2cced11fa796214cba91993918c42d8d28e4bce1 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Mon, 24 Nov 2025 19:14:27 -0500 Subject: [PATCH 1/4] Delete 2025_11_23_demo_train_an_llm_with_cerebros.ipynb Deleted file version with a typo. --- ...1_23_demo_train_an_llm_with_cerebros.ipynb | 6561 ----------------- 1 file changed, 6561 deletions(-) delete mode 100644 2025_11_23_demo_train_an_llm_with_cerebros.ipynb diff --git a/2025_11_23_demo_train_an_llm_with_cerebros.ipynb b/2025_11_23_demo_train_an_llm_with_cerebros.ipynb deleted file mode 100644 index 9004212..0000000 --- a/2025_11_23_demo_train_an_llm_with_cerebros.ipynb +++ /dev/null @@ -1,6561 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Build our LLM From Scratch -\n", - "\n", - "## How Cerebros NotGPT works under the hood:\n", - "\n", - "\n", - "### This notebook demonstrates the end-to-end training pipeline that builds a small scale generative LLM from scratch, a small scale proof of concept for our own Cerebros NotGPT model, then fine tunes it on additional data.\n", - "\n", - "The process is divided into two main phases:\n", - "\n", - "- Phase I-a: Neural Architecture Search (NAS) - We use SimpleCerebrosRandomSearch to automatically discover an effective neural network architecture from a small dataset.\n", - "- Phase I-b: Extended Training - The best architecture found in Phase I-a is then trained on a larger dataset to improve its performance.\n", - "\n", - "Finally, the trained model is evaluated and serialized for future use.\n", - "\n", - "\n", - "## Setup and Configuration\n", - "\n", - "Note: This script is configured as a vanilla-scale demo environment (4 CPU / 16 GB RAM Linux with Python 3.12). 
No GPU is needed, and this will run in the free version of Google Colab. \n", - "\n", - "## Vanilla Demo\n", - "\n", - "- For production use, you would significantly increase the sample sizes and adjust other parameters accordingly.\n", - "- The quality of the text generated by this minimal demo (trained on 30 text samples at a sequence length of 40) does not represent the quality of a full-scale model generated from the same code.\n", - "- A script that can be modified to do so is available at: https://github.com/david-thrower/cerebros-core-algorithm-alpha/blob/main/train_a_generative_llm.py" - ], - "metadata": { - "id": "nnsAHoJyWLed" - } - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "NzJF6_JuWElV", - "outputId": "a0f3246f-0ccd-48ea-da55-86479bc0f93c" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Python 3.12.12\n" - ] - } - ], - "source": [ - "! python --version" - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Getting started: Download the repo and go to the main directory of the repo" - ], - "metadata": { - "id": "f6TD2XsKPJIY" - } - }, - { - "cell_type": "code", - "source": [ - "# Download the repo\n", - "! 
git clone https://github.com/david-thrower/cerebros-core-algorithm-alpha.git" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "AcECFSs7WVsi", - "outputId": "9fd59935-35d4-4a08-9c8a-fb01fd3e4f03" - }, - "execution_count": 25, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Cloning into 'cerebros-core-algorithm-alpha'...\n", - "remote: Enumerating objects: 8036, done.\u001b[K\n", - "remote: Counting objects: 100% (1737/1737), done.\u001b[K\n", - "remote: Compressing objects: 100% (321/321), done.\u001b[K\n", - "remote: Total 8036 (delta 1612), reused 1449 (delta 1411), pack-reused 6299 (from 2)\u001b[K\n", - "Receiving objects: 100% (8036/8036), 65.90 MiB | 21.67 MiB/s, done.\n", - "Resolving deltas: 100% (3116/3116), done.\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "# set the working directory\n", - "%cd cerebros-core-algorithm-alpha" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "mCpJGfD2WfLj", - "outputId": "e0fe8c05-6154-41cd-f489-08cfd2ad0fa8" - }, - "execution_count": 26, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "/content/cerebros-core-algorithm-alpha/cerebros-core-algorithm-alpha\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Next install all dependencies.\n", - "\n", - "There are 2 requirement files:\n", - " - requirements.txt: The core requirements of the neural architecture search\n", - " - cicd-requirements.txt: Requirements for NLP and text generation" - ], - "metadata": { - "id": "yT4hPXOKPU_8" - } - }, - { - "cell_type": "code", - "source": [ - "# Install the requirements for the core algorithm\n", - "! 
pip install -r requirements.txt; pip install -r cicd-requirements.txt" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "nwElyEdpW90P", - "outputId": "170e2158-b7a9-49f0-ce63-22c4c7410f33" - }, - "execution_count": 27, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Requirement already satisfied: jax==0.5.3 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 1)) (0.5.3)\n", - "Requirement already satisfied: jaxlib==0.5.3 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 2)) (0.5.3)\n", - "Requirement already satisfied: pendulum==3.0.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 3)) (3.0.0)\n", - "Collecting tensorflow==2.20.0 (from -r requirements.txt (line 4))\n", - " Using cached tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)\n", - "Collecting numpy==2.3.5 (from -r requirements.txt (line 5))\n", - " Using cached numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)\n", - "Requirement already satisfied: pandas==2.3.3 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 6)) (2.3.3)\n", - "Requirement already satisfied: pyvis==0.3.2 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 7)) (0.3.2)\n", - "Requirement already satisfied: plotly==5.20.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 8)) (5.20.0)\n", - "Requirement already satisfied: matplotlib==3.10.7 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 9)) (3.10.7)\n", - "Requirement already satisfied: imageio==2.37.2 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 10)) (2.37.2)\n", - "Requirement already satisfied: tqdm==4.67.1 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 11)) (4.67.1)\n", - "Requirement 
already satisfied: ml_dtypes>=0.4.0 in /usr/local/lib/python3.12/dist-packages (from jax==0.5.3->-r requirements.txt (line 1)) (0.5.4)\n", - "Requirement already satisfied: opt_einsum in /usr/local/lib/python3.12/dist-packages (from jax==0.5.3->-r requirements.txt (line 1)) (3.4.0)\n", - "Requirement already satisfied: scipy>=1.11.1 in /usr/local/lib/python3.12/dist-packages (from jax==0.5.3->-r requirements.txt (line 1)) (1.16.3)\n", - "Requirement already satisfied: python-dateutil>=2.6 in /usr/local/lib/python3.12/dist-packages (from pendulum==3.0.0->-r requirements.txt (line 3)) (2.9.0.post0)\n", - "Requirement already satisfied: tzdata>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pendulum==3.0.0->-r requirements.txt (line 3)) (2025.2)\n", - "Requirement already satisfied: time-machine>=2.6.0 in /usr/local/lib/python3.12/dist-packages (from pendulum==3.0.0->-r requirements.txt (line 3)) (3.1.0)\n", - "Requirement already satisfied: absl-py>=1.0.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (1.4.0)\n", - "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (1.6.3)\n", - "Requirement already satisfied: flatbuffers>=24.3.25 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (25.9.23)\n", - "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (0.6.0)\n", - "Requirement already satisfied: google_pasta>=0.1.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (0.2.0)\n", - "Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (18.1.1)\n", - "Requirement already satisfied: packaging in 
/usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (25.0)\n", - "Requirement already satisfied: protobuf>=5.28.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (5.29.5)\n", - "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (2.32.4)\n", - "Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (75.2.0)\n", - "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (1.17.0)\n", - "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (3.2.0)\n", - "Requirement already satisfied: typing_extensions>=3.6.6 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (4.15.0)\n", - "Requirement already satisfied: wrapt>=1.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (2.0.1)\n", - "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (1.76.0)\n", - "Collecting tensorboard~=2.20.0 (from tensorflow==2.20.0->-r requirements.txt (line 4))\n", - " Using cached tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)\n", - "Requirement already satisfied: keras>=3.10.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (3.10.0)\n", - "Requirement already satisfied: h5py>=3.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (3.15.1)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from 
pandas==2.3.3->-r requirements.txt (line 6)) (2025.2)\n", - "Requirement already satisfied: ipython>=5.3.0 in /usr/local/lib/python3.12/dist-packages (from pyvis==0.3.2->-r requirements.txt (line 7)) (7.34.0)\n", - "Requirement already satisfied: jinja2>=2.9.6 in /usr/local/lib/python3.12/dist-packages (from pyvis==0.3.2->-r requirements.txt (line 7)) (3.1.6)\n", - "Requirement already satisfied: jsonpickle>=1.4.1 in /usr/local/lib/python3.12/dist-packages (from pyvis==0.3.2->-r requirements.txt (line 7)) (4.1.1)\n", - "Requirement already satisfied: networkx>=1.11 in /usr/local/lib/python3.12/dist-packages (from pyvis==0.3.2->-r requirements.txt (line 7)) (3.5)\n", - "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.12/dist-packages (from plotly==5.20.0->-r requirements.txt (line 8)) (8.5.0)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (1.3.3)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (0.12.1)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (4.60.1)\n", - "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (1.4.9)\n", - "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (11.3.0)\n", - "Requirement already satisfied: pyparsing>=3 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (3.2.5)\n", - "Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.12/dist-packages (from astunparse>=1.6.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.45.1)\n", - "Requirement 
already satisfied: jedi>=0.16 in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.19.2)\n", - "Requirement already satisfied: decorator in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (4.4.2)\n", - "Requirement already satisfied: pickleshare in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.7.5)\n", - "Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (5.7.1)\n", - "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (3.0.52)\n", - "Requirement already satisfied: pygments in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (2.19.2)\n", - "Requirement already satisfied: backcall in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.2.0)\n", - "Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.2.1)\n", - "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (4.9.0)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2>=2.9.6->pyvis==0.3.2->-r requirements.txt (line 7)) (3.0.3)\n", - "Requirement already satisfied: rich in /usr/local/lib/python3.12/dist-packages (from keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (13.9.4)\n", - "Requirement already satisfied: namex in /usr/local/lib/python3.12/dist-packages (from keras>=3.10.0->tensorflow==2.20.0->-r 
requirements.txt (line 4)) (0.1.0)\n", - "Requirement already satisfied: optree in /usr/local/lib/python3.12/dist-packages (from keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.18.0)\n", - "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (3.4.4)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (3.11)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (2.5.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (2025.11.12)\n", - "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.20.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (3.10)\n", - "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.20.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.7.2)\n", - "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.20.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (3.1.3)\n", - "Requirement already satisfied: parso<0.9.0,>=0.8.4 in /usr/local/lib/python3.12/dist-packages (from jedi>=0.16->ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.8.5)\n", - "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.12/dist-packages (from pexpect>4.3->ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.7.0)\n", - "Requirement already satisfied: wcwidth in /usr/local/lib/python3.12/dist-packages (from 
prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.2.14)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.12/dist-packages (from rich->keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (4.0.0)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.12/dist-packages (from markdown-it-py>=2.2.0->rich->keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.1.2)\n", - "Using cached tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (620.7 MB)\n", - "Using cached numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.6 MB)\n", - "Using cached tensorboard-2.20.0-py3-none-any.whl (5.5 MB)\n", - "Installing collected packages: numpy, tensorboard, tensorflow\n", - " Attempting uninstall: numpy\n", - " Found existing installation: numpy 1.26.4\n", - " Uninstalling numpy-1.26.4:\n", - " Successfully uninstalled numpy-1.26.4\n", - " Attempting uninstall: tensorboard\n", - " Found existing installation: tensorboard 2.19.0\n", - " Uninstalling tensorboard-2.19.0:\n", - " Successfully uninstalled tensorboard-2.19.0\n", - " Attempting uninstall: tensorflow\n", - " Found existing installation: tensorflow 2.19.1\n", - " Uninstalling tensorflow-2.19.1:\n", - " Successfully uninstalled tensorflow-2.19.1\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "scikit-learn 1.4.1.post1 requires numpy<2.0,>=1.19.5, but you have numpy 2.3.5 which is incompatible.\n", - "google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.3 which is incompatible.\n", - "tensorflow-text 2.19.0 requires tensorflow<2.20,>=2.19.0, but you have tensorflow 2.20.0 which is incompatible.\n", - "opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 2.3.5 which is incompatible.\n", - "numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.3.5 which is incompatible.\n", - "opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 2.3.5 which is incompatible.\n", - "umap-learn 0.5.9.post2 requires scikit-learn>=1.6, but you have scikit-learn 1.4.1.post1 which is incompatible.\n", - "opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 2.3.5 which is incompatible.\n", - "orbax-checkpoint 0.11.28 requires jax>=0.6.0, but you have jax 0.5.3 which is incompatible.\n", - "tensorflow-decision-forests 1.12.0 requires tensorflow==2.19.0, but you have tensorflow 2.20.0 which is incompatible.\n", - "flax 0.10.7 requires jax>=0.6.0, but you have jax 0.5.3 which is incompatible.\n", - "tf-keras 2.19.0 requires tensorflow<2.20,>=2.19, but you have tensorflow 2.20.0 which is incompatible.\n", - "imbalanced-learn 0.14.0 requires scikit-learn<2,>=1.4.2, but you have scikit-learn 1.4.1.post1 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed numpy-2.3.5 tensorboard-2.20.0 tensorflow-2.20.0\n", - "Requirement already satisfied: tensorflow-text==2.19.0 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 1)) (2.19.0)\n", - "Requirement already satisfied: keras-nlp==0.19.0 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 2)) (0.19.0)\n", - "Requirement 
already satisfied: scikit-learn==1.4.1.post1 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 3)) (1.4.1.post1)\n", - "Requirement already satisfied: tensorflow-hub==0.16.1 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 4)) (0.16.1)\n", - "Requirement already satisfied: transformers==4.54.0 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 5)) (4.54.0)\n", - "Collecting tensorflow<2.20,>=2.19.0 (from tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1))\n", - " Using cached tensorflow-2.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)\n", - "Requirement already satisfied: keras-hub==0.19.0 in /usr/local/lib/python3.12/dist-packages (from keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.19.0)\n", - "Collecting numpy<2.0,>=1.19.5 (from scikit-learn==1.4.1.post1->-r cicd-requirements.txt (line 3))\n", - " Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)\n", - "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn==1.4.1.post1->-r cicd-requirements.txt (line 3)) (1.16.3)\n", - "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn==1.4.1.post1->-r cicd-requirements.txt (line 3)) (1.5.2)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn==1.4.1.post1->-r cicd-requirements.txt (line 3)) (3.6.0)\n", - "Requirement already satisfied: protobuf>=3.19.6 in /usr/local/lib/python3.12/dist-packages (from tensorflow-hub==0.16.1->-r cicd-requirements.txt (line 4)) (5.29.5)\n", - "Requirement already satisfied: tf-keras>=2.14.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow-hub==0.16.1->-r cicd-requirements.txt (line 4)) (2.19.0)\n", - "Requirement already satisfied: filelock in 
/usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (3.20.0)\n", - "Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (0.36.0)\n", - "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (25.0)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (6.0.3)\n", - "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2024.11.6)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2.32.4)\n", - "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (0.21.4)\n", - "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (0.7.0)\n", - "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (4.67.1)\n", - "Requirement already satisfied: keras>=3.5 in /usr/local/lib/python3.12/dist-packages (from keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (3.10.0)\n", - "Requirement already satisfied: absl-py in /usr/local/lib/python3.12/dist-packages (from keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (1.4.0)\n", - "Requirement already satisfied: rich in /usr/local/lib/python3.12/dist-packages (from keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) 
(13.9.4)\n", - "Requirement already satisfied: kagglehub in /usr/local/lib/python3.12/dist-packages (from keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.3.13)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2025.3.0)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (4.15.0)\n", - "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (1.2.0)\n", - "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (1.6.3)\n", - "Requirement already satisfied: flatbuffers>=24.3.25 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (25.9.23)\n", - "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.6.0)\n", - "Requirement already satisfied: google-pasta>=0.1.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.2.0)\n", - "Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (18.1.1)\n", - "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r 
cicd-requirements.txt (line 1)) (3.4.0)\n", - "Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (75.2.0)\n", - "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (1.17.0)\n", - "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.2.0)\n", - "Requirement already satisfied: wrapt>=1.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (2.0.1)\n", - "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (1.76.0)\n", - "Collecting tensorboard~=2.19.0 (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1))\n", - " Using cached tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)\n", - "Requirement already satisfied: h5py>=3.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.15.1)\n", - "Requirement already satisfied: ml-dtypes<1.0.0,>=0.5.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.5.4)\n", - "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (3.4.4)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (3.11)\n", - 
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2.5.0)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2025.11.12)\n", - "Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.12/dist-packages (from astunparse>=1.6.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.45.1)\n", - "Requirement already satisfied: namex in /usr/local/lib/python3.12/dist-packages (from keras>=3.5->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.1.0)\n", - "Requirement already satisfied: optree in /usr/local/lib/python3.12/dist-packages (from keras>=3.5->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.18.0)\n", - "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.19.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.10)\n", - "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.19.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.7.2)\n", - "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.19.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.1.3)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.12/dist-packages (from rich->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (4.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.12/dist-packages (from rich->keras-hub==0.19.0->keras-nlp==0.19.0->-r 
cicd-requirements.txt (line 2)) (2.19.2)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.12/dist-packages (from markdown-it-py>=2.2.0->rich->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.1.2)\n", - "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.12/dist-packages (from werkzeug>=1.0.1->tensorboard~=2.19.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.0.3)\n", - "Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.0 MB)\n", - "Using cached tensorflow-2.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (645.0 MB)\n", - "Using cached tensorboard-2.19.0-py3-none-any.whl (5.5 MB)\n", - "Installing collected packages: numpy, tensorboard, tensorflow\n", - " Attempting uninstall: numpy\n", - " Found existing installation: numpy 2.3.5\n", - " Uninstalling numpy-2.3.5:\n", - " Successfully uninstalled numpy-2.3.5\n", - " Attempting uninstall: tensorboard\n", - " Found existing installation: tensorboard 2.20.0\n", - " Uninstalling tensorboard-2.20.0:\n", - " Successfully uninstalled tensorboard-2.20.0\n", - " Attempting uninstall: tensorflow\n", - " Found existing installation: tensorflow 2.20.0\n", - " Uninstalling tensorflow-2.20.0:\n", - " Successfully uninstalled tensorflow-2.20.0\n", - "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", - "google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.3 which is incompatible.\n", - "opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 1.26.4 which is incompatible.\n", - "opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 1.26.4 which is incompatible.\n", - "pytensor 2.35.1 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.\n", - "umap-learn 0.5.9.post2 requires scikit-learn>=1.6, but you have scikit-learn 1.4.1.post1 which is incompatible.\n", - "opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 1.26.4 which is incompatible.\n", - "orbax-checkpoint 0.11.28 requires jax>=0.6.0, but you have jax 0.5.3 which is incompatible.\n", - "tensorflow-decision-forests 1.12.0 requires tensorflow==2.19.0, but you have tensorflow 2.19.1 which is incompatible.\n", - "flax 0.10.7 requires jax>=0.6.0, but you have jax 0.5.3 which is incompatible.\n", - "shap 0.50.0 requires numpy>=2, but you have numpy 1.26.4 which is incompatible.\n", - "imbalanced-learn 0.14.0 requires scikit-learn<2,>=1.4.2, but you have scikit-learn 1.4.1.post1 which is incompatible.\u001b[0m\u001b[31m\n", - "\u001b[0mSuccessfully installed numpy-1.26.4 tensorboard-2.19.0 tensorflow-2.19.1\n" - ] - }, - { - "output_type": "display_data", - "data": { - "application/vnd.colab-display-data+json": { - "pip_warning": { - "packages": [ - "numpy", - "tensorflow" - ] - }, - "id": "d3a167bbbde043ef9a994c35060fda79" - } - }, - "metadata": {} - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# **RESTART THE SESSION**\n", - "\n", - "Then proceed to the next cell which imports all necessary libraries and defines global constants and hyperparameters for the entire pipeline.\n" - ], - "metadata": { - "id": "v69rLBcmXyGD" - } - }, - { - "cell_type": "code", 
- "source": [ - "! ls" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ubtKyfBQzFEW", - "outputId": "6cbe44e6-3ce7-4227-982a-88d0d36d2205" - }, - "execution_count": 1, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "cerebros-core-algorithm-alpha sample_data\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "# 1. # **ONLY IF** the directory cerebros-core-algorithm-alpha is not still\n", - "# there, clone the directory again.\n", - "# ! git clone https://github.com/david-thrower/cerebros-core-algorithm-alpha.git\n", - "\n", - "# 2. Set the working directory (in the new session) - DO run this.\n", - "%cd cerebros-core-algorithm-alpha" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "NemXTsYgfE0s", - "outputId": "ca92342f-1f82-42ee-8562-980b1c8dd849" - }, - "execution_count": 2, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "/content/cerebros-core-algorithm-alpha\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "# Verify we are in the right place:\n", - "! 
pwd" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "D3K4dSVQhrIc", - "outputId": "5a45fa94-1bb3-46ce-c362-27f456221fd6" - }, - "execution_count": 3, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "/content/cerebros-core-algorithm-alpha\n" - ] - } - ] - }, - { - "cell_type": "code", - "source": [ - "# Standard library imports\n", - "import subprocess\n", - "import time\n", - "from gc import collect\n", - "\n", - "# Third-party library imports\n", - "import tensorflow as tf\n", - "import pandas as pd\n", - "import pendulum\n", - "from transformers import AutoTokenizer\n", - "from sklearn.model_selection import train_test_split\n", - "\n", - "# Cerebros specific imports\n", - "from cerebros.units.units import DenseUnit\n", - "from cerebros.simplecerebrosrandomsearch.simple_cerebros_random_search import SimpleCerebrosRandomSearch\n", - "from cerebros.denseautomlstructuralcomponent.dense_automl_structural_component import (\n", - " zero_7_exp_decay,\n", - " zero_95_exp_decay,\n", - " simple_sigmoid\n", - ")\n", - "from cerebrosllmutils.llm_utils import (\n", - " prepare_data,\n", - " InterleavedRoPE,\n", - " Perplexity,\n", - " CerebrosNotGPTConfig,\n", - " CerebrosNotGPT,\n", - " WarmupCosineDecayRestarts\n", - ")\n", - "\n", - "# Import the data source: Format List[str]\n", - "from vanilladatasets.web_english_bible import samples as bible\n", - "\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "WKCdCv96X4YX", - "outputId": "875f6626-4f4b-426c-c697-da9f186e440a" - }, - "execution_count": 4, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.12/dist-packages/jaxlib/plugin_support.py:71: RuntimeWarning: JAX plugin jax_cuda12_plugin version 0.7.2 is installed, but it is not compatible with the installed jaxlib version 0.5.3, so it will not be used.\n", - " warnings.warn(\n" - ] - } - ] - }, - { - 
"cell_type": "markdown", - "source": [ - "# Data and Training Constants\n", - "\n", - "These parameters control the amount of data used and the behavior of the training stages.\n", - "\n", - "- **PHASE_I_A_SAMPLES_TO_CREATE**: Size of the subset of the dataset used for the NAS (Neural Architecture Search) stage (number of text samples).\n", - "- **PHASE_I_B_SAMPLES_TO_CREATE**: Number of samples to use for the main training task stage after Neural Architecture Search is completed.\n", - "- **PHASE_I_B_VAL_SPLIT**: Fraction of data for validation in Phase I-b.\n", - "- **PHASE_I_B_SAMPLE_EXPANSION_BATCH_SIZE**: Batch size for preprocessing in Phase I-b to manage RAM.\n", - "- **PROMPT_LENGTH**: Number of tokens provided to the model to predict the next token. It is recommended to keep this as 1.\n" - ], - "metadata": { - "id": "rK0LZP7KbQqm" - } - }, - { - "cell_type": "code", - "source": [ - "# Samples to use for the neural architecture search stage\n", - "PHASE_I_A_SAMPLES_TO_CREATE = 10\n", - "\n", - "# Samples to use for the main training stage\n", - "PHASE_I_B_SAMPLES_TO_CREATE = 20\n", - "PHASE_I_B_VAL_SPLIT = 0.15\n", - "\n", - "# For Stage I-b, we preprocess in batches to avoid high RAM usage.\n", - "PHASE_I_B_SAMPLE_EXPANSION_BATCH_SIZE = 10\n", - "\n", - "# How many tokens to provide before expecting the next token to be predicted.\n", - "PROMPT_LENGTH = 1\n" - ], - "metadata": { - "id": "vywbZQxAZC9R" - }, - "execution_count": 5, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Model and Embedding Constants\n", - "\n", - "These constants define the size and shape of the model's text processing components.\n", - "\n", - "- **MAX_SEQ_LENGTH**: The maximum sequence length the model will handle. This has a linear relationship with RAM/CPU usage.\n", - "- **tokenizer_checkpoint**: The Hugging Face model to use for tokenization.\n", - "- **EMBEDDING_N**: A factor to determine the embedding dimensionality (EMBEDDING_DIM = EMBEDDING_N * 2). 
A factor to determine the embedding dimensionality (EMBEDDING_DIM = EMBEDDING_N * 2). The resulting embedding dimensionality (EMBEDDING_DIM) for InterleavedRoPE must be an even number. Using this parameter as a proxy, rather than setting EMBEDDING_DIM directly, acts as a guard rail to ensure this constraint is met.\n", - "- **PROJECTION_N**: Controls the size of a projection layer after embedding. Increasing this value can significantly increase RAM usage.\n" - ], - "metadata": { - "id": "5jK5wbA5b8se" - } - }, - { - "cell_type": "code", - "source": [ - "# Text encoding / embedding related constants\n", - "MAX_SEQ_LENGTH = 40\n", - "\n", - "# Tokenization\n", - "tokenizer_checkpoint = \"HuggingFaceTB/SmolLM3-3B\"\n", - "tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)\n", - "\n", - "# Add special tokens for potential instruction-following formats\n", - "special_tokens = {\n", - " \"additional_special_tokens\": [\"\", \"\", \"\", \"\"]\n", - "}\n", - "tokenizer.add_special_tokens(special_tokens)\n", - "\n", - "VOCABULARY_SIZE = len(tokenizer)\n", - "\n", - "# For InterleavedRoPE, the embedding output dim must be an even number.\n", - "EMBEDDING_N = 6\n", - "EMBEDDING_DIM = int(EMBEDDING_N * 2)\n", - "\n", - "# Size of the projection layer. 
Keep low to manage RAM.\n", - "PROJECTION_N = 1\n" - ], - "metadata": { - "id": "4Kka_A4tb3aJ", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "6c85d1ae-52f4-4ddf-d768-ea5781b1b7da" - }, - "execution_count": 6, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", - "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", - "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", - "You will be able to reuse this secret in all of your notebooks.\n", - "Please note that authentication is recommended but still optional to access public models or datasets.\n", - " warnings.warn(\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Stage I-a (NAS) Hyperparameters\n", - "\n", - "These parameters control the Neural Architecture Search process.\n", - "\n", - "- **moities_to_try**: Number of different layer permutations to try.\n", - "- **tries_per_moity**: Number of topologies to try for each permutation.\n", - "- **epochs, batch_size, learning_rate**: Standard training parameters for the NAS stage.\n", - "- **predecessor_level_connection_affinity_factor_first**: Controls connectivity density between the Input layer and the first level of Dense layers.\n", - "- **predecessor_level_connection_affinity_factor_main**: Controls connectivity density between the Input layer and the first level of Dense layers and the subsequent level of Dense layers, as well as all subsequent vertical connectivity.\n", - "- **p_lateral_connection, num_lateral_connection_tries_per_unit**: Control the density of lateral connectivity between Dense layers on the same row.\n", - "- **minimum_levels, maximum_levels**: Number of **rows of** Dense layers in the architecture grid.\n", - "- 
**minimum_units_per_level, maximum_units_per_level**: Number of Dense layers per row.\n", - "- **minimum_neurons_per_unit, maximum_neurons_per_unit**: The number of neurons for each Dense layer unit.\n" - ], - "metadata": { - "id": "MeoWtePacWz_" - } - }, - { - "cell_type": "code", - "source": [ - "# Cerebros [non-HP-tunable] configurables for NAS\n", - "moities_to_try = 3\n", - "tries_per_moity = 1\n", - "\n", - "### Main tunable hyperparameters for NAS ##\n", - "\n", - "POSITIONAL_EMBEDDING_DROPOUT = 0.7651951380000674\n", - "activation = 'softplus'\n", - "\n", - "# Vertical connectivity hyperparameters\n", - "predecessor_level_connection_affinity_factor_first = 17.851026458010523\n", - "predecessor_level_connection_affinity_factor_main = 21.487301631581428\n", - "\n", - "# Lateral connectivity hyperparameters\n", - "max_consecutive_lateral_connections = 7\n", - "p_lateral_connection = 0.24927354102044022\n", - "num_lateral_connection_tries_per_unit = 32\n", - "learning_rate = 0.003025583248301791\n", - "epochs = 41\n", - "batch_size = 5\n", - "gradient_accumulation_steps = 4\n", - "\n", - "# Architecture grid constraints\n", - "minimum_levels = 2\n", - "maximum_levels = 2\n", - "minimum_units_per_level = 2\n", - "maximum_units_per_level = 2\n", - "minimum_neurons_per_unit = 2\n", - "maximum_neurons_per_unit = 2\n" - ], - "metadata": { - "id": "Wbowkxnbc4Zd" - }, - "execution_count": 7, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Phase I-b (Extended Training) Hyperparameters\n", - "\n", - "These parameters are for fine-tuning the best model from Stage I-a.\n", - "\n", - "- INITIAL_LR_STAGE_I_B: Initial learning rate for this phase.\n", - "- WARMUP_EPOCHS_STAGE_I_B, WARMUP_STEPS: Parameters for the learning rate scheduler.\n", - "- phase_i_b_epochs: Number of epochs for extended training.\n", - "- phase_i_b_weight_decay: Weight decay for the optimizer.\n" - ], - "metadata": { - "id": "fcGTs9ASdXps" - } - }, - { - "cell_type": "code", - 
"source": [ - "\n", - "## Training Stage I-b parameters:\n", - "INITIAL_LR_STAGE_I_B = 0.0039295722955565125\n", - "WARMUP_EPOCHS_STAGE_I_B = 7\n", - "WARMUP_STEPS = 1140\n", - "FIRST_DECAY_STEPS_STAGE_I_B = 1900\n", - "phase_i_b_epochs = 53\n", - "phase_i_b_gradient_accumulation_steps = 7\n", - "phase_i_b_weight_decay = 0.01647018768215773 # For AdamW\n" - ], - "metadata": { - "id": "-znwaddIdiKU" - }, - "execution_count": 8, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "\n", - "# Generation Constants\n", - "\n", - "Parameters used during the text generation evaluation phase." - ], - "metadata": { - "id": "vy5y6OXhdvzV" - } - }, - { - "cell_type": "code", - "source": [ - "## Generation time configurables:\n", - "GENERATION_PROMPT_LEN = 25\n", - "MAX_NEW_TOKENS = MAX_SEQ_LENGTH - GENERATION_PROMPT_LEN" - ], - "metadata": { - "id": "JHjCz9qXd5Gq" - }, - "execution_count": 9, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# **Data Preparation**\n", - "\n", - "Here, we load and subset the dataset for both training Stages.\n", - "\n", - "\n", - "We first split the Bible text samples into two sets: one for Phase I-a (NAS) and a larger one for Phase I-b (extended training).\n" - ], - "metadata": { - "id": "N7fJIZ1md-0Y" - } - }, - { - "cell_type": "code", - "source": [ - "# Get training data from the bible text samples\n", - "non_instruct_samples = bible[:PHASE_I_A_SAMPLES_TO_CREATE]\n", - "phase_i_b_samples = bible[PHASE_I_A_SAMPLES_TO_CREATE:PHASE_I_B_SAMPLES_TO_CREATE + PHASE_I_A_SAMPLES_TO_CREATE]\n", - "\n", - "print(f\"Samples from KJV bible consisting of {len(non_instruct_samples)} look like this (sub-sample of 3): {non_instruct_samples[:3]}\")\n" - ], - "metadata": { - "id": "jIFxWcBzeLjN", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "d46f8e34-3d7d-4fb4-dddc-bf1c45bae7ee" - }, - "execution_count": 10, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Samples 
from KJV bible consisting of 10 look like this (sub-sample of 3): ['In the beginning God created the heavens and the earth.', \"The earth was formless and empty, with darkness over the deep and God's Spirit hovering over the waters.\", \"God said, 'Let there be light,' and there was light.\"]\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Preprocess Data for Phase I-a (NAS)\n", - "\n", - "The Cerebros LLM is a single head model. This means that each time the model is called, it returns only the next token. It does not regurgitate the cumulative sequence, nor does it have a separate head for each position in the sequence.\n", - "\n", - "For both training stages, each text sample is expanded into multiple input/label pairs, which we call \"sub-samples.\" There is one \"sub-sample\" for each token in the range between the first token and the first occurrence of a padding token or the end of the sequence, whichever comes first.\n", - "\n", - "For example, with 2 standing in for the padding token ID and a maximum sequence length of 4, the sequence [t1, t2, t3] becomes:\n", - "\n", - " Input: [t1, 2, 2, 2], Label: [t2] # One hot encoded to VOCABULARY_SIZE\n", - " Input: [t1, t2, 2, 2], Label: [t3]\n", - " Input: [t1, t2, t3, 2], Label: [2]\n", - "\n", - "For training Stage I-a, we perform the entire expansion for its training data in memory. This is because the NAS does not yet support a tf.data.Dataset object. In the future, we may retrofit the NAS algorithm to support streaming preprocessing as well, allowing us to use a larger dataset for the NAS.\n", - "\n", - "For Stage I-b, the extended training stage, the same operation is done in batches. This is because this operation significantly increases the amount of memory required. The main reason for this is the one-hot encoded label, where the vocabulary size is 128,260. 
Since we do this in batches, this allows for a virtually unlimited number of samples to be processed.\n", - "\n", - "For reference, this is the preprocessing being applied:\n", - "\n", - "```python\n", - "def prepare_data(\n", - " data_0: List[str],\n", - " tokenizer_0: Any,\n", - " max_seq_length: int = 1024,\n", - " prompt_length: int = 1) -> Tuple[List[List[int]], List[List[int]], int]:\n", - "\n", - "\n", - " all_input_ids = []\n", - " all_labels = []\n", - "\n", - " pad_token_id = tokenizer_0.pad_token_id\n", - "\n", - " # Tokenize all data at once for efficiency\n", - " tokenized_data = tokenizer_0(\n", - " data_0,\n", - " max_length=max_seq_length,\n", - " padding='max_length',\n", - " truncation=True,\n", - " add_special_tokens=False # We'll handle special tokens manually\n", - " )\n", - " vocab_size = len(tokenizer_0)\n", - "\n", - " # Get the token ID for \n", - " end_prompt_token_id = tokenizer_0.encode(\"\", add_special_tokens=False)[0]\n", - "\n", - " # Process each sample\n", - " for sample_tokens in tokenized_data['input_ids']:\n", - " # Find the index of token\n", - " try:\n", - " end_prompt_index = sample_tokens.index(end_prompt_token_id)\n", - " except ValueError:\n", - " # If not found, treat sample as a non-instruct sample\n", - " end_prompt_index = (\n", - " prompt_length - 1) # int(np.ceil(len(sample_tokens) * (1/3))) # 0 ## 1. Give it a fair starting place to predict the next word 2. 
reduce the number of expanded samples\n", - "\n", - " # Find first pad token after \n", - " first_pad_index = None\n", - " for i in range(end_prompt_index + 1, len(sample_tokens)):\n", - " if sample_tokens[i] == pad_token_id:\n", - " first_pad_index = i\n", - " break\n", - "\n", - " # If no pad token found, use the end of sequence\n", - " if first_pad_index is None:\n", - " first_pad_index = len(sample_tokens)\n", - "\n", - " # Apply sliding window from after to first pad token\n", - " # Start from end_prompt_index + 1 (first token to predict)\n", - " # End at first_pad_index - 1 (last token to predict)\n", - " for i in range(end_prompt_index + 1, first_pad_index):\n", - " # Input: from start up to (but not including) token i\n", - " input_ids = sample_tokens[:i]\n", - "\n", - " # Pad or truncate to max_seq_length\n", - " if len(input_ids) > max_seq_length:\n", - " input_ids = input_ids[:max_seq_length]\n", - " else:\n", - " input_ids = input_ids + [pad_token_id] * (max_seq_length - len(input_ids))\n", - "\n", - " # Label: one-hot encoding of token at position i\n", - " next_token = sample_tokens[i]\n", - " label = [0] * vocab_size\n", - " label[next_token] = 1\n", - "\n", - " all_input_ids.append(input_ids)\n", - " all_labels.append(label)\n", - "\n", - " # Add final sample with pad token as label to indicate termination\n", - " if first_pad_index < len(sample_tokens): # Only if there's actually a pad token\n", - " input_ids = sample_tokens[:first_pad_index]\n", - "\n", - " # Pad or truncate to max_seq_length\n", - " if len(input_ids) > max_seq_length:\n", - " input_ids = input_ids[:max_seq_length]\n", - " else:\n", - " input_ids = input_ids + [pad_token_id] * (max_seq_length - len(input_ids))\n", - "\n", - " # Label: one-hot encoding of pad token\n", - " label = [0] * vocab_size\n", - " label[pad_token_id] = 1\n", - "\n", - " all_input_ids.append(input_ids)\n", - " all_labels.append(label)\n", - "\n", - " return all_input_ids, all_labels, vocab_size\n", - "```\n" 
- ], - "metadata": { - "id": "8Tu8X9cVeQVD" - } - }, - { - "cell_type": "code", - "source": [ - "\n", - "# Preprocess data for Stage I-a training\n", - "x, y, vocab_size = prepare_data(data_0=non_instruct_samples,\n", - " tokenizer_0=tokenizer,\n", - " max_seq_length=MAX_SEQ_LENGTH,\n", - " prompt_length=PROMPT_LENGTH)\n", - "\n", - "# Split the preprocessed data for NAS training and validation\n", - "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.85, shuffle=False)\n", - "\n", - "# Package data into lists for the Cerebros AutoML component\n", - "x_train_tf = tf.constant(X_train, tf.int32)\n", - "y_train_tf = tf.constant(y_train, tf.float32)\n", - "x_train_packaged = [x_train_tf]\n", - "y_train_packaged = [y_train_tf]\n", - "\n", - "# Do the same for the validation data\n", - "x_test_tf = tf.constant(X_test, tf.int32)\n", - "y_test_tf = tf.constant(y_test, tf.float32)\n", - "x_test_packaged = [x_test_tf]\n", - "y_test_packaged = [y_test_tf]\n", - "\n", - "# Define input and output shapes for the AutoML model\n", - "INPUT_SHAPES = [(MAX_SEQ_LENGTH,)]\n", - "OUTPUT_SHAPES = [(VOCABULARY_SIZE)]\n" - ], - "metadata": { - "id": "EDyuTMLufYvs" - }, - "execution_count": 11, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Train, Test Split of the Data for Stage I-b training\n", - "\n", - "We split the larger Phase I-b dataset into training and validation sets. Again, this dataset will be processed by a streaming generator in batches to avoid memory saturation and make the training more scalable. We will revisit that later." 
- ], - "metadata": { - "id": "zX60zcpykasl" - } - }, - { - "cell_type": "code", - "source": [ - "\n", - "# Split the phase I-b data set for training and validation\n", - "phase_i_b_train_samples, phase_i_b_val_samples = train_test_split(\n", - " phase_i_b_samples,\n", - " test_size=PHASE_I_B_VAL_SPLIT,\n", - " shuffle=False\n", - ")\n" - ], - "metadata": { - "id": "SMSdkFRPkg7D" - }, - "execution_count": 12, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "phase_i_b_train_samples[:3]" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Oqw-T7bOo1GD", - "outputId": "2e8f24fc-24c2-4a06-babb-550b676b7751" - }, - "execution_count": 13, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[\"God said, 'Let the earth produce vegetation, seed-bearing plants, and fruit trees, each according to its kind,' and it was so.\",\n", - " 'The earth brought forth grass, seed-bearing herbs, and fruit trees, each with its seed, and God saw that it was good.',\n", - " 'There was evening and morning, the third day.']" - ] - }, - "metadata": {}, - "execution_count": 13 - } - ] - }, - { - "cell_type": "code", - "source": [ - "X_train[:2]" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "Hv_52izIjOQ7", - "outputId": "e2972924-0190-4f16-9317-c00100486203" - }, - "execution_count": 14, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[[644,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", 
- " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012],\n", - " [644,\n", - " 279,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012,\n", - " 128012]]" - ] - }, - "metadata": {}, - "execution_count": 14 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Base Text Embedding Model Definition\n", - "\n", - "- Before we run the NAS, we define a base model that handles token embeddings and positional embeddings.\n", - "- The SimpleCerebrosRandomSearch will then attach its auto-generated lattice of dense layers on top of this base model.\n", - "- The Cerebros NAS takes an init parameter base_models: List[tf.keras.Model]\n" - ], - "metadata": { - "id": "11Ri4PtKktih" - } - }, - { - "cell_type": "code", - "source": [ - "####### Text embedding base model #####################\n", - "\n", - "inp = tf.keras.layers.Input(shape=(MAX_SEQ_LENGTH,), dtype=tf.int32)\n", - "\n", - "# Token embedding layer\n", - "embedded = tf.keras.layers.Embedding(\n", - " input_dim=VOCABULARY_SIZE,\n", - " output_dim=EMBEDDING_DIM,\n", - " input_length=MAX_SEQ_LENGTH,\n", - " mask_zero=False\n", - ")(inp)\n", - "\n", - "# Interleaved Rotary Positional Embedding (iRoPE)\n", - "position_embedding = InterleavedRoPE(\n", - " dim=EMBEDDING_DIM,\n", - " max_seq_len=MAX_SEQ_LENGTH,\n", - ")(embedded)\n", - "\n", - "# Concatenate token and positional embeddings\n", - "x = 
tf.keras.layers.Concatenate()([embedded, position_embedding])\n", - "x = tf.keras.layers.Dropout(POSITIONAL_EMBEDDING_DROPOUT)(x)\n", - "\n", - "# Flatten and project to the desired dimension\n", - "flattened = tf.keras.layers.Flatten()(x)\n", - "projected = tf.keras.layers.Dense(EMBEDDING_DIM * PROJECTION_N)(flattened)\n", - "\n", - "# Create the base Keras model\n", - "cerebros_base_model = tf.keras.Model(\n", - " inputs=inp,\n", - " outputs=projected\n", - ")\n" - ], - "metadata": { - "id": "tn1qrGISn_Pe", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "e76e091c-6e7f-4820-ef79-15143f1e6b64" - }, - "execution_count": 15, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.12/dist-packages/keras/src/layers/core/embedding.py:97: UserWarning: Argument `input_length` is deprecated. Just remove it.\n", - " warnings.warn(\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "## FYI: The iRoPE Embedding:\n", - "\n", - "The RoPE embedding, and helper functions it depends on (previously imported from the local package cerebrosllmutils):\n", - "\n", - "- iRoPE: Interleaved Rotary Positional Embedding\n", - "- RoPE: Rotary Positional Embedding\n", - "- The Rotary Positional Embedding expresses positional relationships as rotation angles, which extends the feasible context window.\n", - "- iRoPE applies the rotation in an interleaved manner, which captures more nuance and extends the feasible context window to around 2 million tokens.\n", - "\n", - "```python\n", - "# --- Base Rotary Positional Embedding\n", - "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='RotaryEmbedding')\n", - "class RotaryEmbedding(tf.keras.layers.Layer):\n", - " def __init__(self, dim, max_seq_len=1024, temperature=10000.0, **kwargs):\n", - " super().__init__(**kwargs)\n", - " self.dim = dim\n", - " # Ensure dim is even right at initialization\n", - " if self.dim % 2 != 0:\n", - " raise 
ValueError(f\"Embedding dimension `dim` ({self.dim}) must be even for RotaryEmbedding.\")\n", - " self.max_seq_len = max_seq_len\n", - " self.temperature = temperature\n", - " # *** No calculation or storage of inv_freq here or in build ***\n", - "\n", - " def build(self, input_shape):\n", - " # Build should primarily be for creating trainable weights, which we don't have.\n", - " # Call super().build() for Keras compatibility.\n", - " super().build(input_shape)\n", - "\n", - " def call(self, x): # Removed seq_len argument, calculate from x\n", - " shape = tf.shape(x)\n", - " batch_size = shape[0]\n", - " actual_seq_len = shape[1]\n", - "\n", - " # *** Calculate inv_freq inside call ***\n", - " inv_freq_base = tf.range(0, self.dim, 2, dtype=tf.float32)\n", - " inv_freq = 1.0 / (self.temperature ** (inv_freq_base / self.dim))\n", - " # Ensure inv_freq has the correct shape [dim/2]\n", - " inv_freq = tf.cast(inv_freq, dtype=x.dtype) # Match dtype early\n", - "\n", - " # Use actual_seq_len for calculations\n", - " position = tf.range(actual_seq_len, dtype=x.dtype) # Match dtype\n", - "\n", - " # Calculate sinusoid input using einsum or broadcasting\n", - " # Einsum approach: Ensure correct dimensions [seq_len, dim/2]\n", - " sinusoid_inp = tf.einsum(\"i,j->ij\", position, inv_freq)\n", - "\n", - " # Calculate sin and cos based on the actual sequence length\n", - " sin = tf.sin(sinusoid_inp)\n", - " cos = tf.cos(sinusoid_inp)\n", - "\n", - " # Repeat sin/cos for interleaving: [a, b] -> [a, a, b, b]\n", - " # Result needs shape [actual_seq_len, dim]\n", - " sin = tf.repeat(sin, 2, axis=-1)\n", - " cos = tf.repeat(cos, 2, axis=-1)\n", - "\n", - " # Expand dims for batch and tile\n", - " # Output shape needs to be [batch_size, actual_seq_len, dim]\n", - " # Add batch dimension: [1, actual_seq_len, dim]\n", - " sin = tf.expand_dims(sin, axis=0)\n", - " cos = tf.expand_dims(cos, axis=0)\n", - "\n", - " # Tile to match the batch size: [batch_size, actual_seq_len, dim]\n", - 
" sin = tf.tile(sin, [batch_size, 1, 1])\n", - " cos = tf.tile(cos, [batch_size, 1, 1])\n", - "\n", - " # Casting to x.dtype was already done for inv_freq, sin/cos will inherit\n", - " # sin = tf.cast(sin, x.dtype) # Already done via calculation chain\n", - " # cos = tf.cast(cos, x.dtype) # Already done via calculation chain\n", - "\n", - " # Return sin and cos needed by InterleavedRoPE\n", - " return sin, cos\n", - "\n", - " def get_config(self):\n", - " config = super().get_config()\n", - " config.update({\n", - " \"dim\": self.dim,\n", - " \"max_seq_len\": self.max_seq_len,\n", - " \"temperature\": self.temperature,\n", - " })\n", - " return config\n", - "\n", - " @classmethod\n", - " def from_config(cls, config):\n", - " return cls(**config)\n", - "\n", - "\n", - "# iRoPE helper functions\n", - "\n", - "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='split_alternate')\n", - "def split_alternate(x):\n", - " shape = tf.shape(x)\n", - " x = tf.reshape(x, [shape[0], shape[1], shape[2] // 2, 2])\n", - " x = tf.transpose(x, [0, 1, 3, 2])\n", - " x = tf.reshape(x, [shape[0], shape[1], -1])\n", - " return x\n", - "\n", - "\n", - "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='rotate_half')\n", - "def rotate_half(x):\n", - " x = split_alternate(x)\n", - " d = tf.shape(x)[-1]\n", - " rotated_x = tf.concat([-x[..., d // 2:], x[..., :d // 2]], axis=-1)\n", - " return tf.reshape(rotated_x, tf.shape(x))\n", - "\n", - "\n", - "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='apply_rotary_pos_emb')\n", - "def apply_rotary_pos_emb(x, sin, cos):\n", - " cos = tf.reshape(cos, [tf.shape(cos)[0], tf.shape(cos)[1], -1])\n", - " sin = tf.reshape(sin, [tf.shape(sin)[0], tf.shape(sin)[1], -1])\n", - " x_rotated = x * cos + rotate_half(x) * sin\n", - " return x_rotated\n", - "\n", - "\n", - "# interleaved Rotary Postional Embedding (iRoPE)\n", - 
"@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='InterleavedRoPE')\n", - "class InterleavedRoPE(tf.keras.layers.Layer):\n", - " def __init__(self, dim, max_seq_len=1024, **kwargs):\n", - " super().__init__(**kwargs)\n", - " if dim % 2 != 0:\n", - " raise ValueError(f\"Embedding dimension `dim` ({dim}) must be even for InterleavedRoPE.\")\n", - " self.dim = dim\n", - " self.max_seq_len = max_seq_len\n", - " # Instantiate the RotaryEmbedding layer\n", - " # Ensure the name is consistent if needed for saving/loading\n", - " self.rotary_emb = RotaryEmbedding(dim, max_seq_len, name=\"rotary_embedding\")\n", - "\n", - " def call(self, x):\n", - " # Get sin and cos from the RotaryEmbedding layer's call method\n", - " # *** Pass only 'x'. RotaryEmbedding calculates seq_len internally. ***\n", - " sin, cos = self.rotary_emb(x)\n", - "\n", - " # Apply the positional embeddings\n", - " x_embedded = apply_rotary_pos_emb(x, sin, cos)\n", - " return x_embedded\n", - "\n", - " def get_config(self):\n", - " config = super().get_config()\n", - " config.update({\n", - " \"dim\": self.dim,\n", - " \"max_seq_len\": self.max_seq_len,\n", - " })\n", - " # Keras handles nested layer serialization automatically\n", - " return config\n", - "\n", - " @classmethod\n", - " def from_config(cls, config):\n", - " # Keras handles nested layer restoration automatically\n", - " return cls(**config)\n", - "```" - ], - "metadata": { - "id": "CXtYv20vpkMY" - } - }, - { - "cell_type": "markdown", - "source": [ - "## Custom metric Perplexity (previously imported from the local package cerebrosllmutils):\n", - "\n", - "Since there is not a Perplexity metric in tensorflow.keras.metrics, we created our own, and one designed for this single - head model.\n", - "\n", - "## This is what it looks like:\n", - "\n", - "```python\n", - "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='Perplexity')\n", - "class Perplexity(tf.keras.metrics.Metric):\n", - " 
\"\"\"\n", - " Computes perplexity, defined as e^(categorical crossentropy).\n", - " \"\"\"\n", - "\n", - " def __init__(self, name='perplexity', **kwargs):\n", - " super().__init__(name=name, **kwargs)\n", - " self.total_crossentropy = self.add_weight(name='total_crossentropy', initializer='zeros')\n", - " self.count = self.add_weight(name='count', initializer='zeros')\n", - "\n", - " def update_state(self, y_true, y_pred, sample_weight=None):\n", - " # Calculate categorical crossentropy\n", - " crossentropy = tf.keras.losses.categorical_crossentropy(y_true, y_pred)\n", - "\n", - " # Update the running sum of crossentropy and the count of samples\n", - " self.total_crossentropy.assign_add(tf.reduce_sum(crossentropy))\n", - " self.count.assign_add(tf.cast(tf.shape(y_true)[0], dtype=tf.float32))\n", - "\n", - " def result(self):\n", - " # Compute the average crossentropy\n", - " average_crossentropy = self.total_crossentropy / self.count\n", - " # Compute perplexity as e^(average crossentropy)\n", - " return tf.exp(average_crossentropy)\n", - "\n", - " def reset_state(self):\n", - " # Reset the state variables\n", - " self.total_crossentropy.assign(0.0)\n", - " self.count.assign(0.0)\n", - "```\n" - ], - "metadata": { - "id": "uN3adqRLo61X" - } - }, - { - "cell_type": "code", - "source": [ - "# Custom metric: Perplexity\n", - "perplexity_metric = Perplexity()" - ], - "metadata": { - "id": "_8uTBW_to7iQ" - }, - "execution_count": 16, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "\n", - "# Stage I-a training: Neural Architecture Search (NAS)\n", - "\n", - "We now run the SimpleCerebrosRandomSearch to find the best performing architecture based on the training data and the base model. The search aims to minimize the perplexity on the training set. 
Obviously, in a full-scale run, we would use the validation set's value.\n", - "\n", - "- The Cerebros NAS will parse a block composed of rows (Levels) of multiple Dense layers (Units) with an overlapping, interleaved, interwoven topology both laterally between Dense layers on the same row and vertically between layers on different levels.\n", - "- This topology emulates the neuroscience principle of modularity.\n", - "- This topology allows local clusters of densely connected neurons to learn specialized fragments of a problem, while allowing efficient communication between these clusters to coordinate among themselves to compose a solution to a complex problem.\n", - "\n", - "For the deep technical details of how Cerebros NAS works: [How Cerebros NAS Works](https://github.com/david-thrower/cerebros-core-algorithm-alpha/blob/277-attempt-to-imporve-parameters-on--dev-branch-275/documentation/cerebros-technical-details.md)\n", - "\n", - "## This is what a neural network parsed by Cerebros looks like:\n", - "\n", - "- Green triangles: Input layers\n", - "- Blue squares: Concatenate layer -> [BatchNormalization | Dropout]\n", - "- Pink ovals: Hidden Dense layers\n", - "- Red oval: Output Dense layer\n" - ], - "metadata": { - "id": "tWjbHiHRMhR4" - } - }, - { - "cell_type": "markdown", - "source": [ - "![Brain-lookalike1.png]()" - ], - "metadata": { - "id": "1wR8EVItNNh_" - } - }, - { - "cell_type": "markdown", - "source": [ - "\n", - "## For a more readable view of what this looks like\n", - "\n", - "![image.png]()\n" - ], - "metadata": { - "id": "_bXR1QxaLPiq" - } - }, - { - "cell_type": "code", - "source": [ - "######## Instantiate Cerebros Neural Architecture Search #######\n", - "\n", - "# Project metadata\n", - "TIME = pendulum.now(tz='America/New_York').__str__()[:16].replace('T', '_').replace(':', '_').replace('-', '_')\n", - "PROJECT_NAME = f'{TIME}_cerebros_not-gpt'\n", - "meta_trial_number = 42\n", - "\n", - "# Initialize the AutoML search\n", - 
"cerebros_automl = SimpleCerebrosRandomSearch(\n", - " unit_type=DenseUnit,\n", - " input_shapes=INPUT_SHAPES,\n", - " output_shapes=OUTPUT_SHAPES,\n", - " training_data=x_train_packaged,\n", - " labels=y_train_packaged,\n", - " validation_split=0.2,\n", - " direction='minimize',\n", - " metric_to_rank_by=\"perplexity\",\n", - " minimum_levels=minimum_levels,\n", - " maximum_levels=maximum_levels,\n", - " minimum_units_per_level=minimum_units_per_level,\n", - " maximum_units_per_level=maximum_units_per_level,\n", - " minimum_neurons_per_unit=minimum_neurons_per_unit,\n", - " maximum_neurons_per_unit=maximum_neurons_per_unit,\n", - " activation=activation,\n", - " final_activation='softmax',\n", - " number_of_architecture_moities_to_try=moities_to_try,\n", - " number_of_tries_per_architecture_moity=tries_per_moity,\n", - " predecessor_level_connection_affinity_factor_first=predecessor_level_connection_affinity_factor_first,\n", - " predecessor_level_connection_affinity_factor_main=predecessor_level_connection_affinity_factor_main,\n", - " predecessor_level_connection_affinity_factor_decay_main=zero_7_exp_decay,\n", - " max_consecutive_lateral_connections=max_consecutive_lateral_connections,\n", - " p_lateral_connection=p_lateral_connection,\n", - " p_lateral_connection_decay=zero_95_exp_decay,\n", - " num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit,\n", - " learning_rate=learning_rate,\n", - " loss=tf.keras.losses.CategoricalCrossentropy(),\n", - " metrics=[tf.keras.metrics.CategoricalAccuracy(), perplexity_metric],\n", - " epochs=epochs,\n", - " project_name=f\"{PROJECT_NAME}_meta_{meta_trial_number}\",\n", - " model_graphs='model_graphs',\n", - " batch_size=batch_size,\n", - " gradient_accumulation_steps=gradient_accumulation_steps,\n", - " meta_trial_number=meta_trial_number,\n", - " base_models=[cerebros_base_model],\n", - " train_data_dtype=tf.int32\n", - ")" - ], - "metadata": { - "id": "XV2q_5WEwBJ0" - }, - "execution_count": 17, - 
"outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Run the Cerebros Neural Architecture Search\n" - ], - "metadata": { - "id": "TJVLfmJ2virA" - } - }, - { - "cell_type": "code", - "source": [ - "cerebros_t0 = time.time()\n", - "phase_i_a_result_0 = cerebros_automl.run_random_search()\n", - "cerebros_t1 = time.time()\n", - "\n", - "# Report results\n", - "cerebros_time_all_models_min = (cerebros_t1 - cerebros_t0) / 60\n", - "models_tried = moities_to_try * tries_per_moity\n", - "cerebros_time_per_model = cerebros_time_all_models_min / models_tried\n", - "phase_i_a_result = float(phase_i_a_result_0)\n", - "\n", - "print(f\"Cerebros trained {models_tried} models in {cerebros_time_all_models_min:.2f} min. Average time per model: {cerebros_time_per_model:.2f} min.\")\n", - "print(f'Cerebros best perplexity achieved in Phase I-a is {phase_i_a_result}')" - ], - "metadata": { - "id": "ulL0EGnow5L7", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "outputId": "d56dd1ec-2f7b-4a3c-ecc6-75e595910367" - }, - "execution_count": 18, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\rGlobal task progress: 0%|\u001b[38;2;22;206;235m \u001b[0m| 0/3 [00:00nnf>ceil\n", - "k is: 0 value is: [{'1': }]\n", - "0\n", - "k is: 1 value is: [{'2': }, {'2': }]\n", - "1\n", - "Trying to create level 1\n", - "We think level 1's predecessors are: [0]\n", - "k is: 2 value is: [{'128260': }]\n", - "2\n", - "Trying to create Final level 2\n", - "Trying to create level 2\n", - "We think level final level 2's predecessors are: [0, 1]\n", - "levels:\n", - "[0, 1, 2]\n", - "{'0': 'InputUnitModule'}\n", - "InputLevel.input_shapes [(40,)]\n", - "{'2': }\n", - "{'2': }\n", - "Debug: I am 2 selecting 1\n", - "debug: meta_level_number\n", - "debug: meta_level_number\n", - "debug: meta_level_number\n", - "Setting levels_unmaterialized[0] level_number 0 to have first successor: levels_unmaterialized[:1], having level_numbers of [1, 
2]\n", - "Setting levels_unmaterialized[1] level_number 1 to have first successor: levels_unmaterialized[:2], having level_numbers of [2]\n", - "Debug: successor_connectivity_errors_2d []\n", - "$$$$$$>>>>> Base model: \n", - "InputUnit.input_shape: (40,)\n", - "{'2': }\n", - "{'2': }\n", - "debug: meta_level_number\n", - "debug: meta_level_number\n", - "Debug: successor_connectivity_errors_2d []\n", - "Debug: successor_connectivity_errors_2d []\n", - "materialize:_NeuralNetworkFuture_0000000000000nan_tr_0_DenseLevel_0000000000000001_tr_0_DenseUnit_0000000000000001_tr_0_0 called\n", - "materialized network layers\n", - "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - "materialize:_NeuralNetworkFuture_0000000000000nan_tr_0_DenseLevel_0000000000000001_tr_0_DenseUnit_0000000000000001_tr_0_1 called\n", - "materialized network layers\n", - "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - "{'128260': }\n", - "debug: meta_level_number\n", - "Debug: successor_connectivity_errors_2d []\n", - "materialize:_NeuralNetworkFuture_0000000000000nan_tr_0_FinalDenseLevel_0000000000000002_tr_0_FinalDenseUnit_0000000000000002_tr_0_0 called\n", - "materialized network layers\n", - "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - "inputs\n", - "\n", - "\n", - "outputs\n", - "\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - 
"\u001b[1mModel: \"NeuralNetworkFuture_0000000000000nan_tr_0_nn_materialized\"\u001b[0m\n" - ], - "text/html": [ - "
Model: \"NeuralNetworkFuture_0000000000000nan_tr_0_nn_materialized\"\n",
-              "
\n" - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n", - "โ”ƒ\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0mโ”ƒ\n", - "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m40\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ - โ”‚\n", - "โ”‚ (\u001b[38;5;33mInputLayer\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ functional โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m12\u001b[0m) โ”‚ \u001b[38;5;34m1,550,652\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mFunctional\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ 
(\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ 
โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ 
(\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], 
โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ 
NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ 
โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m1,584\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128260\u001b[0m) โ”‚ \u001b[38;5;34m50,919,220\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n" - ], - "text/html": [ - "
โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n",
-              "โ”ƒ Layer (type)        โ”ƒ Output Shape      โ”ƒ    Param # โ”ƒ Connected to      โ”ƒ\n",
-              "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 40)        โ”‚          0 โ”‚ -                 โ”‚\n",
-              "โ”‚ (InputLayer)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ functional          โ”‚ (None, 12)        โ”‚  1,550,652 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (Functional)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0]  โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0]  โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚          0 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚      1,584 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 128260)    โ”‚ 50,919,220 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n",
-              "
\n" - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m52,476,644\u001b[0m (200.18 MB)\n" - ], - "text/html": [ - "
 Total params: 52,476,644 (200.18 MB)\n",
-              "
\n" - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m52,474,124\u001b[0m (200.17 MB)\n" - ], - "text/html": [ - "
 Trainable params: 52,474,124 (200.17 MB)\n",
-              "
\n" - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m2,520\u001b[0m (9.84 KB)\n" - ], - "text/html": [ - "
 Non-trainable params: 2,520 (9.84 KB)\n",
-              "
\n" - ] - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "None\n", - "Epoch 1/41\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.12/dist-packages/keras/src/models/functional.py:241: UserWarning: The structure of `inputs` doesn't match the expected structure.\n", - "Expected: NeuralNetworkFuture_0000000000000nan_tr_0_InputLevel_0000000000000000_tr_0_InputUnit_0000000000000000_tr_0_0_inp\n", - "Received: inputs=('Tensor(shape=(None, 40))',)\n", - " warnings.warn(msg)\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m10s\u001b[0m 752ms/step - categorical_accuracy: 0.0000e+00 - loss: 11.7672 - perplexity: 128956.3438 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7557 - val_perplexity: 127482.9922\n", - "Epoch 2/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 547ms/step - categorical_accuracy: 0.0000e+00 - loss: 11.6423 - perplexity: 113970.0938 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7555 - val_perplexity: 127447.9844\n", - "Epoch 3/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 785ms/step - categorical_accuracy: 0.1574 - loss: 11.5549 - perplexity: 104629.7031 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7555 - val_perplexity: 127457.9531\n", - "Epoch 4/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 510ms/step - categorical_accuracy: 0.1518 - loss: 11.2911 - perplexity: 80904.3125 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7396 - val_perplexity: 125437.5078\n", - "Epoch 
5/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 594ms/step - categorical_accuracy: 0.0000e+00 - loss: 11.7358 - perplexity: 125450.3906 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7296 - val_perplexity: 124199.8984\n", - "Epoch 6/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 869ms/step - categorical_accuracy: 0.1185 - loss: 11.0556 - perplexity: 65319.5391 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7240 - val_perplexity: 123501.3828\n", - "Epoch 7/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 589ms/step - categorical_accuracy: 0.0506 - loss: 11.3671 - perplexity: 90319.2578 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7149 - val_perplexity: 122378.4219\n", - "Epoch 8/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 539ms/step - categorical_accuracy: 0.0000e+00 - loss: 11.1610 - perplexity: 70926.6328 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7186 - val_perplexity: 122839.8203\n", - "Epoch 9/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 629ms/step - categorical_accuracy: 0.1496 - loss: 10.9728 - perplexity: 66133.8672 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7250 - val_perplexity: 123618.0391\n", - "Epoch 10/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 561ms/step - categorical_accuracy: 0.1475 - loss: 10.0717 - perplexity: 24002.7051 - val_categorical_accuracy: 0.1667 - val_loss: 11.7307 - 
val_perplexity: 124332.1562\n", - "Epoch 11/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 558ms/step - categorical_accuracy: 0.2024 - loss: 10.4731 - perplexity: 36918.5938 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7395 - val_perplexity: 125429.9766\n", - "Epoch 12/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 530ms/step - categorical_accuracy: 0.0000e+00 - loss: 10.1507 - perplexity: 27366.1113 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7502 - val_perplexity: 126783.1797\n", - "Epoch 13/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 750ms/step - categorical_accuracy: 0.0734 - loss: 10.4913 - perplexity: 43854.1094 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7526 - val_perplexity: 127089.4531\n", - "Epoch 14/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 583ms/step - categorical_accuracy: 0.3086 - loss: 9.0654 - perplexity: 9803.9824 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7553 - val_perplexity: 127423.6797\n", - "Epoch 15/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 528ms/step - categorical_accuracy: 0.2697 - loss: 9.0867 - perplexity: 10961.3613 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7623 - val_perplexity: 128316.8125\n", - "Epoch 16/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 582ms/step - categorical_accuracy: 0.0685 - loss: 9.1616 - perplexity: 10116.4492 - 
val_categorical_accuracy: 0.0000e+00 - val_loss: 11.8354 - val_perplexity: 138047.7344\n", - "Epoch 17/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 797ms/step - categorical_accuracy: 0.1518 - loss: 7.9130 - perplexity: 2808.9939 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.8843 - val_perplexity: 144976.3594\n", - "Epoch 18/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 1s/step - categorical_accuracy: 0.2169 - loss: 7.4165 - perplexity: 1843.1222 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.9479 - val_perplexity: 154489.3906\n", - "Epoch 19/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 510ms/step - categorical_accuracy: 0.1996 - loss: 8.1748 - perplexity: 4106.3154 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.0354 - val_perplexity: 168615.2344\n", - "Epoch 20/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 742ms/step - categorical_accuracy: 0.0839 - loss: 7.6041 - perplexity: 2107.0347 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.1744 - val_perplexity: 193765.5312\n", - "Epoch 21/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 601ms/step - categorical_accuracy: 0.2080 - loss: 7.4821 - perplexity: 1883.4858 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.2587 - val_perplexity: 210814.2656\n", - "Epoch 22/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 526ms/step - categorical_accuracy: 0.2036 - loss: 7.1881 
- perplexity: 1867.2930 - val_categorical_accuracy: 0.1667 - val_loss: 12.3405 - val_perplexity: 228771.9219\n", - "Epoch 23/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 792ms/step - categorical_accuracy: 0.1919 - loss: 7.0572 - perplexity: 1222.1584 - val_categorical_accuracy: 0.1667 - val_loss: 12.4140 - val_perplexity: 246219.7031\n", - "Epoch 24/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 688ms/step - categorical_accuracy: 0.1685 - loss: 5.6640 - perplexity: 308.2304 - val_categorical_accuracy: 0.1667 - val_loss: 12.5863 - val_perplexity: 292515.5625\n", - "Epoch 25/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 609ms/step - categorical_accuracy: 0.1407 - loss: 6.4666 - perplexity: 751.4036 - val_categorical_accuracy: 0.1667 - val_loss: 12.6761 - val_perplexity: 320013.0000\n", - "Epoch 26/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 895ms/step - categorical_accuracy: 0.0839 - loss: 5.3843 - perplexity: 352.6757 - val_categorical_accuracy: 0.1667 - val_loss: 12.7511 - val_perplexity: 344943.8125\n", - "Epoch 27/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 516ms/step - categorical_accuracy: 0.2120 - loss: 5.6307 - perplexity: 300.5551 - val_categorical_accuracy: 0.1667 - val_loss: 12.8756 - val_perplexity: 390664.9688\n", - "Epoch 28/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 614ms/step - categorical_accuracy: 0.1685 - loss: 4.4140 
- perplexity: 99.5634 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.1954 - val_perplexity: 537862.5000\n", - "Epoch 29/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 517ms/step - categorical_accuracy: 0.0568 - loss: 5.8209 - perplexity: 412.2969 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.3590 - val_perplexity: 633498.9375\n", - "Epoch 30/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 748ms/step - categorical_accuracy: 0.1864 - loss: 4.9144 - perplexity: 157.2443 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.5253 - val_perplexity: 748103.1875\n", - "Epoch 31/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 507ms/step - categorical_accuracy: 0.1052 - loss: 8.2503 - perplexity: 22384.6094 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.6228 - val_perplexity: 824754.3750\n", - "Epoch 32/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 612ms/step - categorical_accuracy: 0.4431 - loss: 4.0581 - perplexity: 75.1973 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.0377 - val_perplexity: 1248790.8750\n", - "Epoch 33/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 766ms/step - categorical_accuracy: 0.2086 - loss: 5.6123 - perplexity: 301.7467 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.2131 - val_perplexity: 1488169.6250\n", - "Epoch 34/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 569ms/step - 
categorical_accuracy: 0.2919 - loss: 4.4319 - perplexity: 154.5172 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.2928 - val_perplexity: 1611684.3750\n", - "Epoch 35/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 585ms/step - categorical_accuracy: 0.1802 - loss: 5.1381 - perplexity: 190.7273 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.4868 - val_perplexity: 1956789.0000\n", - "Epoch 36/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 639ms/step - categorical_accuracy: 0.1719 - loss: 4.6314 - perplexity: 111.0518 - val_categorical_accuracy: 0.1667 - val_loss: 14.5656 - val_perplexity: 2117109.5000\n", - "Epoch 37/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 693ms/step - categorical_accuracy: 0.0839 - loss: 6.8925 - perplexity: 1205.9113 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.6420 - val_perplexity: 2285232.2500\n", - "Epoch 38/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 594ms/step - categorical_accuracy: 0.2530 - loss: 5.8083 - perplexity: 927.8478 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.6140 - val_perplexity: 2222210.0000\n", - "Epoch 39/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 557ms/step - categorical_accuracy: 0.1913 - loss: 4.0802 - perplexity: 62.5591 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.5886 - val_perplexity: 2166540.2500\n", - "Epoch 40/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m 
\u001b[1m4s\u001b[0m 757ms/step - categorical_accuracy: 0.2987 - loss: 4.2323 - perplexity: 90.9604 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.6538 - val_perplexity: 2312421.2500\n", - "Epoch 41/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 529ms/step - categorical_accuracy: 0.2453 - loss: 3.7488 - perplexity: 50.1163 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.6478 - val_perplexity: 2298534.5000\n", - "this is neural_network_spec_file 2025_11_23 16_55_cerebros_not-gpt_meta_42/model_architectures/tr_0000000000000000_subtrial_0000000000000000.txt\n", - "returning trial 0 oracles\n", - " categorical_accuracy loss perplexity val_categorical_accuracy \\\n", - "0 0.000000 11.769061 129192.796875 0.000000 \n", - "1 0.000000 11.635833 113077.960938 0.000000 \n", - "2 0.130435 11.652204 114944.367188 0.000000 \n", - "3 0.130435 11.464634 95285.593750 0.000000 \n", - "4 0.000000 11.768666 129141.796875 0.000000 \n", - "5 0.130435 10.994949 59572.500000 0.000000 \n", - "6 0.043478 11.276978 78982.257812 0.000000 \n", - "7 0.000000 11.120511 67542.414062 0.000000 \n", - "8 0.173913 10.726726 45557.273438 0.000000 \n", - "9 0.217391 10.059676 23380.933594 0.166667 \n", - "10 0.173913 10.355123 31417.570312 0.000000 \n", - "11 0.000000 10.472779 35340.300781 0.000000 \n", - "12 0.086957 10.171259 26140.964844 0.000000 \n", - "13 0.217391 9.254299 10449.392578 0.000000 \n", - "14 0.217391 8.896774 7308.360840 0.000000 \n", - "15 0.130435 9.018457 8254.035156 0.000000 \n", - "16 0.130435 8.039083 3099.770996 0.000000 \n", - "17 0.217391 7.848331 2561.456787 0.000000 \n", - "18 0.173913 7.948806 2832.192139 0.000000 \n", - "19 0.043478 7.698378 2204.769043 0.000000 \n", - "20 0.173913 7.669386 2141.766846 0.000000 \n", - "21 0.260870 6.773150 874.061218 0.166667 \n", - "22 0.217391 7.382279 1607.248413 0.166667 \n", - "23 0.130435 6.034015 417.387543 
0.166667 \n", - "24 0.130435 6.000526 403.641022 0.166667 \n", - "25 0.043478 6.586512 725.246826 0.166667 \n", - "26 0.260870 5.741646 311.576935 0.166667 \n", - "27 0.130435 5.138083 170.388733 0.000000 \n", - "28 0.086957 5.670679 290.231415 0.000000 \n", - "29 0.217391 5.602477 271.096985 0.000000 \n", - "30 0.173913 6.986033 1081.422852 0.000000 \n", - "31 0.304348 4.127844 62.044033 0.000000 \n", - "32 0.217391 5.934126 377.709869 0.000000 \n", - "33 0.217391 5.564253 260.930054 0.000000 \n", - "34 0.173913 5.642823 282.258331 0.000000 \n", - "35 0.173913 4.475579 87.845474 0.166667 \n", - "36 0.043478 6.194771 490.179321 0.000000 \n", - "37 0.217391 5.472395 238.029572 0.000000 \n", - "38 0.173913 4.001881 54.700928 0.000000 \n", - "39 0.304348 3.707729 40.761116 0.000000 \n", - "40 0.260870 4.130568 62.213223 0.000000 \n", - "\n", - " val_loss val_perplexity trial_number subtrial_number \\\n", - "0 11.755738 1.274830e+05 0 0 \n", - "1 11.755464 1.274480e+05 0 0 \n", - "2 11.755542 1.274580e+05 0 0 \n", - "3 11.739563 1.254375e+05 0 0 \n", - "4 11.729648 1.241999e+05 0 0 \n", - "5 11.724008 1.235014e+05 0 0 \n", - "6 11.714873 1.223784e+05 0 0 \n", - "7 11.718637 1.228398e+05 0 0 \n", - "8 11.724952 1.236180e+05 0 0 \n", - "9 11.730713 1.243322e+05 0 0 \n", - "10 11.739503 1.254300e+05 0 0 \n", - "11 11.750234 1.267832e+05 0 0 \n", - "12 11.752646 1.270895e+05 0 0 \n", - "13 11.755273 1.274237e+05 0 0 \n", - "14 11.762258 1.283168e+05 0 0 \n", - "15 11.835355 1.380477e+05 0 0 \n", - "16 11.884326 1.449764e+05 0 0 \n", - "17 11.947881 1.544894e+05 0 0 \n", - "18 12.035375 1.686152e+05 0 0 \n", - "19 12.174404 1.937655e+05 0 0 \n", - "20 12.258733 2.108143e+05 0 0 \n", - "21 12.340481 2.287719e+05 0 0 \n", - "22 12.413980 2.462197e+05 0 0 \n", - "23 12.586273 2.925156e+05 0 0 \n", - "24 12.676117 3.200130e+05 0 0 \n", - "25 12.751137 3.449438e+05 0 0 \n", - "26 12.875606 3.906650e+05 0 0 \n", - "27 13.195358 5.378625e+05 0 0 \n", - "28 13.359014 6.334989e+05 0 
0 \n", - "29 13.525296 7.481032e+05 0 0 \n", - "30 13.622841 8.247544e+05 0 0 \n", - "31 14.037686 1.248791e+06 0 0 \n", - "32 14.213058 1.488170e+06 0 0 \n", - "33 14.292789 1.611684e+06 0 0 \n", - "34 14.486815 1.956789e+06 0 0 \n", - "35 14.565562 2.117110e+06 0 0 \n", - "36 14.641978 2.285232e+06 0 0 \n", - "37 14.614013 2.222210e+06 0 0 \n", - "38 14.588642 2.166540e+06 0 0 \n", - "39 14.653806 2.312421e+06 0 0 \n", - "40 14.647781 2.298534e+06 0 0 \n", - "\n", - " model_name \n", - "0 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "1 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "2 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "3 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "4 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "5 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "6 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "7 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "8 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "9 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "10 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "11 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "12 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "13 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "14 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "15 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "16 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "17 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "18 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "19 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "20 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "21 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "22 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "23 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... 
\n", - "24 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "25 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "26 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "27 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "28 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "29 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "30 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "31 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "32 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "33 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "34 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "35 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "36 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "37 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "38 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "39 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "40 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", - " self.pid = os.fork()\n", - "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. 
os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", - " self.pid = os.fork()\n", - "Global task progress: 33%|\u001b[38;2;22;206;235mโ–ˆโ–ˆโ–ˆโ–Ž \u001b[0m| 1/3 [03:54<07:49, 234.85s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "SimpleCerebrosRandomSearch.input_shapes: [(40,)]\n", - "nan\n", - ">nnf>ceil\n", - "k is: 0 value is: [{'1': }]\n", - "0\n", - "k is: 1 value is: [{'2': }, {'2': }]\n", - "1\n", - "Trying to create level 1\n", - "We think level 1's predecessors are: [0]\n", - "k is: 2 value is: [{'128260': }]\n", - "2\n", - "Trying to create Final level 2\n", - "Trying to create level 2\n", - "We think level final level 2's predecessors are: [0, 1]\n", - "levels:\n", - "[0, 1, 2]\n", - "{'0': 'InputUnitModule'}\n", - "InputLevel.input_shapes [(40,)]\n", - "{'2': }\n", - "{'2': }\n", - "Debug: I am 2 selecting 1\n", - "debug: meta_level_number\n", - "debug: meta_level_number\n", - "debug: meta_level_number\n", - "Setting levels_unmaterialized[0] level_number 0 to have first successor: levels_unmaterialized[:1], having level_numbers of [1, 2]\n", - "Setting levels_unmaterialized[1] level_number 1 to have first successor: levels_unmaterialized[:2], having level_numbers of [2]\n", - "Debug: successor_connectivity_errors_2d []\n", - "$$$$$$>>>>> Base model: \n", - "InputUnit.input_shape: (40,)\n", - "{'2': }\n", - "{'2': }\n", - "debug: meta_level_number\n", - "debug: meta_level_number\n", - "Debug: successor_connectivity_errors_2d []\n", - "Debug: successor_connectivity_errors_2d []\n", - "materialize:_NeuralNetworkFuture_0000000000000nan_tr_1_DenseLevel_0000000000000001_tr_1_DenseUnit_0000000000000001_tr_1_0 called\n", - "materialized network layers\n", - "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - 
"materialize:_NeuralNetworkFuture_0000000000000nan_tr_1_DenseLevel_0000000000000001_tr_1_DenseUnit_0000000000000001_tr_1_1 called\n", - "materialized network layers\n", - "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - "{'128260': }\n", - "debug: meta_level_number\n", - "Debug: successor_connectivity_errors_2d []\n", - "materialize:_NeuralNetworkFuture_0000000000000nan_tr_1_FinalDenseLevel_0000000000000002_tr_1_FinalDenseUnit_0000000000000002_tr_1_0 called\n", - "materialized network layers\n", - "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - "inputs\n", - "\n", - "\n", - "outputs\n", - "\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "\u001b[1mModel: \"NeuralNetworkFuture_0000000000000nan_tr_1_nn_materialized\"\u001b[0m\n" - ], - "text/html": [ - "
Model: \"NeuralNetworkFuture_0000000000000nan_tr_1_nn_materialized\"\n",
-              "
\n" - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n", - "โ”ƒ\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0mโ”ƒ\n", - "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m40\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ - โ”‚\n", - "โ”‚ (\u001b[38;5;33mInputLayer\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ functional โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m12\u001b[0m) โ”‚ \u001b[38;5;34m1,550,652\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mFunctional\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ 
(\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ 
โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ 
(\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], 
โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ 
NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ 
โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m1,584\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128260\u001b[0m) โ”‚ \u001b[38;5;34m50,919,220\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n" - ], - "text/html": [ - "
โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n",
-              "โ”ƒ Layer (type)        โ”ƒ Output Shape      โ”ƒ    Param # โ”ƒ Connected to      โ”ƒ\n",
-              "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 40)        โ”‚          0 โ”‚ -                 โ”‚\n",
-              "โ”‚ (InputLayer)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ functional          โ”‚ (None, 12)        โ”‚  1,550,652 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (Functional)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0]  โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0]  โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚          0 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚      1,584 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 128260)    โ”‚ 50,919,220 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n",
-              "
\n" - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m52,476,644\u001b[0m (200.18 MB)\n" - ], - "text/html": [ - "
 Total params: 52,476,644 (200.18 MB)\n",
-              "
\n" - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m52,474,124\u001b[0m (200.17 MB)\n" - ], - "text/html": [ - "
 Trainable params: 52,474,124 (200.17 MB)\n",
-              "
\n" - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m2,520\u001b[0m (9.84 KB)\n" - ], - "text/html": [ - "
 Non-trainable params: 2,520 (9.84 KB)\n",
-              "
\n" - ] - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "None\n", - "Epoch 1/41\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.12/dist-packages/keras/src/models/functional.py:241: UserWarning: The structure of `inputs` doesn't match the expected structure.\n", - "Expected: NeuralNetworkFuture_0000000000000nan_tr_1_InputLevel_0000000000000000_tr_1_InputUnit_0000000000000000_tr_1_0_inp\n", - "Received: inputs=('Tensor(shape=(None, 40))',)\n", - " warnings.warn(msg)\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 1s/step - categorical_accuracy: 0.0000e+00 - loss: 11.7384 - perplexity: 329529.3125 - val_categorical_accuracy: 0.1667 - val_loss: 11.7688 - val_perplexity: 129164.8281\n", - "Epoch 2/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 595ms/step - categorical_accuracy: 0.1913 - loss: 11.2528 - perplexity: 77375.8594 - val_categorical_accuracy: 0.1667 - val_loss: 11.7502 - val_perplexity: 126778.7031\n", - "Epoch 3/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 518ms/step - categorical_accuracy: 0.2191 - loss: 10.8135 - perplexity: 50491.5156 - val_categorical_accuracy: 0.1667 - val_loss: 11.7425 - val_perplexity: 125805.1797\n", - "Epoch 4/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 522ms/step - categorical_accuracy: 0.1864 - loss: 10.2940 - perplexity: 30868.9629 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7451 - val_perplexity: 126128.5781\n", - "Epoch 5/41\n", - 
"\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 820ms/step - categorical_accuracy: 0.1913 - loss: 9.7216 - perplexity: 16997.6719 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7362 - val_perplexity: 125020.5859\n", - "Epoch 6/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 645ms/step - categorical_accuracy: 0.1407 - loss: 8.9741 - perplexity: 8181.5312 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7171 - val_perplexity: 122652.5234\n", - "Epoch 7/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 593ms/step - categorical_accuracy: 0.1830 - loss: 8.4567 - perplexity: 4759.8066 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.6908 - val_perplexity: 119465.5703\n", - "Epoch 8/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 685ms/step - categorical_accuracy: 0.0506 - loss: 8.2385 - perplexity: 4355.3149 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.6441 - val_perplexity: 114018.0000\n", - "Epoch 9/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 651ms/step - categorical_accuracy: 0.2141 - loss: 7.1757 - perplexity: 1335.3220 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.6267 - val_perplexity: 112051.2734\n", - "Epoch 10/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 545ms/step - categorical_accuracy: 0.1830 - loss: 7.3339 - perplexity: 1963.8916 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.6375 - val_perplexity: 
113263.3828\n", - "Epoch 11/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 565ms/step - categorical_accuracy: 0.2669 - loss: 6.6371 - perplexity: 870.7467 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.6597 - val_perplexity: 115809.4375\n", - "Epoch 12/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 653ms/step - categorical_accuracy: 0.1719 - loss: 5.9232 - perplexity: 380.9991 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7503 - val_perplexity: 126796.4766\n", - "Epoch 13/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 626ms/step - categorical_accuracy: 0.0839 - loss: 7.4954 - perplexity: 2688.5974 - val_categorical_accuracy: 0.1667 - val_loss: 11.8025 - val_perplexity: 133587.0156\n", - "Epoch 14/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 555ms/step - categorical_accuracy: 0.0963 - loss: 6.5658 - perplexity: 758.4783 - val_categorical_accuracy: 0.1667 - val_loss: 11.8975 - val_perplexity: 146902.5625\n", - "Epoch 15/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 599ms/step - categorical_accuracy: 0.2419 - loss: 4.4233 - perplexity: 101.5967 - val_categorical_accuracy: 0.1667 - val_loss: 11.9977 - val_perplexity: 162383.4688\n", - "Epoch 16/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 597ms/step - categorical_accuracy: 0.2018 - loss: 4.8811 - perplexity: 147.2505 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.2702 - 
val_perplexity: 213244.7812\n", - "Epoch 17/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 514ms/step - categorical_accuracy: 0.2419 - loss: 4.8847 - perplexity: 212.5692 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.4334 - val_perplexity: 251053.4531\n", - "Epoch 18/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 516ms/step - categorical_accuracy: 0.1725 - loss: 5.0510 - perplexity: 216.2864 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.5886 - val_perplexity: 293192.5625\n", - "Epoch 19/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 1s/step - categorical_accuracy: 0.3348 - loss: 4.1482 - perplexity: 66.5400 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.7669 - val_perplexity: 350434.6875\n", - "Epoch 20/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 518ms/step - categorical_accuracy: 0.2364 - loss: 6.1440 - perplexity: 556.4460 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.1211 - val_perplexity: 499357.5000\n", - "Epoch 21/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 566ms/step - categorical_accuracy: 0.2752 - loss: 4.0937 - perplexity: 103.8000 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.2722 - val_perplexity: 580840.3125\n", - "Epoch 22/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 584ms/step - categorical_accuracy: 0.3582 - loss: 3.5086 - perplexity: 42.0227 - val_categorical_accuracy: 0.0000e+00 - 
val_loss: 13.3929 - val_perplexity: 655350.3750\n", - "Epoch 23/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 574ms/step - categorical_accuracy: 0.2357 - loss: 3.6651 - perplexity: 42.0124 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.5131 - val_perplexity: 739037.1875\n", - "Epoch 24/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 555ms/step - categorical_accuracy: 0.3743 - loss: 4.2759 - perplexity: 78.8337 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.6073 - val_perplexity: 812073.0625\n", - "Epoch 25/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 619ms/step - categorical_accuracy: 0.2814 - loss: 6.1106 - perplexity: 702.3881 - val_categorical_accuracy: 0.1667 - val_loss: 13.6209 - val_perplexity: 823172.5625\n", - "Epoch 26/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 689ms/step - categorical_accuracy: 0.2647 - loss: 6.2123 - perplexity: 835.9423 - val_categorical_accuracy: 0.1667 - val_loss: 13.5922 - val_perplexity: 799900.9375\n", - "Epoch 27/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 824ms/step - categorical_accuracy: 0.3014 - loss: 3.9091 - perplexity: 57.1766 - val_categorical_accuracy: 0.1667 - val_loss: 13.5968 - val_perplexity: 803528.0625\n", - "Epoch 28/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 594ms/step - categorical_accuracy: 0.2864 - loss: 5.1544 - perplexity: 186.2288 - val_categorical_accuracy: 
0.1667 - val_loss: 13.5879 - val_perplexity: 796426.8750\n", - "Epoch 29/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 973ms/step - categorical_accuracy: 0.2314 - loss: 5.0346 - perplexity: 261.9535 - val_categorical_accuracy: 0.1667 - val_loss: 13.5785 - val_perplexity: 788957.9375\n", - "Epoch 30/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m8s\u001b[0m 1s/step - categorical_accuracy: 0.3508 - loss: 3.9460 - perplexity: 55.9352 - val_categorical_accuracy: 0.1667 - val_loss: 13.5878 - val_perplexity: 796315.2500\n", - "Epoch 31/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 684ms/step - categorical_accuracy: 0.2141 - loss: 3.3061 - perplexity: 29.5618 - val_categorical_accuracy: 0.1667 - val_loss: 13.5959 - val_perplexity: 802850.9375\n", - "Epoch 32/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 659ms/step - categorical_accuracy: 0.1719 - loss: 4.1759 - perplexity: 72.8835 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.7057 - val_perplexity: 896031.1250\n", - "Epoch 33/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 725ms/step - categorical_accuracy: 0.1302 - loss: 5.0193 - perplexity: 177.1105 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.7885 - val_perplexity: 973393.5000\n", - "Epoch 34/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 632ms/step - categorical_accuracy: 0.2919 - loss: 2.9201 - perplexity: 24.4465 - val_categorical_accuracy: 
0.0000e+00 - val_loss: 13.9237 - val_perplexity: 1114295.3750\n", - "Epoch 35/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 639ms/step - categorical_accuracy: 0.3197 - loss: 3.6359 - perplexity: 60.7448 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.0890 - val_perplexity: 1314598.2500\n", - "Epoch 36/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 527ms/step - categorical_accuracy: 0.2364 - loss: 3.6853 - perplexity: 93.4177 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.1742 - val_perplexity: 1431418.1250\n", - "Epoch 37/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 605ms/step - categorical_accuracy: 0.2731 - loss: 3.3295 - perplexity: 31.0892 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.2398 - val_perplexity: 1528469.6250\n", - "Epoch 38/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 755ms/step - categorical_accuracy: 0.5054 - loss: 4.2462 - perplexity: 96.5757 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.3218 - val_perplexity: 1659098.5000\n", - "Epoch 39/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 606ms/step - categorical_accuracy: 0.3638 - loss: 3.2328 - perplexity: 26.5526 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.3728 - val_perplexity: 1745870.1250\n", - "Epoch 40/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 745ms/step - categorical_accuracy: 0.5727 - loss: 1.9158 - perplexity: 9.5471 - 
val_categorical_accuracy: 0.0000e+00 - val_loss: 14.5209 - val_perplexity: 2024707.8750\n", - "Epoch 41/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 671ms/step - categorical_accuracy: 0.1068 - loss: 5.0107 - perplexity: 172.7614 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.5204 - val_perplexity: 2023570.8750\n", - "this is neural_network_spec_file 2025_11_23 16_55_cerebros_not-gpt_meta_42/model_architectures/tr_0000000000000001_subtrial_0000000000000000.txt\n", - "returning trial 1 oracles\n", - " categorical_accuracy loss perplexity val_categorical_accuracy \\\n", - "0 0.000000 11.719700 225372.531250 0.166667 \n", - "1 0.173913 11.155995 69982.140625 0.166667 \n", - "2 0.173913 10.995764 59621.039062 0.166667 \n", - "3 0.217391 10.042144 22974.583984 0.000000 \n", - "4 0.173913 9.805058 18125.181641 0.000000 \n", - "5 0.130435 9.198784 9885.100586 0.000000 \n", - "6 0.173913 8.641828 5663.671387 0.000000 \n", - "7 0.043478 8.808529 6691.075195 0.000000 \n", - "8 0.217391 7.256882 1417.828491 0.000000 \n", - "9 0.173913 6.904544 996.794250 0.000000 \n", - "10 0.217391 6.873430 966.256958 0.000000 \n", - "11 0.173913 5.982946 396.607025 0.000000 \n", - "12 0.043478 6.824471 920.089539 0.166667 \n", - "13 0.130435 6.259269 522.836731 0.166667 \n", - "14 0.217391 5.205779 182.322769 0.166667 \n", - "15 0.130435 5.462027 235.574463 0.000000 \n", - "16 0.217391 6.074162 434.485474 0.000000 \n", - "17 0.217391 5.354462 211.550262 0.000000 \n", - "18 0.304348 4.318021 75.040001 0.000000 \n", - "19 0.217391 5.875260 356.117035 0.000000 \n", - "20 0.217391 5.246053 189.815536 0.000000 \n", - "21 0.391304 4.035575 56.575462 0.000000 \n", - "22 0.173913 3.672752 39.360092 0.000000 \n", - "23 0.347826 4.800797 121.607239 0.000000 \n", - "24 0.260870 6.058529 427.745911 0.166667 \n", - "25 0.260870 6.874752 967.535400 0.166667 \n", - "26 0.304348 3.871903 
48.033691 0.166667 \n", - "27 0.217391 5.597022 269.622284 0.166667 \n", - "28 0.260870 4.006342 54.945507 0.166667 \n", - "29 0.260870 4.286894 72.740173 0.166667 \n", - "30 0.217391 3.180355 24.055300 0.166667 \n", - "31 0.173913 4.073040 58.735218 0.000000 \n", - "32 0.173913 5.302594 200.857193 0.000000 \n", - "33 0.217391 3.763384 43.094006 0.000000 \n", - "34 0.217391 4.363249 78.511826 0.000000 \n", - "35 0.217391 5.450110 232.783875 0.000000 \n", - "36 0.260870 3.634080 37.866989 0.000000 \n", - "37 0.391304 5.082735 161.214310 0.000000 \n", - "38 0.391304 3.312840 27.463017 0.000000 \n", - "39 0.434783 2.846823 17.232950 0.000000 \n", - "40 0.086957 5.169964 175.908478 0.000000 \n", - "\n", - " val_loss val_perplexity trial_number subtrial_number \\\n", - "0 11.768844 1.291648e+05 1 0 \n", - "1 11.750198 1.267787e+05 1 0 \n", - "2 11.742490 1.258052e+05 1 0 \n", - "3 11.745057 1.261286e+05 1 0 \n", - "4 11.736234 1.250206e+05 1 0 \n", - "5 11.717111 1.226525e+05 1 0 \n", - "6 11.690784 1.194656e+05 1 0 \n", - "7 11.644112 1.140180e+05 1 0 \n", - "8 11.626712 1.120513e+05 1 0 \n", - "9 11.637473 1.132634e+05 1 0 \n", - "10 11.659701 1.158094e+05 1 0 \n", - "11 11.750339 1.267965e+05 1 0 \n", - "12 11.802508 1.335870e+05 1 0 \n", - "13 11.897525 1.469026e+05 1 0 \n", - "14 11.997716 1.623835e+05 1 0 \n", - "15 12.270196 2.132448e+05 1 0 \n", - "16 12.433421 2.510535e+05 1 0 \n", - "17 12.588585 2.931926e+05 1 0 \n", - "18 12.766930 3.504347e+05 1 0 \n", - "19 13.121078 4.993575e+05 1 0 \n", - "20 13.272231 5.808403e+05 1 0 \n", - "21 13.392925 6.553504e+05 1 0 \n", - "22 13.513103 7.390372e+05 1 0 \n", - "23 13.607346 8.120731e+05 1 0 \n", - "24 13.620921 8.231726e+05 1 0 \n", - "25 13.592243 7.999009e+05 1 0 \n", - "26 13.596767 8.035281e+05 1 0 \n", - "27 13.587891 7.964269e+05 1 0 \n", - "28 13.578468 7.889579e+05 1 0 \n", - "29 13.587750 7.963152e+05 1 0 \n", - "30 13.595924 8.028509e+05 1 0 \n", - "31 13.705730 8.960311e+05 1 0 \n", - "32 13.788544 
9.733935e+05 1 0 \n", - "33 13.923733 1.114295e+06 1 0 \n", - "34 14.089040 1.314598e+06 1 0 \n", - "35 14.174176 1.431418e+06 1 0 \n", - "36 14.239779 1.528470e+06 1 0 \n", - "37 14.321785 1.659098e+06 1 0 \n", - "38 14.372764 1.745870e+06 1 0 \n", - "39 14.520935 2.024708e+06 1 0 \n", - "40 14.520374 2.023571e+06 1 0 \n", - "\n", - " model_name \n", - "0 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "1 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "2 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "3 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "4 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "5 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "6 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "7 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "8 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "9 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "10 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "11 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "12 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "13 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "14 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "15 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "16 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "17 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "18 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "19 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "20 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "21 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "22 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "23 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "24 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "25 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "26 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... 
\n", - "27 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "28 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "29 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "30 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "31 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "32 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "33 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "34 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "35 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "36 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "37 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "38 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "39 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "40 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", - " self.pid = os.fork()\n", - "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. 
os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", - " self.pid = os.fork()\n", - "Global task progress: 67%|\u001b[38;2;22;206;235mโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ \u001b[0m| 2/3 [07:42<03:50, 230.58s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "SimpleCerebrosRandomSearch.input_shapes: [(40,)]\n", - "nan\n", - ">nnf>ceil\n", - "k is: 0 value is: [{'1': }]\n", - "0\n", - "k is: 1 value is: [{'2': }, {'2': }]\n", - "1\n", - "Trying to create level 1\n", - "We think level 1's predecessors are: [0]\n", - "k is: 2 value is: [{'128260': }]\n", - "2\n", - "Trying to create Final level 2\n", - "Trying to create level 2\n", - "We think level final level 2's predecessors are: [0, 1]\n", - "levels:\n", - "[0, 1, 2]\n", - "{'0': 'InputUnitModule'}\n", - "InputLevel.input_shapes [(40,)]\n", - "{'2': }\n", - "{'2': }\n", - "Debug: I am 2 selecting 1\n", - "debug: meta_level_number\n", - "debug: meta_level_number\n", - "debug: meta_level_number\n", - "Setting levels_unmaterialized[0] level_number 0 to have first successor: levels_unmaterialized[:1], having level_numbers of [1, 2]\n", - "Setting levels_unmaterialized[1] level_number 1 to have first successor: levels_unmaterialized[:2], having level_numbers of [2]\n", - "Debug: successor_connectivity_errors_2d []\n", - "$$$$$$>>>>> Base model: \n", - "InputUnit.input_shape: (40,)\n", - "{'2': }\n", - "{'2': }\n", - "debug: meta_level_number\n", - "debug: meta_level_number\n", - "Debug: successor_connectivity_errors_2d []\n", - "Debug: successor_connectivity_errors_2d []\n", - "materialize:_NeuralNetworkFuture_0000000000000nan_tr_2_DenseLevel_0000000000000001_tr_2_DenseUnit_0000000000000001_tr_2_0 called\n", - "materialized network layers\n", - "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - 
"materialize:_NeuralNetworkFuture_0000000000000nan_tr_2_DenseLevel_0000000000000001_tr_2_DenseUnit_0000000000000001_tr_2_1 called\n", - "materialized network layers\n", - "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - "{'128260': }\n", - "debug: meta_level_number\n", - "Debug: successor_connectivity_errors_2d []\n", - "materialize:_NeuralNetworkFuture_0000000000000nan_tr_2_FinalDenseLevel_0000000000000002_tr_2_FinalDenseUnit_0000000000000002_tr_2_0 called\n", - "materialized network layers\n", - "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", - "inputs\n", - "\n", - "\n", - "outputs\n", - "\n" - ] - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "\u001b[1mModel: \"NeuralNetworkFuture_0000000000000nan_tr_2_nn_materialized\"\u001b[0m\n" - ], - "text/html": [ - "
Model: \"NeuralNetworkFuture_0000000000000nan_tr_2_nn_materialized\"\n",
-              "
\n" - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n", - "โ”ƒ\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0mโ”ƒ\n", - "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m40\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ - โ”‚\n", - "โ”‚ (\u001b[38;5;33mInputLayer\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ functional โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m12\u001b[0m) โ”‚ \u001b[38;5;34m1,550,652\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mFunctional\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ 
(\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ 
โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ 
(\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], 
โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ 
NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ 
โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m1,584\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", - "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128260\u001b[0m) โ”‚ \u001b[38;5;34m50,919,220\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", - "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", - "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n" - ], - "text/html": [ - "
โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n",
-              "โ”ƒ Layer (type)        โ”ƒ Output Shape      โ”ƒ    Param # โ”ƒ Connected to      โ”ƒ\n",
-              "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 40)        โ”‚          0 โ”‚ -                 โ”‚\n",
-              "โ”‚ (InputLayer)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ functional          โ”‚ (None, 12)        โ”‚  1,550,652 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (Functional)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0]  โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0]  โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚          0 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚      1,584 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
-              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 128260)    โ”‚ 50,919,220 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
-              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
-              "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n",
-              "
\n" - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m52,476,644\u001b[0m (200.18 MB)\n" - ], - "text/html": [ - "
 Total params: 52,476,644 (200.18 MB)\n",
-              "
\n" - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m52,474,124\u001b[0m (200.17 MB)\n" - ], - "text/html": [ - "
 Trainable params: 52,474,124 (200.17 MB)\n",
-              "
\n" - ] - }, - "metadata": {} - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m2,520\u001b[0m (9.84 KB)\n" - ], - "text/html": [ - "
 Non-trainable params: 2,520 (9.84 KB)\n",
-              "
\n" - ] - }, - "metadata": {} - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "None\n", - "Epoch 1/41\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.12/dist-packages/keras/src/models/functional.py:241: UserWarning: The structure of `inputs` doesn't match the expected structure.\n", - "Expected: NeuralNetworkFuture_0000000000000nan_tr_2_InputLevel_0000000000000000_tr_2_InputUnit_0000000000000000_tr_2_0_inp\n", - "Received: inputs=('Tensor(shape=(None, 40))',)\n", - " warnings.warn(msg)\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 1s/step - categorical_accuracy: 0.0000e+00 - loss: 11.7705 - perplexity: 321629.5625 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7229 - val_perplexity: 123359.4219\n", - "Epoch 2/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m8s\u001b[0m 553ms/step - categorical_accuracy: 0.2203 - loss: 11.1997 - perplexity: 73499.6797 - val_categorical_accuracy: 0.1667 - val_loss: 11.6443 - val_perplexity: 114043.3438\n", - "Epoch 3/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 810ms/step - categorical_accuracy: 0.0568 - loss: 10.8859 - perplexity: 55947.3047 - val_categorical_accuracy: 0.1667 - val_loss: 11.6099 - val_perplexity: 110179.2891\n", - "Epoch 4/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 764ms/step - categorical_accuracy: 0.0857 - loss: 10.3024 - perplexity: 32385.9180 - val_categorical_accuracy: 0.1667 - val_loss: 11.5822 - val_perplexity: 107167.9375\n", - "Epoch 5/41\n", - 
"\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 804ms/step - categorical_accuracy: 0.4326 - loss: 9.0836 - perplexity: 8933.4072 - val_categorical_accuracy: 0.1667 - val_loss: 11.5889 - val_perplexity: 107891.9219\n", - "Epoch 6/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 748ms/step - categorical_accuracy: 0.2370 - loss: 8.2757 - perplexity: 3973.7712 - val_categorical_accuracy: 0.1667 - val_loss: 11.6114 - val_perplexity: 110344.8047\n", - "Epoch 7/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 697ms/step - categorical_accuracy: 0.3215 - loss: 7.9711 - perplexity: 3110.2710 - val_categorical_accuracy: 0.1667 - val_loss: 11.6354 - val_perplexity: 113026.3203\n", - "Epoch 8/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 847ms/step - categorical_accuracy: 0.3126 - loss: 7.4265 - perplexity: 1791.7644 - val_categorical_accuracy: 0.1667 - val_loss: 11.7362 - val_perplexity: 125022.5000\n", - "Epoch 9/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 671ms/step - categorical_accuracy: 0.3715 - loss: 6.4991 - perplexity: 682.2664 - val_categorical_accuracy: 0.1667 - val_loss: 11.8366 - val_perplexity: 138215.8281\n", - "Epoch 10/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 661ms/step - categorical_accuracy: 0.0734 - loss: 6.6783 - perplexity: 959.8943 - val_categorical_accuracy: 0.1667 - val_loss: 11.9409 - val_perplexity: 153410.4375\n", - "Epoch 11/41\n", - 
"\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 672ms/step - categorical_accuracy: 0.2731 - loss: 5.4216 - perplexity: 253.3555 - val_categorical_accuracy: 0.1667 - val_loss: 12.0696 - val_perplexity: 174486.5469\n", - "Epoch 12/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 771ms/step - categorical_accuracy: 0.2197 - loss: 5.1631 - perplexity: 218.4438 - val_categorical_accuracy: 0.1667 - val_loss: 12.3783 - val_perplexity: 237591.3281\n", - "Epoch 13/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 590ms/step - categorical_accuracy: 0.0990 - loss: 6.2295 - perplexity: 540.8942 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.5687 - val_perplexity: 287426.0312\n", - "Epoch 14/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 779ms/step - categorical_accuracy: 0.1786 - loss: 6.4879 - perplexity: 5995.6064 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.7386 - val_perplexity: 340639.4062\n", - "Epoch 15/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 748ms/step - categorical_accuracy: 0.1752 - loss: 5.1388 - perplexity: 226.8788 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.8619 - val_perplexity: 385366.4375\n", - "Epoch 16/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 621ms/step - categorical_accuracy: 0.1641 - loss: 5.7236 - perplexity: 863.9962 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.1398 - val_perplexity: 508785.5938\n", - 
"Epoch 17/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 689ms/step - categorical_accuracy: 0.4921 - loss: 2.9571 - perplexity: 20.0698 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.3148 - val_perplexity: 606077.4375\n", - "Epoch 18/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 844ms/step - categorical_accuracy: 0.3659 - loss: 4.9595 - perplexity: 819.0781 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.5068 - val_perplexity: 734419.0000\n", - "Epoch 19/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 593ms/step - categorical_accuracy: 0.3014 - loss: 4.9173 - perplexity: 152.2117 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.6640 - val_perplexity: 859409.2500\n", - "Epoch 20/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 847ms/step - categorical_accuracy: 0.2308 - loss: 3.7793 - perplexity: 60.0206 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.9386 - val_perplexity: 1131028.2500\n", - "Epoch 21/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 918ms/step - categorical_accuracy: 0.3832 - loss: 3.4479 - perplexity: 80.4731 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.0840 - val_perplexity: 1307933.0000\n", - "Epoch 22/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 560ms/step - categorical_accuracy: 0.3860 - loss: 4.3510 - perplexity: 90.9878 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.1620 - 
val_perplexity: 1414104.8750\n", - "Epoch 23/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 635ms/step - categorical_accuracy: 0.4443 - loss: 2.9553 - perplexity: 23.0736 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.2588 - val_perplexity: 1557883.3750\n", - "Epoch 24/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 636ms/step - categorical_accuracy: 0.4983 - loss: 2.2404 - perplexity: 9.9262 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.3779 - val_perplexity: 1754904.3750\n", - "Epoch 25/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 623ms/step - categorical_accuracy: 0.2909 - loss: 5.2172 - perplexity: 247.5778 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.4472 - val_perplexity: 1880756.3750\n", - "Epoch 26/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 806ms/step - categorical_accuracy: 0.3048 - loss: 3.1858 - perplexity: 25.7062 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.5364 - val_perplexity: 2056196.2500\n", - "Epoch 27/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 843ms/step - categorical_accuracy: 0.1185 - loss: 3.0064 - perplexity: 25.4380 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.5902 - val_perplexity: 2169912.7500\n", - "Epoch 28/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 866ms/step - categorical_accuracy: 0.5033 - loss: 2.9283 - perplexity: 35.1612 - val_categorical_accuracy: 0.1667 - 
val_loss: 14.6578 - val_perplexity: 2321627.0000\n", - "Epoch 29/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 794ms/step - categorical_accuracy: 0.3320 - loss: 3.2803 - perplexity: 27.8907 - val_categorical_accuracy: 0.1667 - val_loss: 14.7196 - val_perplexity: 2469625.0000\n", - "Epoch 30/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 660ms/step - categorical_accuracy: 0.2752 - loss: 5.4753 - perplexity: 249.7908 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.8572 - val_perplexity: 2834024.2500\n", - "Epoch 31/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 748ms/step - categorical_accuracy: 0.2925 - loss: 5.2035 - perplexity: 302.8727 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.9761 - val_perplexity: 3191841.5000\n", - "Epoch 32/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 585ms/step - categorical_accuracy: 0.2715 - loss: 3.0830 - perplexity: 22.1130 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.0934 - val_perplexity: 3589043.2500\n", - "Epoch 33/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 837ms/step - categorical_accuracy: 0.3638 - loss: 2.0138 - perplexity: 7.6831 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.1927 - val_perplexity: 3963894.5000\n", - "Epoch 34/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 622ms/step - categorical_accuracy: 0.4165 - loss: 2.3430 - perplexity: 12.4422 - 
val_categorical_accuracy: 0.0000e+00 - val_loss: 15.2933 - val_perplexity: 4383348.5000\n", - "Epoch 35/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 730ms/step - categorical_accuracy: 0.4832 - loss: 3.8156 - perplexity: 57.3130 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.4055 - val_perplexity: 4903895.5000\n", - "Epoch 36/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 831ms/step - categorical_accuracy: 0.1641 - loss: 4.5182 - perplexity: 317.0210 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.4245 - val_perplexity: 4998003.5000\n", - "Epoch 37/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 621ms/step - categorical_accuracy: 0.2752 - loss: 2.9753 - perplexity: 25.5228 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.4590 - val_perplexity: 5173094.0000\n", - "Epoch 38/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 666ms/step - categorical_accuracy: 0.2280 - loss: 2.4058 - perplexity: 11.6680 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.4232 - val_perplexity: 4991435.0000\n", - "Epoch 39/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 565ms/step - categorical_accuracy: 0.3592 - loss: 3.6356 - perplexity: 40.6227 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.4089 - val_perplexity: 4920268.0000\n", - "Epoch 40/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 665ms/step - categorical_accuracy: 0.2691 - loss: 3.4659 
- perplexity: 47.4784 - val_categorical_accuracy: 0.1667 - val_loss: 15.3797 - val_perplexity: 4778703.0000\n", - "Epoch 41/41\n", - "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 693ms/step - categorical_accuracy: 0.4310 - loss: 2.7924 - perplexity: 20.8595 - val_categorical_accuracy: 0.1667 - val_loss: 15.4324 - val_perplexity: 5037467.5000\n", - "this is neural_network_spec_file 2025_11_23 16_55_cerebros_not-gpt_meta_42/model_architectures/tr_0000000000000002_subtrial_0000000000000000.txt\n", - "returning trial 2 oracles\n", - " categorical_accuracy loss perplexity val_categorical_accuracy \\\n", - "0 0.000000 11.761698 226943.984375 0.000000 \n", - "1 0.260870 11.090140 65521.941406 0.166667 \n", - "2 0.086957 10.702163 44451.890625 0.166667 \n", - "3 0.173913 9.990288 21813.576172 0.166667 \n", - "4 0.347826 9.246581 10369.053711 0.166667 \n", - "5 0.260870 8.266317 3890.594971 0.166667 \n", - "6 0.347826 7.704062 2217.337646 0.166667 \n", - "7 0.304348 7.122604 1239.675415 0.166667 \n", - "8 0.347826 6.225540 505.495789 0.166667 \n", - "9 0.086957 6.562615 708.120972 0.166667 \n", - "10 0.260870 5.511858 247.610764 0.166667 \n", - "11 0.217391 5.295715 199.480270 0.166667 \n", - "12 0.130435 6.463620 641.378784 0.000000 \n", - "13 0.260870 5.026217 152.355484 0.000000 \n", - "14 0.217391 5.910099 368.742676 0.000000 \n", - "15 0.217391 4.887461 132.616455 0.000000 \n", - "16 0.434783 3.347833 28.441027 0.000000 \n", - "17 0.347826 4.313054 74.668182 0.000000 \n", - "18 0.304348 4.665129 106.179253 0.000000 \n", - "19 0.217391 4.334057 76.253006 0.000000 \n", - "20 0.391304 2.807739 16.572411 0.000000 \n", - "21 0.391304 4.215992 67.761353 0.000000 \n", - "22 0.391304 3.522572 33.871445 0.000000 \n", - "23 0.478261 2.265072 9.631822 0.000000 \n", - "24 0.347826 5.091538 162.639801 0.000000 \n", - "25 0.347826 2.982907 19.745134 0.000000 \n", - "26 0.130435 3.861120 
47.518566 0.000000 \n", - "27 0.434783 4.315707 74.866554 0.166667 \n", - "28 0.304348 3.004366 20.173416 0.166667 \n", - "29 0.217391 5.262289 192.922501 0.000000 \n", - "30 0.260870 5.697386 298.087250 0.000000 \n", - "31 0.347826 3.149921 23.334219 0.000000 \n", - "32 0.391304 2.063896 7.876601 0.000000 \n", - "33 0.391304 3.141111 23.129541 0.000000 \n", - "34 0.391304 3.663168 38.984657 0.000000 \n", - "35 0.217391 3.455597 31.677193 0.000000 \n", - "36 0.217391 3.796592 44.549091 0.000000 \n", - "37 0.217391 2.545129 12.744876 0.000000 \n", - "38 0.260870 4.018140 55.597588 0.000000 \n", - "39 0.173913 3.072442 21.594568 0.166667 \n", - "40 0.434783 2.709372 15.019834 0.166667 \n", - "\n", - " val_loss val_perplexity trial_number subtrial_number \\\n", - "0 11.722857 1.233594e+05 2 0 \n", - "1 11.644334 1.140433e+05 2 0 \n", - "2 11.609864 1.101793e+05 2 0 \n", - "3 11.582150 1.071679e+05 2 0 \n", - "4 11.588885 1.078919e+05 2 0 \n", - "5 11.611365 1.103448e+05 2 0 \n", - "6 11.635376 1.130263e+05 2 0 \n", - "7 11.736249 1.250225e+05 2 0 \n", - "8 11.836572 1.382158e+05 2 0 \n", - "9 11.940872 1.534104e+05 2 0 \n", - "10 12.069603 1.744865e+05 2 0 \n", - "11 12.378307 2.375913e+05 2 0 \n", - "12 12.568721 2.874260e+05 2 0 \n", - "13 12.738580 3.406394e+05 2 0 \n", - "14 12.861950 3.853664e+05 2 0 \n", - "15 13.139782 5.087856e+05 2 0 \n", - "16 13.314763 6.060774e+05 2 0 \n", - "17 13.506835 7.344190e+05 2 0 \n", - "18 13.664001 8.594092e+05 2 0 \n", - "19 13.938638 1.131028e+06 2 0 \n", - "20 14.083958 1.307933e+06 2 0 \n", - "21 14.162007 1.414105e+06 2 0 \n", - "22 14.258838 1.557883e+06 2 0 \n", - "23 14.377925 1.754904e+06 2 0 \n", - "24 14.447185 1.880756e+06 2 0 \n", - "25 14.536368 2.056196e+06 2 0 \n", - "26 14.590198 2.169913e+06 2 0 \n", - "27 14.657779 2.321627e+06 2 0 \n", - "28 14.719577 2.469625e+06 2 0 \n", - "29 14.857208 2.834024e+06 2 0 \n", - "30 14.976109 3.191842e+06 2 0 \n", - "31 15.093396 3.589043e+06 2 0 \n", - "32 15.192738 
3.963894e+06 2 0 \n", - "33 15.293323 4.383348e+06 2 0 \n", - "34 15.405540 4.903896e+06 2 0 \n", - "35 15.424549 4.998004e+06 2 0 \n", - "36 15.458982 5.173094e+06 2 0 \n", - "37 15.423234 4.991435e+06 2 0 \n", - "38 15.408874 4.920268e+06 2 0 \n", - "39 15.379680 4.778703e+06 2 0 \n", - "40 15.432412 5.037468e+06 2 0 \n", - "\n", - " model_name \n", - "0 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "1 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "2 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "3 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "4 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "5 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "6 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "7 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "8 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "9 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "10 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "11 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "12 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "13 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "14 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "15 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "16 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "17 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "18 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "19 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "20 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "21 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "22 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "23 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "24 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "25 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "26 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... 
\n", - "27 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "28 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "29 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "30 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "31 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "32 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "33 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "34 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "35 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "36 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "37 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "38 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "39 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", - "40 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", - " self.pid = os.fork()\n", - "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. 
os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", - " self.pid = os.fork()\n", - "Global task progress: 100%|\u001b[38;2;22;206;235mโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ\u001b[0m| 3/3 [12:11<00:00, 243.86s/it]" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Index(['categorical_accuracy', 'loss', 'perplexity',\n", - " 'val_categorical_accuracy', 'val_loss', 'val_perplexity',\n", - " 'trial_number', 'subtrial_number', 'model_name'],\n", - " dtype='object')\n", - "metric_to_rank_by is: 'perplexity'\n", - "Type of metric_to_rank_by is: \n", - "metric_to_rank_by is: 'perplexity'\n", - "Type of metric_to_rank_by is: \n", - "Best result this trial was: 7.876600742340088\n", - "Type of best result: \n", - "Best model name: 2025_11_23 16_55_cerebros_not-gpt_meta_42/models/tr_0000000000000002_subtrial_0000000000000000.keras\n", - "Cerebros trained 3 models in 12.19 min. Average time per model: 4.06 min.\n", - "Cerebros best perplexity achieved in Phase I-a is 7.876600742340088\n" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Training Stage I-a - Model Evaluation (Subjective):\n", - "\n", - "- We retrieve the best model found during the NAS phase and test its text generation capabilities from a subjective standpoint.\n", - "- Keep in mind, this is trained on 10 text samples. 
It is impressive that it can generate anything, especially subjects and verbs that are on-topic and agree, and is otherwise sensible, despite being grammatically gibberish.\n", - "\n", - "FYI: The generative components we imported from cerebrosllmutils:\n", - "\n", - "## Model config\n", - "```python\n", - "\n", - "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='CerebrosNotGPTConfig')\n", - "class CerebrosNotGPTConfig:\n", - " def __init__(self, max_sequence_length=1536, padding_token=None):\n", - " self.max_sequence_length = max_sequence_length\n", - " self.padding_token = padding_token\n", - "\n", - " def get_config(self):\n", - " return {\n", - " 'max_sequence_length': self.max_sequence_length,\n", - " 'padding_token': self.padding_token\n", - " }\n", - "\n", - " @classmethod\n", - " def from_config(cls, config):\n", - " return cls(**config)\n", - "```\n", - "\n", - "## Model class we imported from cerebrosllmutil, having:\n", - "\n", - "- Greedy sampling\n", - "- Temperature scaling\n", - "- Top p sampling\n", - "- Top k sampling\n", - "- Presence penlaty\n", - "- Frequency penalty\n", - "- Repetition penalty\n", - "\n", - "```python\n", - "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='CerebrosNotGPT')\n", - "class CerebrosNotGPT(tf.keras.Model):\n", - " def __init__(self, config: Any, model: Any = None, **kwargs):\n", - " # 1. Store the nested model argument.\n", - " self.config = config\n", - " self.model = model\n", - " \n", - " # 2. Extract and remove custom kwargs (like 'model') before calling super.\n", - " # This is important to prevent 'unrecognized keyword argument' errors.\n", - " # The nested model is already extracted and stored, so it can be safely removed.\n", - " kwargs.pop('model', None)\n", - " \n", - " # 3. 
Call the parent constructor with the cleaned kwargs.\n", - " super().__init__(**kwargs)\n", - "\n", - " self.max_sequence_length = config.max_sequence_length\n", - " self.padding_token = config.padding_token\n", - "\n", - " def get_config(self):\n", - " base_config = super().get_config()\n", - " config_dict = {\n", - " 'config': self.config.get_config(),\n", - " }\n", - " \n", - " # Explicitly handle nested model serialization.\n", - " # This is required if Keras's automatic tracking fails.\n", - " if self.model is not None:\n", - " # Note: This approach might still suffer from weight loss.\n", - " # The recommended way is to let Keras handle it automatically.\n", - " config_dict['model'] = tf.keras.utils.serialize_keras_object(self.model)\n", - "\n", - " base_config.update(config_dict)\n", - " return base_config\n", - "\n", - " @classmethod\n", - " def from_config(cls, config):\n", - " # Separate the custom config.\n", - " config_obj_dict = config.pop('config')\n", - " config_obj = CerebrosNotGPTConfig.from_config(config_obj_dict)\n", - " \n", - " # Manually extract and load the nested model.\n", - " nested_model_config = config.pop('model', None)\n", - " if nested_model_config:\n", - " nested_model = tf.keras.utils.deserialize_keras_object(nested_model_config)\n", - " else:\n", - " nested_model = None\n", - " \n", - " # Reconstruct the outer model by passing the restored parts.\n", - " return cls(config=config_obj, model=nested_model, **config)\n", - "\n", - " def call(self, inputs, training=False):\n", - " if self.model is None:\n", - " raise ValueError(\"Inner model not initialized properly\")\n", - " return self.model(inputs, training=training)\n", - "\n", - " @staticmethod\n", - " def apply_top_k_probs(probs, k):\n", - " if k is None or k <= 0:\n", - " return probs\n", - " # Flatten and argsort for indices\n", - " sorted_indices = tf.argsort(probs, direction='DESCENDING')\n", - " keep_indices = sorted_indices[:k]\n", - " mask = tf.zeros_like(probs, 
dtype=tf.bool)\n", - " mask = tf.tensor_scatter_nd_update(mask, tf.reshape(keep_indices, (-1, 1)),\n", - " tf.ones((k,), dtype=tf.bool))\n", - " filtered_probs = tf.where(mask, probs, tf.zeros_like(probs))\n", - " # Renormalize\n", - " filtered_probs = filtered_probs / tf.reduce_sum(filtered_probs)\n", - " return filtered_probs\n", - "\n", - " @staticmethod\n", - " def apply_top_p_probs(probs, p):\n", - " if p is None or p >= 1.0:\n", - " return probs\n", - " sorted_indices = tf.argsort(probs, direction='DESCENDING')\n", - " sorted_probs = tf.gather(probs, sorted_indices)\n", - " cumulative_probs = tf.cumsum(sorted_probs)\n", - " mask = cumulative_probs <= p\n", - " # Always keep at least 1 token\n", - " mask = tf.concat([tf.constant([True]), mask[1:]], axis=0)\n", - " keep_indices = tf.boolean_mask(sorted_indices, mask)\n", - " filtered_probs = tf.where(\n", - " tf.reduce_any(tf.equal(tf.range(tf.shape(probs)[0])[:, None], keep_indices), axis=1), probs,\n", - " tf.zeros_like(probs))\n", - " # Renormalize\n", - " filtered_probs = filtered_probs / tf.reduce_sum(filtered_probs)\n", - " return filtered_probs\n", - "\n", - " def generate(self,\n", - " token_ids,\n", - " do_sample=False,\n", - " max_new_tokens=None,\n", - " temperature=1.0,\n", - " top_k=None,\n", - " top_p=None,\n", - " frequency_penalty=None,\n", - " presence_penalty=None,\n", - " repetition_penalty=None):\n", - " \"\"\"\n", - " Generate text autoregressively from token IDs.\n", - " Applies filtering in sequence: penalties -> temperature -> top-k -> top-p\n", - " \"\"\"\n", - " # Convert token_ids to list if it's not already\n", - " if not isinstance(token_ids, list):\n", - " token_ids = list(token_ids)\n", - "\n", - " # Determine the actual maximum number of new tokens\n", - " if max_new_tokens is None:\n", - " max_new_tokens = self.max_sequence_length - len(token_ids)\n", - " else:\n", - " max_new_tokens = min(max_new_tokens, self.max_sequence_length - len(token_ids))\n", - "\n", - " # Initialize 
the generated tokens list\n", - " generated_tokens = []\n", - " current_tokens = token_ids.copy()\n", - "\n", - " # Autoregressive generation loop\n", - " for _ in range(max_new_tokens):\n", - " # Pad or truncate to max_sequence_length\n", - " if len(current_tokens) > self.max_sequence_length:\n", - " input_tokens = current_tokens[-self.max_sequence_length:]\n", - " else:\n", - " padding_needed = self.max_sequence_length - len(current_tokens)\n", - " input_tokens = current_tokens + [self.padding_token] * padding_needed\n", - "\n", - " # Convert to tensor and get model prediction\n", - " input_tensor = tf.constant([input_tokens], dtype=tf.int32)\n", - " probs_nested = self.model(input_tensor)\n", - " probs = probs_nested[0] # Already softmax probabilities (NOT logits as comment says)\n", - " logits = tf.math.log(probs + 10 ** -20) # Convert to logits for penalty application\n", - "\n", - " if do_sample:\n", - " # Apply repetition/frequency/presence penalties to logits\n", - " if frequency_penalty is not None or presence_penalty is not None:\n", - " # Collect token counts from current_tokens\n", - " token_counts = {}\n", - " for t in current_tokens:\n", - " token_counts[t] = token_counts.get(t, 0) + 1\n", - "\n", - " # Prepare penalty tensor\n", - " vocab_size = tf.shape(logits)[0]\n", - " penalties = tf.zeros_like(logits)\n", - "\n", - " for token_id, count in token_counts.items():\n", - " if token_id >= vocab_size:\n", - " continue\n", - " penalty = 0.0\n", - " if presence_penalty is not None:\n", - " penalty += presence_penalty\n", - " if frequency_penalty is not None:\n", - " penalty += frequency_penalty * count\n", - "\n", - " penalties = tf.tensor_scatter_nd_add(\n", - " penalties,\n", - " [[token_id]],\n", - " [penalty]\n", - " )\n", - "\n", - " # Subtract penalties from logits\n", - " logits = logits - penalties\n", - "\n", - " # Apply repetition penalty (standard approach)\n", - " if repetition_penalty is not None and repetition_penalty != 1.0:\n", - " # 
Collect unique tokens that have appeared\n", - " unique_tokens = list(set(current_tokens))\n", - " vocab_size = tf.shape(logits)[0]\n", - "\n", - " for token_id in unique_tokens:\n", - " if token_id < vocab_size:\n", - " # Divide logits of repeated tokens by penalty\n", - " logits = tf.tensor_scatter_nd_update(\n", - " logits,\n", - " [[token_id]],\n", - " [logits[token_id] / repetition_penalty]\n", - " )\n", - "\n", - " # Apply temperature\n", - " if temperature != 1.0:\n", - " logits = logits / temperature\n", - "\n", - " # Convert to probabilities\n", - " probs = tf.nn.softmax(logits)\n", - "\n", - " # Apply top-k filtering (if specified)\n", - " if top_k is not None and top_k > 0:\n", - " k = min(top_k, tf.shape(probs)[0])\n", - " # Get top-k values and indices\n", - " top_k_values, top_k_indices = tf.nn.top_k(probs, k=k, sorted=False)\n", - " # Create mask for top-k positions\n", - " top_k_mask = tf.scatter_nd(\n", - " tf.expand_dims(top_k_indices, 1),\n", - " tf.ones_like(top_k_values, dtype=tf.bool),\n", - " tf.shape(probs)\n", - " )\n", - " # Zero out non-top-k probabilities\n", - " probs = tf.where(top_k_mask, probs, tf.zeros_like(probs))\n", - " # Renormalize\n", - " probs = probs / tf.reduce_sum(probs)\n", - " print(\n", - " f\">>> After top_k: {tf.shape(probs)} shape, {tf.reduce_sum(tf.cast(probs > 1e-8, tf.int32))} non-zero probs\")\n", - "\n", - " # Apply top-p filtering (if specified)\n", - " if top_p is not None and top_p < 1.0:\n", - " # Sort probabilities in descending order\n", - " sorted_indices = tf.argsort(probs, direction='DESCENDING')\n", - " sorted_probs = tf.gather(probs, sorted_indices)\n", - " cumulative_probs = tf.cumsum(sorted_probs)\n", - " # Create mask for top-p\n", - " mask = cumulative_probs <= top_p\n", - " # Always keep at least one token\n", - " mask = tf.concat([tf.constant([True]), mask[1:]], axis=0)\n", - " # Get indices to keep\n", - " keep_indices = tf.boolean_mask(sorted_indices, mask)\n", - " # Create mask for original 
indices\n", - " filter_mask = tf.scatter_nd(\n", - " tf.expand_dims(keep_indices, 1),\n", - " tf.ones_like(keep_indices, dtype=tf.bool),\n", - " tf.shape(probs)\n", - " )\n", - " # Apply mask and renormalize\n", - " probs = tf.where(filter_mask, probs, tf.zeros_like(probs))\n", - " probs = probs / tf.reduce_sum(probs)\n", - " print(\n", - " f\">>> After top_p: {tf.shape(probs)} shape, {tf.reduce_sum(tf.cast(probs > 1e-8, tf.int32))} non-zero probs\")\n", - "\n", - " # Sample from the final filtered distribution\n", - " # Get non-zero indices and their probabilities\n", - " non_zero_mask = probs > 1e-8\n", - " if tf.reduce_any(non_zero_mask):\n", - " filtered_indices = tf.where(non_zero_mask)[:, 0] # Get indices\n", - " filtered_probs = tf.boolean_mask(probs, non_zero_mask) # Get probabilities\n", - " # Sample\n", - " sampled_local_index = tf.random.categorical(tf.math.log(filtered_probs)[None, :], 1)[0, 0]\n", - " # Map back to vocabulary index\n", - " next_token_id = int(filtered_indices[sampled_local_index].numpy())\n", - " else:\n", - " # Fallback if all probabilities are zero\n", - " warn(\n", - " \"Token sampling had to revert to greedy sampling, because no probs had a value > 0, unexpected\")\n", - " next_token_id = int(tf.argmax(probs, axis=-1).numpy())\n", - "\n", - " else:\n", - " # Greedy sampling (argmax) - apply repetition penalty if needed\n", - " if repetition_penalty is not None and repetition_penalty != 1.0:\n", - " unique_tokens = list(set(current_tokens))\n", - " vocab_size = tf.shape(logits)[0]\n", - " for token_id in unique_tokens:\n", - " if token_id < vocab_size:\n", - " logits = tf.tensor_scatter_nd_update(\n", - " logits,\n", - " [[token_id]],\n", - " [logits[token_id] / repetition_penalty]\n", - " )\n", - "\n", - " next_token_id = int(tf.argmax(logits, axis=-1).numpy())\n", - "\n", - " # Check for termination condition\n", - " if next_token_id == self.padding_token:\n", - " break\n", - "\n", - " # Add to generated tokens and update current 
tokens\n", - " generated_tokens.append(int(next_token_id))\n", - " current_tokens.append(int(next_token_id))\n", - "\n", - " # Check if we've reached max sequence length\n", - " if len(current_tokens) >= self.max_sequence_length:\n", - " break\n", - "\n", - " return token_ids + generated_tokens\n", - "\n", - "```" - ], - "metadata": { - "id": "96KSf1hKoe0H" - } - }, - { - "cell_type": "markdown", - "source": [ - "\n", - "## How this LLM wrapper works under the hood: A Simple Overview\n", - "\n", - "- Think of a Large Language Model like the \"autocomplete\" on your cell phone's keyboard that suggests the next word.\n", - "- Now, imagine you continuously click the suggested next word.\n", - "- The model picks the mathematically most likely next word, and you just go with it, and pick the next, then the next ...\n", - "\n", - "### Here is the step-by-step flow of how it generates text.\n", - "\n", - "1. INPUT: The Prompt\n", - "\n", - "The process always starts with a piece of text from you, the user.\n", - "\n", - "\"Write a story\"\n", - "\n", - "2. STEP 1: Tokenization — From Words to Numbers\n", - "\n", - "A computer doesn't understand letters or words; it understands numbers. The first step is to convert the prompt into a sequence of numbers the model can process. The tokenizer is a specialized dictionary for this job.\n", - "\n", - " What comes in: A string of text (\"Write a story\").\n", - " What goes out: A list of numerical IDs ([92, 21, 54, 21, 63, ...]).\n", - "\n", - "To make processing consistent, the input is always padded to a fixed length (e.g., 40 tokens). Any empty slots are filled with a special ID that is assigned by the tokenizer.\n", - "\n", - "\"Write a story\" -> tokenizer -> [92, 21, 54, 21, 63, 1234, 1234, ... 
(length 40)]\n", - "\n", - "For example, it may look like:\n", - "```\n", - "92 = \"Write\"\n", - "21 = \" \"\n", - "54 = \"a\"\n", - "63 = \"story\"\n", - "1234 = \"\" (Repeated until there are 40 numbers)\n", - "```\n", - "\n", - "3. The Model's Core: Going From Token IDs to the Predicted Next Token:\n", - "\n", - "This is the \"black box\" part. Inside the model, 4 basic things happen:\n", - "\n", - " 1. Embedding (Converts the discrete, high-dimensional sequence of tokens into a continuous distribution of a smaller dimensionality).\n", - " 2. Positional embedding: Takes the output of the embedding layer and represents their relative sequential order as a continuous distribution with a clear mathematical relationship.\n", - " 3. Prediction: A lattice of Dense layers, arranged as columns and rows, each having randomized lateral connectivity with other Dense layers on the same row, and randomized vertical connectivity with Dense layers on other rows. This takes the positional embedding's output and returns a numerical answer from its head layer. This element, produced by the Cerebros NAS, serves as a more computationally efficient alternative to the attention block used in other LLMs. The output is of shape (BATCH_SIZE, VOCABULARY_SIZE) as logits.\n", - " 4. Output activation (Scales the output to a valid range). In this case, the raw output is a tensor of shape (BATCH_SIZE, VOCABULARY_SIZE). The numbers need to be cast as probabilities, so the valid range is:\n", - " - Each element in the list must be in the range between 0 and 1 (inclusive).\n", - " - The entire list of numbers must add up to 1.\n", - " - Softmax is used to accomplish this.\n", - "\n", - "As mentioned before, this is a \"Single Head\" model, unlike most LLMs (like GPT-3/4). Each call returns **only** the next token expected in the sequence, expressed as a list of probabilities (probs) of shape (BATCH_SIZE, VOCABULARY_SIZE).\n", - "\n", - "\n", - "4. 
Predicting the Next Word From the Output of The Final Layer:\n", - "\n", - "After the model returns a list of probabilities, we must **pick the next word** from this. There are VOCABULARY_SIZE words in the vocabulary, each assigned an index position on this list.\n", - "\n", - "5. Sampling\n", - "\n", - "- **Greedy Sampling**: The naive strategy is to just pick the highest probability in this distribution (we call this greedy sampling) and assume it is the correct next token. You then decode that token ID and use it as the next word. Naively assuming the highest probability is correct makes for a few problems, including:\n", - " - The output will be identical every time you write the same prompt.\n", - " - Common words like \"the\", \"and\", ... will be used too often and used out of place.\n", - " - The text will seem \"dry\" and lack creative appeal.\n", - "- **Beam Sampling**: A better approach is scaling, then sampling from a few of the top choices. We apply scaling to the logits and recalculate the probabilities. Then, we eliminate unlikely possibilities. This leaves a smaller set of plausible tokens, from which we randomly select the next word. The methods we use are:\n", - " - **Presence penalty:** Steeply penalizes the logit for a token that has already been used recently or as the last word in the sequence, making it very unlikely to survive sampling and be selected. **Its purpose:** Mainly prevents the same word from being used twice **in immediate succession**: \"This is **the the the** problem which **this this** scaling technique should fix.\"\n", - " - **Frequency penalty:** Mildly penalizes the logit for a token that has been **overused** in the text, but **not necessarily** the last or recent word, making it less likely to be chosen repeatedly but still possible. **For example:** \"This technique **like** fixes **like** this from **like** happening. 
It's **like** really really annoying.\"\n", - " - **Repetition penalty scaling**: A penalty that balances the effects of both presence and frequency penalties, attempting to fix both problems at the same time.\n", - " - **Temperature scaling:** Temperature scaling divides logits by a number you set for 'temperature' to control output \"creativity\" vs \"precision\". Low temperatures less than 1 make the model's top choices more likely, creating predictable text. High temperatures greater than 1 give less likely words a better chance, leading to more diverse and random text. Basically, the higher you set it, the more creative and less factual the LLM's writing will be; the lower, the more precise and factual.\n", - " - After applying all scaling, we convert the logits back to probabilities using softmax. We then proceed to sampling:\n", - " - **Top k sampling**: Set a number 'k'. Eliminate all but the highest k numbers on this list of scaled probabilities.\n", - " - **Top p sampling:** Set a number 'p'. Starting from the most likely token, add up the probabilities until the sum reaches or exceeds 'p'. Keep only this cumulative set of tokens.\n", - " \n", - "Now that we have scaled and filtered the list of tokens, we randomly pick one from the remaining options.\n", - "\n", - "\n", - "6. The Generation Loop: We just do this on repeat.\n", - "\n", - "The model only predicts one word at a time. To complete text, we repeat this with the original prompt plus the newly predicted token. 
We call this an **autoregressive** loop.\n", - "\n", - "\n", - " Start with a prompt: \"Write a story\"\n", - " \n", - " Input: [Write, a, story]\n", - " Model predicts the token that decodes to: \"about\"\n", - "\n", - " Repeat 1: The appended sequence is fed back into the model.\n", - " New Input: [Write, a, story, about]\n", - " Model predicts: \"a\"\n", - "\n", - " Repeat 2: The newly appended sequence is fed back into the model again.\n", - " New Input: [Write, a, story, about, a]\n", - " Model predicts: \"fox\"\n", - "\n", - "This loop continues until the model generates a special \"end-of-sequence\" token / pad token or it reaches its maximum length limit (40 tokens in our example).\n", - "\n", - "\n", - "\n", - "## Revisiting the analogy of the autocomplete on repeat, this is what this looks like:\n" - ], - "metadata": { - "id": "kMW_6Vrq_Yi9" - } - }, - { - "cell_type": "markdown", - "source": [ - "![image.png]()" - ], - "metadata": { - "id": "NaMi9QniKqdO" - } - }, - { - "cell_type": "code", - "source": [ - "# Get the best model from the search\n", - "best_model_found = cerebros_automl.get_best_model(purge_model_storage_files='slate')\n", - "\n", - "# Create config and generative model wrapper\n", - "config = CerebrosNotGPTConfig(\n", - " max_sequence_length=MAX_SEQ_LENGTH,\n", - " padding_token=tokenizer.pad_token_id\n", - ")\n", - "generator = CerebrosNotGPT(config, model=best_model_found)\n", - "\n", - "# Test if the model can be built successfully\n", - "text = \"This is a test ...\"\n", - "input_ids = tokenizer(text, add_special_tokens=False)['input_ids']\n", - "current_tokens = input_ids.copy()\n", - "PADDING_TOKEN = tokenizer.pad_token_id\n", - "\n", - "if len(current_tokens) > MAX_SEQ_LENGTH:\n", - " input_tokens = current_tokens[-MAX_SEQ_LENGTH:]\n", - "else:\n", - " padding_needed = MAX_SEQ_LENGTH - len(current_tokens)\n", - " input_tokens = current_tokens + [PADDING_TOKEN] * padding_needed\n", - "\n", - "# A dummy pass to force the model to build\n", - "\n", - "input_tensor = 
tf.constant([input_tokens], dtype=tf.int32)\n", - "\n", - "try:\n", - " _ = generator(input_tensor)\n", - " print(\"โœ… Building LLM Model Successful!\")\n", - "except Exception as exc:\n", - " error_message = f\"โŒ Building model returned the error: {exc}\"\n", - " print(error_message)\n" - ], - "metadata": { - "id": "AEk-TtPCxleV", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "d253eeeb-831e-48ce-f256-c8f10540064a" - }, - "execution_count": 19, - "outputs": [ - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.12/dist-packages/keras/src/layers/layer.py:421: UserWarning: `build()` was called on layer 'interleaved_ro_pe', however the layer does not have a `build()` method implemented and it looks like it has unbuilt state. This will cause the layer to be marked as built, despite not being actually built, which may cause failures down the line. Make sure to implement a proper `build()` method.\n", - " warnings.warn(\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "โœ… Building LLM Model Successful!\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Text Generation Utilities\n", - "\n", - "We define two helper functions for text generation:\n", - "\n", - "- One for greedy sampling\n", - "- One for beam sampling with various parameters." 
- ], - "metadata": { - "id": "u6-wAM0XyUZC" - } - }, - { - "cell_type": "code", - "source": [ - "\n", - "# Required parameter\n", - "\n", - "trial_number =1\n", - "\n", - "\n", - "# Utility function for greedy sampling\n", - "def complete_text_greedy(text: str, max_new_tokens: int = 10) -> str:\n", - " input_ids = tokenizer(text, add_special_tokens=False)['input_ids']\n", - " generated_tokens = generator.generate(\n", - " token_ids=input_ids,\n", - " do_sample=False,\n", - " max_new_tokens=max_new_tokens\n", - " )\n", - " generated_text = tokenizer.decode(generated_tokens).replace(text, \"\")\n", - " return generated_text\n", - "\n", - "# Utility function for beam sampling\n", - "def complete_text_beam(text: str,\n", - " max_new_tokens: int = 10,\n", - " temperature: float = 0.75,\n", - " top_k: int = 75,\n", - " top_p: float = 0.98,\n", - " repetition_penalty: float = None,\n", - " presence_penalty: float = 1.3,\n", - " frequency_penalty: float = 1.4) -> str:\n", - " input_ids = tokenizer(text, add_special_tokens=False)['input_ids']\n", - " generated_tokens = generator.generate(\n", - " token_ids=input_ids,\n", - " do_sample=True,\n", - " max_new_tokens=max_new_tokens,\n", - " temperature=temperature,\n", - " top_k=top_k,\n", - " top_p=top_p,\n", - " presence_penalty=presence_penalty,\n", - " frequency_penalty=frequency_penalty\n", - " )\n", - " generated_text = tokenizer.decode(generated_tokens).replace(text, \"\")\n", - " return generated_text\n" - ], - "metadata": { - "id": "f8XigcJcykLn" - }, - "execution_count": 20, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Running Generation Tests\n", - "\n", - "We run a series of tests with different prompts and sampling parameters to evaluate the quality of the model from Stage I-a." 
- ], - "metadata": { - "id": "HG0IjcWEyrXn" - } - }, - { - "cell_type": "code", - "source": [ - "def test_text(test_prompt: str, max_new_tokens: int, result_cutoff: float, trial_id: int,\n", - " test_sample_number: int, result_0: float) -> None:\n", - " \"\"\"\n", - " If the result_0 < result_cutoff, this will run a matrix of different sampling values and print out the resulting text for human subjective evaluation.\n", - "\n", - " Parameters:\n", - " - test_prompt: a string to prompt generation\n", - " - max_new_tokens: int, number of tokens to generate unless we generate a stop token.\n", - " - sample_number: Metadata for sample...\n", - " - result_0: Perplexity score from this run\n", - " - result_cutoff: Perplexity score that would be expected to indicate a trial worth running this pn\n", - "\n", - " \"\"\"\n", - " if result_0 < result_cutoff:\n", - " generation_param_permutations = [\n", - " # #3\n", - " {\n", - " 'max_new_tokens': max_new_tokens,\n", - " 'temperature': 0.6,\n", - " 'top_k': 75,\n", - " 'top_p': 0.98,\n", - " 'repetition_penalty': None,\n", - " 'presence_penalty': 1.3,\n", - " 'frequency_penalty': 1.4\n", - " },\n", - " # #4\n", - " {\n", - " 'max_new_tokens': max_new_tokens,\n", - " 'temperature': 0.7,\n", - " 'top_k': 75,\n", - " 'top_p': 0.98,\n", - " 'repetition_penalty': None,\n", - " 'presence_penalty': 1.3,\n", - " 'frequency_penalty': 1.4\n", - " },\n", - " # #5\n", - " {\n", - " 'max_new_tokens': max_new_tokens,\n", - " 'temperature': 0.7,\n", - " 'top_k': 75,\n", - " 'top_p': 0.97,\n", - " 'repetition_penalty': None,\n", - " 'presence_penalty': 1.3,\n", - " 'frequency_penalty': 1.4},\n", - " # #6\n", - " {\n", - " 'max_new_tokens': max_new_tokens,\n", - " 'temperature': 0.75,\n", - " 'top_k': 75,\n", - " 'top_p': 0.98,\n", - " 'repetition_penalty': None,\n", - " 'presence_penalty': 1.4,\n", - " 'frequency_penalty': 1.4},\n", - " # #7\n", - " {\n", - " 'max_new_tokens': max_new_tokens,\n", - " 'temperature': 0.7,\n", - " 'top_k': 
75,\n", - " 'top_p': 0.98,\n", - " 'repetition_penalty': None,\n", - " 'presence_penalty': 1.4,\n", - " 'frequency_penalty': 1.4},\n", - " # #8\n", - " {\n", - " 'max_new_tokens': max_new_tokens,\n", - " 'temperature': 0.6,\n", - " 'top_k': 75,\n", - " 'top_p': 0.98,\n", - " 'repetition_penalty': None,\n", - " 'presence_penalty': 1.4,\n", - " 'frequency_penalty': 1.4\n", - " },\n", - " {\n", - " 'max_new_tokens': max_new_tokens,\n", - " 'temperature': 0.6,\n", - " 'top_k': 40,\n", - " 'top_p': 0.96,\n", - " 'repetition_penalty': None,\n", - " 'presence_penalty': 1.4,\n", - " 'frequency_penalty': 1.4\n", - " },\n", - " {\n", - " 'max_new_tokens': max_new_tokens,\n", - " 'temperature': 0.7,\n", - " 'top_k': 45,\n", - " 'top_p': 0.97,\n", - " 'repetition_penalty': None,\n", - " 'presence_penalty': 1.4,\n", - " 'frequency_penalty': 1.3\n", - " }, #\n", - " {\n", - " 'max_new_tokens': max_new_tokens,\n", - " 'temperature': 0.6,\n", - " 'top_k': 75,\n", - " 'top_p': 0.99,\n", - " 'repetition_penalty': None,\n", - " 'presence_penalty': 1.4,\n", - " 'frequency_penalty': 1.4\n", - " },\n", - " {\n", - " 'max_new_tokens': max_new_tokens,\n", - " 'temperature': 0.65,\n", - " 'top_k': 75,\n", - " 'top_p': 0.985,\n", - " 'repetition_penalty': None,\n", - " 'presence_penalty': 1.4,\n", - " 'frequency_penalty': 1.4\n", - " },\n", - " {\n", - " 'max_new_tokens': max_new_tokens,\n", - " 'temperature': 0.8,\n", - " 'top_k': 75,\n", - " 'top_p': 0.99,\n", - " 'repetition_penalty': None,\n", - " 'presence_penalty': 0.7,\n", - " 'frequency_penalty': 0.7\n", - " }\n", - " ]\n", - " # Default cases, no params\n", - " response_1 = complete_text_greedy(text=test_prompt, max_new_tokens=max_new_tokens)\n", - " print(\n", - " f\"Trial #: {trial_id} Text Sample #: {test_sample_number} Perplexity: {result_0} GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: '{test_prompt}' RESPONSE: '{response_1}'\")\n", - " # print(f\"Sample {sample_number}: I ask the generator 
(greedy): {test_prompt}... It responds: '{response_1}'.\")\n", - " response_2 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens)\n", - " print(\n", - " f\"Trial #: {trial_id} Text Sample #: {test_sample_number} Perplexity: {result_0} GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: '{test_prompt}' RESPONSE: '{response_2}'.\")\n", - " # print(f\"Sample {sample_number}: I ask the generator (Beam defaults - max_new_tokens: 10, temperature: 0.75, top_k: 75, top_p: 0.98, repetition_penalty: None, presence_penalty: 1.3, frequency_penalty: 1.4): {test_prompt}... It responds: '{response_2}'.\")\n", - "\n", - " for perm_0 in generation_param_permutations:\n", - " response_0 = complete_text_beam(text=test_prompt,\n", - " max_new_tokens=max_new_tokens,\n", - " temperature=perm_0['temperature'],\n", - " top_k=perm_0['top_k'],\n", - " top_p=perm_0['top_p'],\n", - " repetition_penalty=perm_0['repetition_penalty'],\n", - " presence_penalty=perm_0['presence_penalty'],\n", - " frequency_penalty=perm_0['frequency_penalty'])\n", - " print(\n", - " f\"Trial #: {trial_id} Text Sample #: {test_sample_number} Perplexity: {result_0} GENERATE PARAMS: max_new_tokens={perm_0['max_new_tokens']} temperature={perm_0['temperature']}, top_k={perm_0['top_k']}, top_p={perm_0['top_p']}, repetition_penalty={perm_0['repetition_penalty']} presence_penalty={perm_0['presence_penalty']} frequency_penalty{perm_0['frequency_penalty']} PROMPT: '{test_prompt}' RESPONSE: '{response_0}'\")\n", - "\n", - "\n", - "prompt_samples = [\n", - " \"I saw the sun and it was as shining on the\",\n", - " \"And God said, Let there be light: and there \",\n", - " \"In the beginning God created the heavens\"\n", - "]\n", - "\n", - "\n", - "counter = 0\n", - "for sample in prompt_samples:\n", - " test_text(\n", - " test_prompt=sample,\n", - " max_new_tokens=MAX_NEW_TOKENS,\n", - " 
result_cutoff=15,\n", - " trial_id=trial_number,\n", - " test_sample_number=counter,\n", - " result_0=phase_i_a_result)\n", - " counter += 1\n", - "\n", - "\n", - "collect()\n" - ], - "metadata": { - "id": "hut-HAJjyvn-", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "e05a9fb1-706e-4f26-e668-825f7df940c2" - }, - "execution_count": 21, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' earth the the the the the the the the the the the the the the'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 6 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 6 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 7 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 7 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' earth God beginning'.\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - 
">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 8 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' earth. beginning created God'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 13 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 31 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 9 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 14 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' created. 
beginning God earthless earth beginning'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 6 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' earth God.'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 6 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 6 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 7 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 8 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 10 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 16 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 8 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 8 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None 
presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' God earth beginning created heavens. earth created'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' beginning created earth'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 9 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 7 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None 
presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' created beginning earth heavens. God earth'\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 7 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 6 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' created earth beginning God heavens. 
earth'\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 7 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 6 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 8 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 10 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 9 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 8 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 8 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 6 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'I saw the sun and it was as shining on the' 
RESPONSE: ' God earth beginning created beginning God. earth heavens created beginning heavens earth. earth'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 6 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 7 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 9 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 10 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 18 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 32 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 27 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' created earth beginning God heavens. 
created earth'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 6 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 7 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 9 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 8 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 12 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' beginning earth God created earth heavens'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 8 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 26 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 16 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 26 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 29 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 34 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 46 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero 
probs\n", - ">>> After top_p: [128260] shape, 42 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 57 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 60 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 60 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' earth created heavens earth\\Order.cpt. the beginning'\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the the.. the the the the the the the the the the.'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' earth. 
the'.\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' earth. the.'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: 'And God said, Let there be light: and there. 
earth the'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the. the earth'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the.'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, 
repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the earth'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: 'And God said, Let there be light: and there. the earth'\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the. 
earth'\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the. earth'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: 'And God said, Let there be light: and there. 
the earth'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the earth.'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ''\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'In the beginning God created the heavens' RESPONSE: ' heavens heavens heavens heavens and heavens heavens heavens heavens heavens heavens heavens heavens and and'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, 
frequency_penalty=1.4: PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth'.\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth.'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and. 
earth'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth.'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 6 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' was earth'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None 
presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and. earth'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth.'\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and. 
earth'\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and. earth'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth.'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 
7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth.'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 6 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 3 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 2 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and created earth.'\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "5885" - ] - }, - "metadata": {}, - "execution_count": 21 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Stage I-b: Extended Training\n", - "\n", - "- Now, we take the best model from Stage I-a and continue training it on a larger dataset.\n", - "- This uses a streaming `tf.data.Dataset` generator to allow handling of much larger data sets without using more RAM.\n", - "- This would allow us to select far more samples, but for now, we select a small subset for this small scale environment.\n", - "\n", - "## Streaming Data Generator for Large Datasets\n", - "\n", - "\n", - "The **SampleExpansionGenerator** class, which we create below:\n", - "\n", - " - Applies and streams the same preprocessing logic to the raw text samples as we did in Stage I-a.\n", - " - However, it preprocesses one
**sample expansion batch** at a time and stores the resulting expanded samples in memory.\n", - " - It then feeds the resulting expanded samples to the model in batches matching the **model's BATCH_SIZE** as requested by the training loop.\n", - " - **sample expansion batch** is not the same as **the model's BATCH_SIZE**.\n", - "\n", - "For example, we could train on a dataset of 10 \\** 6 samples, while setting the **sample expansion batch size** to 100 while the **model's batch size** is 10.\n", - " - 100 raw text samples will be expanded at a time.\n", - " - This results in thousands of expanded sub-samples being queued and ready for the model to take.\n", - " - The model will take 10 of these at a time until it does not have 10 left to provide.\n", - " - Then, the generator will preprocess another 100 text samples and garbage collect.\n", - "\n", - "This allows training on datasets that would be much larger than available memory after expansion, making the training scalable.\n", - "\n", - "\n", - "### The sample expansion batch size should be optimized to balance two opposing forces:\n", - "\n", - " - Memory pressure increases with the number of expanded samples held in memory.\n", - " - Delays are caused by switching back and forth between tensor operations and preprocessing when batches are too small.\n", - "\n" - ], - "metadata": { - "id": "tuhQx2kjy4nn" - } - }, - { - "cell_type": "code", - "source": [ - "# Replace your existing class and function with these:\n", - "class SampleExpansionGenerator:\n", - " def __init__(self,\n", - " raw_text_samples,\n", - " tokenizer,\n", - " sample_expansion_batch_size=50,\n", - " model_batch_size=10,\n", - " prompt_length_0=PROMPT_LENGTH,\n", - " max_seq_length=MAX_SEQ_LENGTH,\n", - " vocabulary_size=VOCABULARY_SIZE):\n", - "\n", - " self.raw_text_samples = raw_text_samples\n", - " self.tokenizer = tokenizer\n", - " self.sample_expansion_batch_size = sample_expansion_batch_size\n", - " self.model_batch_size =
model_batch_size\n", - " self.prompt_length_0 = prompt_length_0\n", - " self.max_seq_length = max_seq_length\n", - " self.vocabulary_size = vocabulary_size\n", - " self.data = []\n", - " self.labels = []\n", - " self.current_index = 0\n", - "\n", - " def _expand_next_batch(self):\n", - " # If we've already processed all raw samples for this epoch, do nothing.\n", - " if self.current_index >= len(self.raw_text_samples):\n", - " return\n", - "\n", - " # Determine the next meta-batch\n", - " start_idx = self.current_index\n", - " end_idx = min(start_idx + self.sample_expansion_batch_size, len(self.raw_text_samples))\n", - "\n", - " batch_samples = self.raw_text_samples[start_idx:end_idx]\n", - " self.current_index = end_idx\n", - "\n", - " # Run prepare_data on this batch\n", - " input_ids_list, labels_list, _ = prepare_data(\n", - " data_0=batch_samples,\n", - " tokenizer_0=self.tokenizer,\n", - " max_seq_length=self.max_seq_length,\n", - " prompt_length=self.prompt_length_0)\n", - "\n", - " # Add the new data to our internal queues\n", - " self.data.extend(input_ids_list)\n", - " self.labels.extend(labels_list)\n", - "\n", - " def __iter__(self):\n", - " # Reset to initial state for new epoch\n", - " self.current_index = 0\n", - " self.data = []\n", - " self.labels = []\n", - " return self\n", - "\n", - " def __next__(self):\n", - " # If queues are empty, try to expand them from raw samples\n", - " if not self.data:\n", - " self._expand_next_batch()\n", - "\n", - " # If they are STILL empty after trying to expand, the epoch is over.\n", - " if not self.data:\n", - " raise StopIteration\n", - "\n", - " # Pop and return one sample\n", - " input_sample = self.data.pop(0)\n", - " label_sample = self.labels.pop(0)\n", - "\n", - " return ((input_sample,), label_sample)\n", - "\n", - "\n", - "# Create the tf.data.Dataset\n", - "def create_dataset(raw_text_samples, tokenizer, sample_expansion_batch_size=50, model_batch_size=10) -> tf.data.Dataset:\n", - " generator_0 = 
SampleExpansionGenerator(\n", - " raw_text_samples=raw_text_samples,\n", - " tokenizer=tokenizer,\n", - " sample_expansion_batch_size=sample_expansion_batch_size,\n", - " model_batch_size=model_batch_size # Pass this parameter\n", - " )\n", - "\n", - " dataset = tf.data.Dataset.from_generator(\n", - " lambda: generator_0,\n", - " # output_signature=(\n", - " # (tf.TensorSpec(shape=(generator_0.max_seq_length,), dtype=tf.int32),),\n", - " # # tf.TensorSpec(shape=(generator_0.max_seq_length,), dtype=tf.int32), # Use generator's parameter\n", - " # tf.TensorSpec(shape=(generator_0.vocabulary_size,), dtype=tf.float32) # Use generator's parameter\n", - " # )\n", - " output_signature=(\n", - " (tf.TensorSpec(shape=(generator_0.max_seq_length,), dtype=tf.int32),), # A tuple containing ONE TensorSpec\n", - " tf.TensorSpec(shape=(generator_0.vocabulary_size,), dtype=tf.float32) # A single TensorSpec\n", - " )\n", - " )\n", - "\n", - " # Batch it\n", - " dataset = dataset.batch(model_batch_size)\n", - " dataset = dataset.prefetch(tf.data.AUTOTUNE) # Prefetch for performance\n", - " return dataset\n", - "\n", - "# Create training and validation datasets\n", - "phase_i_b_train_dataset = create_dataset(\n", - " raw_text_samples=phase_i_b_train_samples,\n", - " tokenizer=tokenizer,\n", - " sample_expansion_batch_size=PHASE_I_B_SAMPLE_EXPANSION_BATCH_SIZE,\n", - " model_batch_size=batch_size\n", - ")\n", - "\n", - "phase_i_b_val_dataset = create_dataset(\n", - " raw_text_samples=phase_i_b_val_samples,\n", - " tokenizer=tokenizer,\n", - " sample_expansion_batch_size=PHASE_I_B_SAMPLE_EXPANSION_BATCH_SIZE,\n", - " model_batch_size=batch_size\n", - ")\n" - ], - "metadata": { - "id": "MHWWE0xIzLRD" - }, - "execution_count": 22, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "type(phase_i_b_train_dataset)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 121 - }, - "id": "HxwyQzSppQwp", - "outputId": 
"89a48aa5-c364-4057-98c4-fc4a291f448e" - }, - "execution_count": 23, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "tensorflow.python.data.ops.prefetch_op._PrefetchDataset" - ], - "text/html": [ - "
\n", - "
tensorflow.python.data.ops.prefetch_op._PrefetchDataset
def __init__(input_dataset, buffer_size, slack_period=None, name=None)
/usr/local/lib/python3.12/dist-packages/tensorflow/python/data/ops/prefetch_op.pyA `Dataset` that asynchronously prefetches its input.
\n", - " \n", - "
" - ] - }, - "metadata": {}, - "execution_count": 23 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "\n", - "## Model Compilation for Phase I-b\n", - "\n", - "- We recompile the model with the same base optimizer (AdamW), however this time with a custom learning rate scheduler (WarmupCosineDecayRestarts), and for disambiguation, relevant metrics for this training phase. We also add an EarlyStopping callback which is mainly being used to restore the weights from the best epoch, if that turns out to not be the last epoch.\n", - "\n", - "\n", - "## For those wanting to scale this up, a word to point out:\n", - "\n", - "The parameters for the learning rate scheduler may need to be optimized. They will be different for your data. Alternatively, you can remove the learning rate scheduler if this is too much trial and error.\n", - "\n", - "- We set the starting learning rate at: 0.0039295722955565125\n", - "- We set warmup steps to 1140, which for the data selected is 15 epochs.\n", - "- We set first decay steps to 1900, which for this data set is about 25 epochs.\n", - "\n", - "Also:\n", - "\n", - "Additionally, the early stopping callback will likely need to be adjusted. 
When training at scale, you may use a lower learning rate and a larger number of epochs, as well as a larger value for the start_from_epoch parameter (which specifies when to begin tracking the metric for early stopping).\n", - "\n", - "FYI, this is the custom scheduler we imported from cerebrosllmutils (CosineDecayRestarts augmented with warmup steps):\n", - "\n", - "\n", - "```python\n", - "# A custom schedule: Cosine decay with some warm - up steps\n", - "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='WarmupCosineDecayRestarts')\n", - "class WarmupCosineDecayRestarts(tf.keras.optimizers.schedules.LearningRateSchedule):\n", - " \"\"\"\n", - " A learning rate schedule that combines a linear warmup with cosine decay restarts.\n", - " \"\"\"\n", - "\n", - " def __init__(self, initial_learning_rate, warmup_steps, first_decay_steps, t_mul=2.0, m_mul=1.0, alpha=0.0):\n", - " super().__init__()\n", - "\n", - " # Store all parameters as public attributes for get_config serialization\n", - " self.initial_learning_rate = initial_learning_rate\n", - " self.warmup_steps = warmup_steps\n", - " self.first_decay_steps = first_decay_steps\n", - " self.t_mul = t_mul\n", - " self.m_mul = m_mul\n", - " self.alpha = alpha\n", - "\n", - " # Create the CosineDecayRestarts schedule for internal logic.\n", - " # The parameters passed here are the same ones we just stored.\n", - " self.cosine_restarts_schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(\n", - " initial_learning_rate=initial_learning_rate,\n", - " first_decay_steps=first_decay_steps,\n", - " t_mul=t_mul,\n", - " m_mul=m_mul,\n", - " alpha=alpha\n", - " )\n", - "\n", - "\n", - " def __call__(self, step):\n", - " step = tf.cast(step, dtype=tf.float32)\n", - "\n", - " # Calculate the learning rate for both phases unconditionally\n", - " warmup_lr = self.initial_learning_rate * step / self.warmup_steps\n", - "\n", - " # The cosine schedule is designed to start from step 0, so we give 
it\n", - " # the \"post-warmup\" step count.\n", - " decay_lr = self.cosine_restarts_schedule(step - self.warmup_steps)\n", - "\n", - " # Create a multiplier that is 1.0 during warmup and 0.0 after.\n", - " # tf.cast(condition, tf.float32) converts a boolean tensor to 1.0 or 0.0.\n", - " warmup_multiplier = tf.cast(step < self.warmup_steps, tf.float32)\n", - "\n", - " # The decay multiplier is the opposite.\n", - " decay_multiplier = 1.0 - warmup_multiplier\n", - "\n", - " # Combine the two learning rates. Only one will be active at a time.\n", - " return (warmup_multiplier * warmup_lr) + (decay_multiplier * decay_lr)\n", - "\n", - " def get_config(self):\n", - " # Use the stored public attributes for the config.\n", - " # This bypasses the issue of accessing private attributes (_t_mul) from\n", - " # the nested Keras object, which can be brittle.\n", - " config = {\n", - " \"initial_learning_rate\": self.initial_learning_rate,\n", - " \"warmup_steps\": self.warmup_steps,\n", - " \"first_decay_steps\": self.first_decay_steps,\n", - " \"t_mul\": self.t_mul,\n", - " \"m_mul\": self.m_mul,\n", - " \"alpha\": self.alpha,\n", - " }\n", - "\n", - " # Use from_config to properly allow deserialization\n", - " return config\n", - "```\n", - "\n" - ], - "metadata": { - "id": "DPaeJKEzzlPw" - } - }, - { - "cell_type": "code", - "source": [ - "# Define loss and metrics for Phase I-b\n", - "phase_i_b_loss = tf.keras.losses.CategoricalCrossentropy()\n", - "phase_i_b_categorical_accuracy = tf.keras.metrics.CategoricalAccuracy()\n", - "phase_i_b_perplexity = Perplexity(name=\"perplexity_phase_i_b\")\n", - "\n", - "# Create the learning rate schedule instance\n", - "lr_scheduler = WarmupCosineDecayRestarts(\n", - " initial_learning_rate=INITIAL_LR_STAGE_I_B,\n", - " warmup_steps=WARMUP_STEPS,\n", - " first_decay_steps=FIRST_DECAY_STEPS_STAGE_I_B,\n", - " t_mul=1.0,\n", - " m_mul=0.9,\n", - " alpha=0.01\n", - ")\n", - "\n", - "# Recompile the existing model\n", - 
"generator.model.compile(\n", - " loss=phase_i_b_loss,\n", - " metrics=[phase_i_b_categorical_accuracy, phase_i_b_perplexity],\n", - " optimizer=tf.keras.optimizers.AdamW(\n", - " learning_rate=lr_scheduler,\n", - " weight_decay=phase_i_b_weight_decay,\n", - " gradient_accumulation_steps=phase_i_b_gradient_accumulation_steps\n", - " ),\n", - " jit_compile=True\n", - ")\n", - "\n", - "# Define the Early Stopping callback\n", - "early_stopping = tf.keras.callbacks.EarlyStopping(\n", - " monitor='perplexity_phase_i_b', # NOTE: this key is the TRAINING perplexity; validation perplexity is logged as 'val_perplexity_phase_i_b'\n", - " patience=10, # Number of epochs with no improvement after which training will be stopped.\n", - " verbose=1,\n", - " restore_best_weights=True, # Restores model weights from the epoch with the best value of the monitored metric.\n", - " mode='min',\n", - " start_from_epoch=40\n", - ")\n", - "\n", - "\n", - "callbacks_list = [early_stopping]\n" - ], - "metadata": { - "id": "GGkEVa2dzOtf" - }, - "execution_count": 24, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "# Run Stage I-b Training\n", - "\n", - "- We start the training process using the model.fit method with the new datasets and callbacks to continue training the same model on another dataset. In our at-scale runs, both the previous stage and this stage are done on far more data." 
- ], - "metadata": { - "id": "y_K5nLzVz_-b" - } - }, - { - "cell_type": "code", - "source": [ - "\n", - "\n", - "\n", - "\n", - "# print(\"Calculating steps per epoch...\")\n", - "# train_steps = sum(1 for _ in phase_i_b_train_dataset)\n", - "# val_steps = sum(1 for _ in phase_i_b_val_dataset)\n", - "# print(f\"Calculated training steps per epoch: {train_steps}\")\n", - "# print(f\"Calculated validation steps: {val_steps}\")\n", - "\n", - "# Train the model\n", - "phase_i_b_history = generator.model.fit(\n", - " x=phase_i_b_train_dataset,\n", - " validation_data=phase_i_b_val_dataset,\n", - " epochs=phase_i_b_epochs,\n", - " callbacks=callbacks_list\n", - ")\n", - "\n", - "# Store history and take the lowest per-epoch perplexity (the 'perplexity_phase_i_b' column is the TRAINING metric; validation is 'val_perplexity_phase_i_b')\n", - "phase_i_b_history = pd.DataFrame(phase_i_b_history.history)\n", - "result_phase_i_b = float(phase_i_b_history['perplexity_phase_i_b'].min())\n", - "f\"Result of Stage 1-b training {result_phase_i_b}\"\n" - ], - "metadata": { - "id": "3GGqvlIl0FvV", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "outputId": "0daf05b2-7072-4818-8b47-a05558b33470" - }, - "execution_count": 25, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Epoch 1/53\n", - " 76/Unknown \u001b[1m69s\u001b[0m 636ms/step - categorical_accuracy: 0.0389 - loss: 13.6508 - perplexity_phase_i_b: 966782.2500" - ] - }, - { - "output_type": "stream", - "name": "stderr", - "text": [ - "/usr/local/lib/python3.12/dist-packages/keras/src/trainers/epoch_iterator.py:160: UserWarning: Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches. 
You may need to use the `.repeat()` function when building your dataset.\n", - " self._interrupted_warning()\n" - ] - }, - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m73s\u001b[0m 690ms/step - categorical_accuracy: 0.0388 - loss: 13.6471 - perplexity_phase_i_b: 962529.6250 - val_categorical_accuracy: 0.0492 - val_loss: 11.5516 - val_perplexity_phase_i_b: 103939.8906\n", - "Epoch 2/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 609ms/step - categorical_accuracy: 0.0164 - loss: 13.8992 - perplexity_phase_i_b: 2969392.7500 - val_categorical_accuracy: 0.0492 - val_loss: 12.0771 - val_perplexity_phase_i_b: 175791.3594\n", - "Epoch 3/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 609ms/step - categorical_accuracy: 0.0250 - loss: 12.8039 - perplexity_phase_i_b: 402124.0625 - val_categorical_accuracy: 0.0656 - val_loss: 12.3528 - val_perplexity_phase_i_b: 231597.3438\n", - "Epoch 4/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 591ms/step - categorical_accuracy: 0.0415 - loss: 11.6595 - perplexity_phase_i_b: 140648.8125 - val_categorical_accuracy: 0.0492 - val_loss: 12.4123 - val_perplexity_phase_i_b: 245801.6250\n", - "Epoch 5/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 
611ms/step - categorical_accuracy: 0.0439 - loss: 11.1954 - perplexity_phase_i_b: 73950.6797 - val_categorical_accuracy: 0.0492 - val_loss: 12.3395 - val_perplexity_phase_i_b: 228538.3750\n", - "Epoch 6/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 589ms/step - categorical_accuracy: 0.0816 - loss: 10.2579 - perplexity_phase_i_b: 29194.4102 - val_categorical_accuracy: 0.0656 - val_loss: 12.1179 - val_perplexity_phase_i_b: 183113.2031\n", - "Epoch 7/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 590ms/step - categorical_accuracy: 0.0590 - loss: 9.9608 - perplexity_phase_i_b: 22667.8711 - val_categorical_accuracy: 0.0492 - val_loss: 11.8740 - val_perplexity_phase_i_b: 143489.0312\n", - "Epoch 8/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m97s\u001b[0m 599ms/step - categorical_accuracy: 0.0593 - loss: 8.9806 - perplexity_phase_i_b: 8207.2861 - val_categorical_accuracy: 0.0328 - val_loss: 12.3863 - val_perplexity_phase_i_b: 239495.6562\n", - "Epoch 9/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 558ms/step - categorical_accuracy: 0.0661 - loss: 7.8740 - perplexity_phase_i_b: 2828.0859 - val_categorical_accuracy: 0.0164 - val_loss: 11.9790 - val_perplexity_phase_i_b: 159370.9219\n", - "Epoch 10/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m50s\u001b[0m 630ms/step - categorical_accuracy: 0.1062 - loss: 6.8127 - perplexity_phase_i_b: 987.0147 - val_categorical_accuracy: 0.0328 - val_loss: 11.2031 - val_perplexity_phase_i_b: 73360.1719\n", - "Epoch 11/53\n", - 
"\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 604ms/step - categorical_accuracy: 0.0687 - loss: 5.7574 - perplexity_phase_i_b: 324.8636 - val_categorical_accuracy: 0.0164 - val_loss: 9.6458 - val_perplexity_phase_i_b: 15456.3154\n", - "Epoch 12/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m87s\u001b[0m 686ms/step - categorical_accuracy: 0.0943 - loss: 4.8160 - perplexity_phase_i_b: 124.1660 - val_categorical_accuracy: 0.0492 - val_loss: 8.6260 - val_perplexity_phase_i_b: 5574.9229\n", - "Epoch 13/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 608ms/step - categorical_accuracy: 0.1206 - loss: 4.4321 - perplexity_phase_i_b: 84.3652 - val_categorical_accuracy: 0.0328 - val_loss: 8.1588 - val_perplexity_phase_i_b: 3493.8950\n", - "Epoch 14/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m54s\u001b[0m 597ms/step - categorical_accuracy: 0.1237 - loss: 4.4953 - perplexity_phase_i_b: 91.3969 - val_categorical_accuracy: 0.0328 - val_loss: 8.3403 - val_perplexity_phase_i_b: 4189.2686\n", - "Epoch 15/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 579ms/step - categorical_accuracy: 0.0997 - loss: 4.2491 - perplexity_phase_i_b: 70.9299 - val_categorical_accuracy: 0.0656 - val_loss: 8.6163 - val_perplexity_phase_i_b: 5520.8823\n", - "Epoch 16/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m88s\u001b[0m 585ms/step - categorical_accuracy: 0.1204 - loss: 4.2542 - perplexity_phase_i_b: 70.9240 - 
val_categorical_accuracy: 0.0656 - val_loss: 8.7940 - val_perplexity_phase_i_b: 6594.3228\n", - "Epoch 17/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 577ms/step - categorical_accuracy: 0.1386 - loss: 4.2547 - perplexity_phase_i_b: 70.8944 - val_categorical_accuracy: 0.0984 - val_loss: 8.7318 - val_perplexity_phase_i_b: 6196.8022\n", - "Epoch 18/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 597ms/step - categorical_accuracy: 0.1209 - loss: 4.2489 - perplexity_phase_i_b: 70.4136 - val_categorical_accuracy: 0.0984 - val_loss: 8.9164 - val_perplexity_phase_i_b: 7453.2446\n", - "Epoch 19/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 567ms/step - categorical_accuracy: 0.1236 - loss: 4.2367 - perplexity_phase_i_b: 69.5275 - val_categorical_accuracy: 0.0656 - val_loss: 8.8083 - val_perplexity_phase_i_b: 6689.4990\n", - "Epoch 20/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 596ms/step - categorical_accuracy: 0.1506 - loss: 4.1450 - perplexity_phase_i_b: 63.6329 - val_categorical_accuracy: 0.0656 - val_loss: 8.6605 - val_perplexity_phase_i_b: 5770.2129\n", - "Epoch 21/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 564ms/step - categorical_accuracy: 0.1424 - loss: 4.0012 - perplexity_phase_i_b: 55.2548 - val_categorical_accuracy: 0.0820 - val_loss: 8.6945 - val_perplexity_phase_i_b: 5970.1401\n", - "Epoch 22/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m 
\u001b[1m45s\u001b[0m 589ms/step - categorical_accuracy: 0.1520 - loss: 4.1843 - perplexity_phase_i_b: 66.0555 - val_categorical_accuracy: 0.0656 - val_loss: 8.3286 - val_perplexity_phase_i_b: 4140.4941\n", - "Epoch 23/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 557ms/step - categorical_accuracy: 0.1807 - loss: 3.8604 - perplexity_phase_i_b: 48.0663 - val_categorical_accuracy: 0.0656 - val_loss: 8.6137 - val_perplexity_phase_i_b: 5506.4224\n", - "Epoch 24/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 608ms/step - categorical_accuracy: 0.1533 - loss: 3.9858 - perplexity_phase_i_b: 54.5812 - val_categorical_accuracy: 0.1148 - val_loss: 8.5935 - val_perplexity_phase_i_b: 5396.4331\n", - "Epoch 25/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 573ms/step - categorical_accuracy: 0.1230 - loss: 4.0118 - perplexity_phase_i_b: 55.6288 - val_categorical_accuracy: 0.1475 - val_loss: 8.6210 - val_perplexity_phase_i_b: 5547.1172\n", - "Epoch 26/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 564ms/step - categorical_accuracy: 0.1588 - loss: 3.8591 - perplexity_phase_i_b: 47.8675 - val_categorical_accuracy: 0.1148 - val_loss: 8.4999 - val_perplexity_phase_i_b: 4914.4688\n", - "Epoch 27/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 582ms/step - categorical_accuracy: 0.1900 - loss: 3.8535 - perplexity_phase_i_b: 47.2824 - val_categorical_accuracy: 0.0820 - val_loss: 8.7680 - val_perplexity_phase_i_b: 6425.2207\n", - "Epoch 28/53\n", - 
"\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 593ms/step - categorical_accuracy: 0.1927 - loss: 3.6720 - perplexity_phase_i_b: 39.7386 - val_categorical_accuracy: 0.0656 - val_loss: 8.7999 - val_perplexity_phase_i_b: 6633.3721\n", - "Epoch 29/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 594ms/step - categorical_accuracy: 0.1848 - loss: 3.8259 - perplexity_phase_i_b: 46.2804 - val_categorical_accuracy: 0.0656 - val_loss: 8.6051 - val_perplexity_phase_i_b: 5459.4458\n", - "Epoch 30/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 597ms/step - categorical_accuracy: 0.1691 - loss: 3.6890 - perplexity_phase_i_b: 40.3801 - val_categorical_accuracy: 0.0984 - val_loss: 8.5689 - val_perplexity_phase_i_b: 5265.4810\n", - "Epoch 31/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 587ms/step - categorical_accuracy: 0.1774 - loss: 3.6971 - perplexity_phase_i_b: 40.6956 - val_categorical_accuracy: 0.0984 - val_loss: 8.7037 - val_perplexity_phase_i_b: 6025.3599\n", - "Epoch 32/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 573ms/step - categorical_accuracy: 0.1597 - loss: 3.6218 - perplexity_phase_i_b: 37.8592 - val_categorical_accuracy: 0.0984 - val_loss: 8.7827 - val_perplexity_phase_i_b: 6520.5991\n", - "Epoch 33/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 573ms/step - categorical_accuracy: 0.2066 - loss: 3.6265 - perplexity_phase_i_b: 38.0441 - 
val_categorical_accuracy: 0.0984 - val_loss: 8.7695 - val_perplexity_phase_i_b: 6434.8853\n", - "Epoch 34/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m42s\u001b[0m 550ms/step - categorical_accuracy: 0.1622 - loss: 3.7388 - perplexity_phase_i_b: 42.4272 - val_categorical_accuracy: 0.1148 - val_loss: 8.6601 - val_perplexity_phase_i_b: 5768.0454\n", - "Epoch 35/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m41s\u001b[0m 537ms/step - categorical_accuracy: 0.1974 - loss: 3.4737 - perplexity_phase_i_b: 32.6702 - val_categorical_accuracy: 0.1148 - val_loss: 8.6486 - val_perplexity_phase_i_b: 5702.0361\n", - "Epoch 36/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 603ms/step - categorical_accuracy: 0.1640 - loss: 3.5527 - perplexity_phase_i_b: 35.4395 - val_categorical_accuracy: 0.1148 - val_loss: 8.7015 - val_perplexity_phase_i_b: 6011.7910\n", - "Epoch 37/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 590ms/step - categorical_accuracy: 0.1779 - loss: 3.5903 - perplexity_phase_i_b: 36.4963 - val_categorical_accuracy: 0.1148 - val_loss: 8.7223 - val_perplexity_phase_i_b: 6138.1729\n", - "Epoch 38/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m96s\u001b[0m 598ms/step - categorical_accuracy: 0.1935 - loss: 3.5401 - perplexity_phase_i_b: 34.7298 - val_categorical_accuracy: 0.1148 - val_loss: 8.6995 - val_perplexity_phase_i_b: 5999.7402\n", - "Epoch 39/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m 
\u001b[1m48s\u001b[0m 622ms/step - categorical_accuracy: 0.2109 - loss: 3.5383 - perplexity_phase_i_b: 34.5639 - val_categorical_accuracy: 0.1148 - val_loss: 8.6650 - val_perplexity_phase_i_b: 5796.6436\n", - "Epoch 40/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m42s\u001b[0m 555ms/step - categorical_accuracy: 0.2047 - loss: 3.5124 - perplexity_phase_i_b: 33.9720 - val_categorical_accuracy: 0.1148 - val_loss: 8.7431 - val_perplexity_phase_i_b: 6267.4624\n", - "Epoch 41/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 576ms/step - categorical_accuracy: 0.1514 - loss: 3.5711 - perplexity_phase_i_b: 35.7887 - val_categorical_accuracy: 0.0656 - val_loss: 8.9814 - val_perplexity_phase_i_b: 7953.5283\n", - "Epoch 42/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 590ms/step - categorical_accuracy: 0.1761 - loss: 3.6074 - perplexity_phase_i_b: 37.1983 - val_categorical_accuracy: 0.0984 - val_loss: 9.0303 - val_perplexity_phase_i_b: 8352.2227\n", - "Epoch 43/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 571ms/step - categorical_accuracy: 0.1727 - loss: 3.6003 - perplexity_phase_i_b: 36.7872 - val_categorical_accuracy: 0.0328 - val_loss: 8.9927 - val_perplexity_phase_i_b: 8044.2207\n", - "Epoch 44/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m86s\u001b[0m 619ms/step - categorical_accuracy: 0.1786 - loss: 3.7416 - perplexity_phase_i_b: 42.6958 - val_categorical_accuracy: 0.1148 - val_loss: 9.1039 - val_perplexity_phase_i_b: 8990.1494\n", - "Epoch 45/53\n", - 
"\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 594ms/step - categorical_accuracy: 0.2062 - loss: 3.6020 - perplexity_phase_i_b: 37.0046 - val_categorical_accuracy: 0.0984 - val_loss: 9.3867 - val_perplexity_phase_i_b: 11928.1768\n", - "Epoch 46/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 583ms/step - categorical_accuracy: 0.2035 - loss: 3.6276 - perplexity_phase_i_b: 37.9026 - val_categorical_accuracy: 0.0820 - val_loss: 9.5581 - val_perplexity_phase_i_b: 14159.1719\n", - "Epoch 47/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 590ms/step - categorical_accuracy: 0.1784 - loss: 3.4276 - perplexity_phase_i_b: 31.0932 - val_categorical_accuracy: 0.1148 - val_loss: 9.1575 - val_perplexity_phase_i_b: 9485.0088\n", - "Epoch 48/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 583ms/step - categorical_accuracy: 0.1864 - loss: 3.4227 - perplexity_phase_i_b: 31.1301 - val_categorical_accuracy: 0.1148 - val_loss: 9.1156 - val_perplexity_phase_i_b: 9095.7666\n", - "Epoch 49/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 622ms/step - categorical_accuracy: 0.2266 - loss: 3.4226 - perplexity_phase_i_b: 30.7439 - val_categorical_accuracy: 0.0820 - val_loss: 9.4648 - val_perplexity_phase_i_b: 12897.0039\n", - "Epoch 50/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m95s\u001b[0m 589ms/step - categorical_accuracy: 0.2455 - loss: 3.4171 - perplexity_phase_i_b: 30.9408 - 
val_categorical_accuracy: 0.0820 - val_loss: 9.4194 - val_perplexity_phase_i_b: 12325.3525\n", - "Epoch 51/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 596ms/step - categorical_accuracy: 0.2168 - loss: 3.2941 - perplexity_phase_i_b: 27.1144 - val_categorical_accuracy: 0.0984 - val_loss: 9.3049 - val_perplexity_phase_i_b: 10991.2559\n", - "Epoch 52/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m49s\u001b[0m 642ms/step - categorical_accuracy: 0.1940 - loss: 3.3548 - perplexity_phase_i_b: 28.8572 - val_categorical_accuracy: 0.0984 - val_loss: 9.1126 - val_perplexity_phase_i_b: 9068.9150\n", - "Epoch 53/53\n", - "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 610ms/step - categorical_accuracy: 0.2291 - loss: 3.3674 - perplexity_phase_i_b: 29.2831 - val_categorical_accuracy: 0.1311 - val_loss: 9.1200 - val_perplexity_phase_i_b: 9136.3135\n", - "Restoring model weights from the end of the best epoch: 53.\n" - ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "'Result of Stage 1-b training 29.637819290161133'" - ], - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" - } - }, - "metadata": {}, - "execution_count": 25 - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Stage I-b: Model Evaluation and Serialization\n", - "\n", - "After extended training, we evaluate the final model performance and save the model and tokenizer for future use.\n" - ], - "metadata": { - "id": "y8Ej2P7D0T8R" - } - }, - { - "cell_type": "markdown", - "source": [ - "# Final Generation Tests on the Stage I-b model checkpoint\n", - "\n", - "Confirm the model works after Stage I-b training." 
- ], - "metadata": { - "id": "dWlYvYBq0dio" - } - }, - { - "cell_type": "code", - "source": [ - "print(\"########### Phase I-b Model Checkpoint Generation Samples: ###########\")\n", - "\n", - "counter = 0\n", - "for sample in prompt_samples:\n", - " test_text(\n", - " test_prompt=sample,\n", - " max_new_tokens=MAX_NEW_TOKENS,\n", - " result_cutoff=60, #\n", - " trial_id=trial_number,\n", - " test_sample_number=counter,\n", - " result_0=result_phase_i_b\n", - " )\n", - " counter += 1\n" - ], - "metadata": { - "id": "YhGaTbGF0X_d", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "8071bc5a-8520-4d13-82e1-cbd941297b4b" - }, - "execution_count": 26, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "########### Phase I-b Model Checkpoint Generation Samples: ###########\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ',,, and fruit fruit fruit fruit fruit fruit fruit fruit fruit fruit fruit'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 52 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 57 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 57 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 63 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 39 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 42 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 44 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: 
[128260] shape, 38 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 43 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 45 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 43 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 44 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 39 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 41 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 5 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' for morning, over tree with, fruit lights bring fruit great livestock.''.\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 37 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 54 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 54 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 55 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 60 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 61 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] 
shape, 60 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 58 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 45 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 42 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ', serve'to lights produce according each kind'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 48 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 60 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 59 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 60 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 59 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 59 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 59 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 59 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 58 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' 
RESPONSE: ', greater and that creeping waters for fifth'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 43 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 46 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 58 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 58 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 56 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 55 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 47 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 44 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 43 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 35 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' for, lights bird produce fourth to its with'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 52 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 54 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 57 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 63 non-zero 
probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 63 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 64 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 63 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 63 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 62 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 46 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 36 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: 'Be and, image said birds creeping day. 
God'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 48 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 50 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 58 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 59 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 62 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 56 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 56 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 53 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 36 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 4 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' night, image to over fish creature earth.''\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 37 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 40 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 57 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 59 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero 
probs\n", - ">>> After top_p: [128260] shape, 58 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 59 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 58 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 55 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 47 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 48 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 38 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 34 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 32 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' for, to birds bird, according forth every.' 
man animals'\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 19 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 31 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 33 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 33 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 31 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 31 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 30 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 30 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 21 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ', for plant domin fruition day with'\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 32 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 32 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 32 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 32 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 39 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero 
probs\n", - ">>> After top_p: [128260] shape, 38 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 37 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 38 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 36 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 12 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 15 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 10 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' for'lights, great waters eachBe.' 
fruit also'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 47 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 61 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 62 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 62 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 60 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 63 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 61 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 62 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 61 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 61 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 57 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 53 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 52 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 15 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ', saying produce livestock for every lights-bearing day fifth give to.''\n", - ">>> After 
top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 47 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 60 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 61 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 62 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 64 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 63 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 63 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 63 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 62 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 62 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 60 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 54 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 34 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ', fifth give to livestock light fruitful its that day every so.'\n", - ">>> After top_k: [128260] shape, 75 non-zero 
probs\n", - ">>> After top_p: [128260] shape, 63 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 65 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 68 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 68 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 67 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 67 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 66 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 67 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 62 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 64 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 58 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 59 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 31 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 14 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 17 non-zero probs\n", - "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' waters,, for to bring its, fruit.' 
kind.' and its wild'\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'And God said, Let there be light: and there ' RESPONSE: 'And God said, Let there be light: and there,, and fruit fruit fruit fruit fruit fruit fruit fruit fruit fruit fruit fruit'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 54 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 56 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 57 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 51 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 52 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 52 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 54 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 50 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 47 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 47 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 45 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 25 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 33 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, 
top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' to each the kind fruit that in great birds day. its'.\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 43 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 47 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 40 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 39 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 41 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 40 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 32 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 29 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 27 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 24 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' man was forth fruit great with lesser thing animals'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 51 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 55 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After 
top_p: [128260] shape, 55 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 50 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 52 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 53 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 48 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 33 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 39 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 32 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 32 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 34 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 40 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' man to each thing multiply the so fruit in that as saw'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 45 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 45 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 44 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 43 non-zero probs\n", - 
">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 38 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 25 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 25 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 25 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 17 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' fly created the so.' livestock fruit according'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 54 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 58 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 58 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 58 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 58 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 54 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 57 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 58 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 42 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 
29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' each fruitful-bearing in animals as man was'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 51 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 46 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 46 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 39 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 39 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 39 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 37 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 32 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 26 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 21 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 19 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 20 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 1 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And 
God said, Let there be light: and there ' RESPONSE: ' that themBeh fruit man in great according forth signs fruit.''\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 43 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 46 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 13 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 22 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' them. fruit'\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 26 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 27 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 27 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 27 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 25 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' each created so animals'\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 35 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 35 non-zero probs\n", - ">>> After 
top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 35 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 35 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 34 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 34 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 30 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 15 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 14 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' man-bearing lesser so animals each.' 
wild'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 51 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 54 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 54 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 47 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 45 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 45 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 47 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 44 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 19 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 21 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 17 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' to man each bring kind fruit forth. 
its animals'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 50 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 51 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 51 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 45 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 33 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 37 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 25 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 18 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' so man each. 
fruit in as'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 65 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 67 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 66 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 66 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 64 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 61 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 56 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 52 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 48 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 48 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 48 non-zero probs\n", - "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' man-bearing said forth so in them according signs fruit'\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'In the beginning God created the heavens' RESPONSE: ',,,,, and day day day lesser'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 41 non-zero probs\n", - ">>> After top_k: [128260] 
shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 54 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 53 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 54 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 41 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 39 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 40 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 42 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 37 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 40 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 40 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 39 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 36 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 15 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: 'In the beginning God created the heavens' RESPONSE: ',Let and was living he lesser so multiply seed fruitful livestock.'.\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 28 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 
non-zero probs\n", - ">>> After top_p: [128260] shape, 27 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 40 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 46 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 45 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 44 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 38 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 40 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 37 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 39 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 29 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' set, and said them over was man each it'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 37 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 54 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 53 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 56 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 53 
non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 49 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 45 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 43 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and fruitful, to was forth them'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 32 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 46 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 48 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 48 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 47 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 50 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 44 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 45 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 44 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 44 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 45 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero 
probs\n", - ">>> After top_p: [128260] shape, 27 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', earth trees seed was and day good rule forth.'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 41 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 54 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 46 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 49 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 45 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 49 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 45 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 50 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 48 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 48 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 49 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 49 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 21 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 21 non-zero 
probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', and trees them said he day good upon,' thing. fruit'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 37 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 51 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 42 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 46 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 37 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 40 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 36 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 33 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 33 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 37 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 41 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 39 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 32 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, 
repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', and trees was day to seed lesser he living earth each'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 28 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 42 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 33 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 38 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 30 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 32 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 28 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 33 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 33 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 34 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 33 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 29 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 27 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 8 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 
frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', and trees was he said day. each living he fruit so'\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 15 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 25 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 25 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 25 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 22 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 20 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 20 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 19 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 20 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 24 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 27 non-zero probs\n", - ">>> After top_k: [128260] shape, 40 non-zero probs\n", - ">>> After top_p: [128260] shape, 24 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', it earth creatures day living man lesser and he each'\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 28 non-zero probs\n", - ">>> After top_k: 
[128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 34 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 29 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 28 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 29 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 29 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 28 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 30 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 35 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 35 non-zero probs\n", - ">>> After top_k: [128260] shape, 45 non-zero probs\n", - ">>> After top_p: [128260] shape, 32 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and was living day he rule trees., according'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 35 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 51 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 42 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 45 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: 
[128260] shape, 38 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 43 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 43 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 40 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 44 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 42 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 42 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 44 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 36 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 37 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 37 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', and trees it said upon man was forth day fruit each tree wing multiply'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 36 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 41 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 52 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 55 non-zero 
probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 45 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 43 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 44 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 44 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 42 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 43 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' waters, and was so according each lesser multiply'\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 55 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 63 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 64 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 62 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 61 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 61 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 60 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 56 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 
non-zero probs\n", - ">>> After top_p: [128260] shape, 57 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 56 non-zero probs\n", - ">>> After top_k: [128260] shape, 75 non-zero probs\n", - ">>> After top_p: [128260] shape, 53 non-zero probs\n", - "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'In the beginning God created the heavens' RESPONSE: ',ed and to be it, multiply fruitful each'\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Save Model and Tokenizer\n", - "\n", - "- Finally, we save the tokenizer and the trained model weights to disk." - ], - "metadata": { - "id": "-oCAeR4n0mPW" - } - }, - { - "cell_type": "code", - "source": [ - "trial_number = 1 # Make sure to set this to a unique number:\n", - "# Serialize tokenizer\n", - "TOKENIZER_SAVE_PATH = f\"tokenizer-tr-{trial_number}-stage-i-b\"\n", - "tokenizer.save_pretrained(TOKENIZER_SAVE_PATH)\n", - "print(f\"Tokenizer saved to {TOKENIZER_SAVE_PATH}\")\n", - "\n", - "# Serialize model\n", - "MODEL_SAVE_PATH = f\"final_phase_ib_model_tr_{trial_number}-stage-i-b.keras\"\n", - "generator.save(MODEL_SAVE_PATH)\n", - "print(f\"Final model saved to {MODEL_SAVE_PATH}\")\n" - ], - "metadata": { - "id": "ziYdmmII0qfu", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "37a1153f-09a0-4274-9ca2-e280112e65e6" - }, - "execution_count": 27, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Tokenizer saved to tokenizer-tr-1-stage-i-b\n", - "Final model saved to final_phase_ib_model_tr_1-stage-i-b.keras\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# Serialization Test\n", - "\n", - "- We run an external script (test_llm_serialization.py) to validate that the saved model and tokenizer can be loaded and used 
correctly." - ], - "metadata": { - "id": "y9Pvhcvl0uGt" - } - }, - { - "cell_type": "code", - "source": [ - "print(f\"๐Ÿงช Running serialization test for Stage I-b trial {trial_number}...\")\n", - "result = subprocess.run(\n", - " f\"python3 test_llm_serialization.py {TOKENIZER_SAVE_PATH} {MODEL_SAVE_PATH}\",\n", - " capture_output=True,\n", - " shell=True,\n", - " text=True # Use text=True for string output\n", - ")\n", - "\n", - "if result.returncode == 0:\n", - " print(\"โœ… Serialization test passed.\")\n", - " print(\"STDOUT:\", result.stdout)\n", - "else:\n", - " print(\"โŒ Serialization test failed.\")\n", - " print(\"STDERR:\", result.stderr)\n", - " if result.stdout:\n", - " print(\"STDOUT:\", result.stdout)\n" - ], - "metadata": { - "id": "qA5Cord40yID", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "389fe0bf-c935-4f49-dd4f-8eea8672c634" - }, - "execution_count": 28, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "๐Ÿงช Running serialization test for Stage I-b trial 1...\n", - "โœ… Serialization test passed.\n", - "STDOUT: โœ… Tokenizer loaded successfully.\n", - "โœ… CerebrosNotGPT model loaded successfully.\n", - ">>> After top_k: [128260] shape, 50 non-zero probs\n", - ">>> After top_p: [128260] shape, 19 non-zero probs\n", - ">>> After top_k: [128260] shape, 50 non-zero probs\n", - ">>> After top_p: [128260] shape, 31 non-zero probs\n", - ">>> After top_k: [128260] shape, 50 non-zero probs\n", - ">>> After top_p: [128260] shape, 28 non-zero probs\n", - ">>> After top_k: [128260] shape, 50 non-zero probs\n", - ">>> After top_p: [128260] shape, 29 non-zero probs\n", - ">>> After top_k: [128260] shape, 50 non-zero probs\n", - ">>> After top_p: [128260] shape, 26 non-zero probs\n", - ">>> After top_k: [128260] shape, 50 non-zero probs\n", - ">>> After top_p: [128260] shape, 31 non-zero probs\n", - ">>> After top_k: [128260] shape, 50 non-zero probs\n", - ">>> After top_p: [128260] shape, 30 
non-zero probs\n", - ">>> After top_k: [128260] shape, 50 non-zero probs\n", - ">>> After top_p: [128260] shape, 28 non-zero probs\n", - ">>> After top_k: [128260] shape, 50 non-zero probs\n", - ">>> After top_p: [128260] shape, 32 non-zero probs\n", - ">>> After top_k: [128260] shape, 50 non-zero probs\n", - ">>> After top_p: [128260] shape, 33 non-zero probs\n", - "๐Ÿง  (serialized) Prompt: In the beginning God created the Generated Text from Serialized Model: 'In the beginning God created the, waters each trees and to living man according them'\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "# And there you have it: What it takes to build an LLM from scratch using our novel architecture.\n" - ], - "metadata": { - "id": "z1lSMQ6i03XC" - } - }, - { - "cell_type": "code", - "source": [], - "metadata": { - "id": "W6lcAxij-Z5r" - }, - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file From eaf660ef27614ca6c3a08fb286ae6e474ad12513 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Mon, 24 Nov 2025 19:14:56 -0500 Subject: [PATCH 2/4] Add files via upload --- ...1_23_demo_train_an_llm_with_cerebros.ipynb | 6561 +++++++++++++++++ 1 file changed, 6561 insertions(+) create mode 100644 2025_11_23_demo_train_an_llm_with_cerebros.ipynb diff --git a/2025_11_23_demo_train_an_llm_with_cerebros.ipynb b/2025_11_23_demo_train_an_llm_with_cerebros.ipynb new file mode 100644 index 0000000..d1f0f28 --- /dev/null +++ b/2025_11_23_demo_train_an_llm_with_cerebros.ipynb @@ -0,0 +1,6561 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Build our LLM From Scratch -\n", + "\n", + "## How Cerebros NotGPT works under the hood:\n", + "\n", + "\n", + "### This notebook demonstrates the end-to-end training 
pipeline that builds a small scale generative LLM from scratch, a small scale proof of concept for our own Cerebros NotGPT model, then fine tunes it on additional data.\n", + "\n", + "The process is divided into two main phases:\n", + "\n", + "- Phase I-a: Neural Architecture Search (NAS) - We use SimpleCerebrosRandomSearch to automatically discover an effective neural network architecture from a small dataset.\n", + "- Phase I-b: Extended Training - The best architecture found in Phase I-a is then trained on a larger dataset to improve its performance.\n", + "\n", + "Finally, the trained model is evaluated and serialized for future use.\n", + "\n", + "\n", + "## Setup and Configuration\n", + "\n", + "Note: This script is configured as a vanilla-scale demo environment (4 CPU / 16 GB RAM Linux with Python 3.12). No GPU is needed, and this will run in the free version of Google Colab. \n", + "\n", + "## Vanilla Demo\n", + "\n", + "- For production use, you would significantly increase the sample sizes and adjust other parameters accordingly.\n", + "- The quality of the text generated by this minimal demo (trained on 30 text samples at a sequence length of 40) does not represent the quality of a full-scale model generated from the same code.\n", + "- A script that can be modified to do so is available at: https://github.com/david-thrower/cerebros-core-algorithm-alpha/blob/main/train_a_generative_llm.py" + ], + "metadata": { + "id": "nnsAHoJyWLed" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NzJF6_JuWElV", + "outputId": "a0f3246f-0ccd-48ea-da55-86479bc0f93c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Python 3.12.12\n" + ] + } + ], + "source": [ + "! 
python --version" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Getting started: Download the repo and go to the main directory of the repo" + ], + "metadata": { + "id": "f6TD2XsKPJIY" + } + }, + { + "cell_type": "code", + "source": [ + "# Download the repo\n", + "! git clone https://github.com/david-thrower/cerebros-core-algorithm-alpha.git" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcECFSs7WVsi", + "outputId": "9fd59935-35d4-4a08-9c8a-fb01fd3e4f03" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'cerebros-core-algorithm-alpha'...\n", + "remote: Enumerating objects: 8036, done.\u001b[K\n", + "remote: Counting objects: 100% (1737/1737), done.\u001b[K\n", + "remote: Compressing objects: 100% (321/321), done.\u001b[K\n", + "remote: Total 8036 (delta 1612), reused 1449 (delta 1411), pack-reused 6299 (from 2)\u001b[K\n", + "Receiving objects: 100% (8036/8036), 65.90 MiB | 21.67 MiB/s, done.\n", + "Resolving deltas: 100% (3116/3116), done.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# set the working directory\n", + "%cd cerebros-core-algorithm-alpha" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mCpJGfD2WfLj", + "outputId": "e0fe8c05-6154-41cd-f489-08cfd2ad0fa8" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/cerebros-core-algorithm-alpha/cerebros-core-algorithm-alpha\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Next install all dependencies.\n", + "\n", + "There are 2 requirement files:\n", + " - requirements.txt: The core requirements of the neural architecture search\n", + " - cicd-requirements.txt: Requirements for NLP and text generation" + ], + "metadata": { + "id": "yT4hPXOKPU_8" + } + }, + { + "cell_type": "code", + "source": [ + "# Install the requirements for the 
core algorithm\n", + "! pip install -r requirements.txt; pip install -r cicd-requirements.txt" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "nwElyEdpW90P", + "outputId": "170e2158-b7a9-49f0-ce63-22c4c7410f33" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: jax==0.5.3 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 1)) (0.5.3)\n", + "Requirement already satisfied: jaxlib==0.5.3 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 2)) (0.5.3)\n", + "Requirement already satisfied: pendulum==3.0.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 3)) (3.0.0)\n", + "Collecting tensorflow==2.20.0 (from -r requirements.txt (line 4))\n", + " Using cached tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)\n", + "Collecting numpy==2.3.5 (from -r requirements.txt (line 5))\n", + " Using cached numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)\n", + "Requirement already satisfied: pandas==2.3.3 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 6)) (2.3.3)\n", + "Requirement already satisfied: pyvis==0.3.2 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 7)) (0.3.2)\n", + "Requirement already satisfied: plotly==5.20.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 8)) (5.20.0)\n", + "Requirement already satisfied: matplotlib==3.10.7 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 9)) (3.10.7)\n", + "Requirement already satisfied: imageio==2.37.2 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 10)) (2.37.2)\n", + "Requirement already satisfied: tqdm==4.67.1 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 11)) 
(4.67.1)\n", + "Requirement already satisfied: ml_dtypes>=0.4.0 in /usr/local/lib/python3.12/dist-packages (from jax==0.5.3->-r requirements.txt (line 1)) (0.5.4)\n", + "Requirement already satisfied: opt_einsum in /usr/local/lib/python3.12/dist-packages (from jax==0.5.3->-r requirements.txt (line 1)) (3.4.0)\n", + "Requirement already satisfied: scipy>=1.11.1 in /usr/local/lib/python3.12/dist-packages (from jax==0.5.3->-r requirements.txt (line 1)) (1.16.3)\n", + "Requirement already satisfied: python-dateutil>=2.6 in /usr/local/lib/python3.12/dist-packages (from pendulum==3.0.0->-r requirements.txt (line 3)) (2.9.0.post0)\n", + "Requirement already satisfied: tzdata>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pendulum==3.0.0->-r requirements.txt (line 3)) (2025.2)\n", + "Requirement already satisfied: time-machine>=2.6.0 in /usr/local/lib/python3.12/dist-packages (from pendulum==3.0.0->-r requirements.txt (line 3)) (3.1.0)\n", + "Requirement already satisfied: absl-py>=1.0.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (1.4.0)\n", + "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (1.6.3)\n", + "Requirement already satisfied: flatbuffers>=24.3.25 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (25.9.23)\n", + "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (0.6.0)\n", + "Requirement already satisfied: google_pasta>=0.1.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (0.2.0)\n", + "Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (18.1.1)\n", + "Requirement already satisfied: packaging 
in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (25.0)\n", + "Requirement already satisfied: protobuf>=5.28.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (5.29.5)\n", + "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (2.32.4)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (75.2.0)\n", + "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (1.17.0)\n", + "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (3.2.0)\n", + "Requirement already satisfied: typing_extensions>=3.6.6 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (4.15.0)\n", + "Requirement already satisfied: wrapt>=1.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (2.0.1)\n", + "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (1.76.0)\n", + "Collecting tensorboard~=2.20.0 (from tensorflow==2.20.0->-r requirements.txt (line 4))\n", + " Using cached tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)\n", + "Requirement already satisfied: keras>=3.10.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (3.10.0)\n", + "Requirement already satisfied: h5py>=3.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (3.15.1)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from 
pandas==2.3.3->-r requirements.txt (line 6)) (2025.2)\n", + "Requirement already satisfied: ipython>=5.3.0 in /usr/local/lib/python3.12/dist-packages (from pyvis==0.3.2->-r requirements.txt (line 7)) (7.34.0)\n", + "Requirement already satisfied: jinja2>=2.9.6 in /usr/local/lib/python3.12/dist-packages (from pyvis==0.3.2->-r requirements.txt (line 7)) (3.1.6)\n", + "Requirement already satisfied: jsonpickle>=1.4.1 in /usr/local/lib/python3.12/dist-packages (from pyvis==0.3.2->-r requirements.txt (line 7)) (4.1.1)\n", + "Requirement already satisfied: networkx>=1.11 in /usr/local/lib/python3.12/dist-packages (from pyvis==0.3.2->-r requirements.txt (line 7)) (3.5)\n", + "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.12/dist-packages (from plotly==5.20.0->-r requirements.txt (line 8)) (8.5.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (1.3.3)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (4.60.1)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (1.4.9)\n", + "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (11.3.0)\n", + "Requirement already satisfied: pyparsing>=3 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (3.2.5)\n", + "Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.12/dist-packages (from astunparse>=1.6.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.45.1)\n", + "Requirement 
already satisfied: jedi>=0.16 in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.19.2)\n", + "Requirement already satisfied: decorator in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (4.4.2)\n", + "Requirement already satisfied: pickleshare in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.7.5)\n", + "Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (5.7.1)\n", + "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (3.0.52)\n", + "Requirement already satisfied: pygments in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (2.19.2)\n", + "Requirement already satisfied: backcall in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.2.0)\n", + "Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.2.1)\n", + "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (4.9.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2>=2.9.6->pyvis==0.3.2->-r requirements.txt (line 7)) (3.0.3)\n", + "Requirement already satisfied: rich in /usr/local/lib/python3.12/dist-packages (from keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (13.9.4)\n", + "Requirement already satisfied: namex in /usr/local/lib/python3.12/dist-packages (from keras>=3.10.0->tensorflow==2.20.0->-r 
requirements.txt (line 4)) (0.1.0)\n", + "Requirement already satisfied: optree in /usr/local/lib/python3.12/dist-packages (from keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.18.0)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (3.4.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (3.11)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (2025.11.12)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.20.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (3.10)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.20.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.7.2)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.20.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (3.1.3)\n", + "Requirement already satisfied: parso<0.9.0,>=0.8.4 in /usr/local/lib/python3.12/dist-packages (from jedi>=0.16->ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.8.5)\n", + "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.12/dist-packages (from pexpect>4.3->ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.7.0)\n", + "Requirement already satisfied: wcwidth in /usr/local/lib/python3.12/dist-packages (from 
prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.2.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.12/dist-packages (from rich->keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (4.0.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.12/dist-packages (from markdown-it-py>=2.2.0->rich->keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.1.2)\n", + "Using cached tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (620.7 MB)\n", + "Using cached numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.6 MB)\n", + "Using cached tensorboard-2.20.0-py3-none-any.whl (5.5 MB)\n", + "Installing collected packages: numpy, tensorboard, tensorflow\n", + " Attempting uninstall: numpy\n", + " Found existing installation: numpy 1.26.4\n", + " Uninstalling numpy-1.26.4:\n", + " Successfully uninstalled numpy-1.26.4\n", + " Attempting uninstall: tensorboard\n", + " Found existing installation: tensorboard 2.19.0\n", + " Uninstalling tensorboard-2.19.0:\n", + " Successfully uninstalled tensorboard-2.19.0\n", + " Attempting uninstall: tensorflow\n", + " Found existing installation: tensorflow 2.19.1\n", + " Uninstalling tensorflow-2.19.1:\n", + " Successfully uninstalled tensorflow-2.19.1\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "scikit-learn 1.4.1.post1 requires numpy<2.0,>=1.19.5, but you have numpy 2.3.5 which is incompatible.\n", + "google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.3 which is incompatible.\n", + "tensorflow-text 2.19.0 requires tensorflow<2.20,>=2.19.0, but you have tensorflow 2.20.0 which is incompatible.\n", + "opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 2.3.5 which is incompatible.\n", + "numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.3.5 which is incompatible.\n", + "opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 2.3.5 which is incompatible.\n", + "umap-learn 0.5.9.post2 requires scikit-learn>=1.6, but you have scikit-learn 1.4.1.post1 which is incompatible.\n", + "opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 2.3.5 which is incompatible.\n", + "orbax-checkpoint 0.11.28 requires jax>=0.6.0, but you have jax 0.5.3 which is incompatible.\n", + "tensorflow-decision-forests 1.12.0 requires tensorflow==2.19.0, but you have tensorflow 2.20.0 which is incompatible.\n", + "flax 0.10.7 requires jax>=0.6.0, but you have jax 0.5.3 which is incompatible.\n", + "tf-keras 2.19.0 requires tensorflow<2.20,>=2.19, but you have tensorflow 2.20.0 which is incompatible.\n", + "imbalanced-learn 0.14.0 requires scikit-learn<2,>=1.4.2, but you have scikit-learn 1.4.1.post1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed numpy-2.3.5 tensorboard-2.20.0 tensorflow-2.20.0\n", + "Requirement already satisfied: tensorflow-text==2.19.0 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 1)) (2.19.0)\n", + "Requirement already satisfied: keras-nlp==0.19.0 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 2)) (0.19.0)\n", + "Requirement 
already satisfied: scikit-learn==1.4.1.post1 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 3)) (1.4.1.post1)\n", + "Requirement already satisfied: tensorflow-hub==0.16.1 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 4)) (0.16.1)\n", + "Requirement already satisfied: transformers==4.54.0 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 5)) (4.54.0)\n", + "Collecting tensorflow<2.20,>=2.19.0 (from tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1))\n", + " Using cached tensorflow-2.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: keras-hub==0.19.0 in /usr/local/lib/python3.12/dist-packages (from keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.19.0)\n", + "Collecting numpy<2.0,>=1.19.5 (from scikit-learn==1.4.1.post1->-r cicd-requirements.txt (line 3))\n", + " Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)\n", + "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn==1.4.1.post1->-r cicd-requirements.txt (line 3)) (1.16.3)\n", + "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn==1.4.1.post1->-r cicd-requirements.txt (line 3)) (1.5.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn==1.4.1.post1->-r cicd-requirements.txt (line 3)) (3.6.0)\n", + "Requirement already satisfied: protobuf>=3.19.6 in /usr/local/lib/python3.12/dist-packages (from tensorflow-hub==0.16.1->-r cicd-requirements.txt (line 4)) (5.29.5)\n", + "Requirement already satisfied: tf-keras>=2.14.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow-hub==0.16.1->-r cicd-requirements.txt (line 4)) (2.19.0)\n", + "Requirement already satisfied: filelock in 
/usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (3.20.0)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (0.36.0)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (25.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (6.0.3)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2024.11.6)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2.32.4)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (0.21.4)\n", + "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (0.7.0)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (4.67.1)\n", + "Requirement already satisfied: keras>=3.5 in /usr/local/lib/python3.12/dist-packages (from keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (3.10.0)\n", + "Requirement already satisfied: absl-py in /usr/local/lib/python3.12/dist-packages (from keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (1.4.0)\n", + "Requirement already satisfied: rich in /usr/local/lib/python3.12/dist-packages (from keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) 
(13.9.4)\n", + "Requirement already satisfied: kagglehub in /usr/local/lib/python3.12/dist-packages (from keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.3.13)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2025.3.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (4.15.0)\n", + "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (1.2.0)\n", + "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (1.6.3)\n", + "Requirement already satisfied: flatbuffers>=24.3.25 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (25.9.23)\n", + "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.6.0)\n", + "Requirement already satisfied: google-pasta>=0.1.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.2.0)\n", + "Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (18.1.1)\n", + "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r 
cicd-requirements.txt (line 1)) (3.4.0)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (75.2.0)\n", + "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (1.17.0)\n", + "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.2.0)\n", + "Requirement already satisfied: wrapt>=1.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (2.0.1)\n", + "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (1.76.0)\n", + "Collecting tensorboard~=2.19.0 (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1))\n", + " Using cached tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)\n", + "Requirement already satisfied: h5py>=3.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.15.1)\n", + "Requirement already satisfied: ml-dtypes<1.0.0,>=0.5.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.5.4)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (3.4.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (3.11)\n", + 
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2025.11.12)\n", + "Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.12/dist-packages (from astunparse>=1.6.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.45.1)\n", + "Requirement already satisfied: namex in /usr/local/lib/python3.12/dist-packages (from keras>=3.5->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.1.0)\n", + "Requirement already satisfied: optree in /usr/local/lib/python3.12/dist-packages (from keras>=3.5->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.18.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.19.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.10)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.19.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.7.2)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.19.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.1.3)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.12/dist-packages (from rich->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (4.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.12/dist-packages (from rich->keras-hub==0.19.0->keras-nlp==0.19.0->-r 
cicd-requirements.txt (line 2)) (2.19.2)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.12/dist-packages (from markdown-it-py>=2.2.0->rich->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.1.2)\n", + "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.12/dist-packages (from werkzeug>=1.0.1->tensorboard~=2.19.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.0.3)\n", + "Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.0 MB)\n", + "Using cached tensorflow-2.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (645.0 MB)\n", + "Using cached tensorboard-2.19.0-py3-none-any.whl (5.5 MB)\n", + "Installing collected packages: numpy, tensorboard, tensorflow\n", + " Attempting uninstall: numpy\n", + " Found existing installation: numpy 2.3.5\n", + " Uninstalling numpy-2.3.5:\n", + " Successfully uninstalled numpy-2.3.5\n", + " Attempting uninstall: tensorboard\n", + " Found existing installation: tensorboard 2.20.0\n", + " Uninstalling tensorboard-2.20.0:\n", + " Successfully uninstalled tensorboard-2.20.0\n", + " Attempting uninstall: tensorflow\n", + " Found existing installation: tensorflow 2.20.0\n", + " Uninstalling tensorflow-2.20.0:\n", + " Successfully uninstalled tensorflow-2.20.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.3 which is incompatible.\n", + "opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 1.26.4 which is incompatible.\n", + "opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 1.26.4 which is incompatible.\n", + "pytensor 2.35.1 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.\n", + "umap-learn 0.5.9.post2 requires scikit-learn>=1.6, but you have scikit-learn 1.4.1.post1 which is incompatible.\n", + "opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 1.26.4 which is incompatible.\n", + "orbax-checkpoint 0.11.28 requires jax>=0.6.0, but you have jax 0.5.3 which is incompatible.\n", + "tensorflow-decision-forests 1.12.0 requires tensorflow==2.19.0, but you have tensorflow 2.19.1 which is incompatible.\n", + "flax 0.10.7 requires jax>=0.6.0, but you have jax 0.5.3 which is incompatible.\n", + "shap 0.50.0 requires numpy>=2, but you have numpy 1.26.4 which is incompatible.\n", + "imbalanced-learn 0.14.0 requires scikit-learn<2,>=1.4.2, but you have scikit-learn 1.4.1.post1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed numpy-1.26.4 tensorboard-2.19.0 tensorflow-2.19.1\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "numpy", + "tensorflow" + ] + }, + "id": "d3a167bbbde043ef9a994c35060fda79" + } + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **RESTART THE SESSION**\n", + "\n", + "Then proceed to the next cell which imports all necessary libraries and defines global constants and hyperparameters for the entire pipeline.\n" + ], + "metadata": { + "id": "v69rLBcmXyGD" + } + }, + { + "cell_type": "code", 
+ "source": [ + "! ls" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ubtKyfBQzFEW", + "outputId": "6cbe44e6-3ce7-4227-982a-88d0d36d2205" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "cerebros-core-algorithm-alpha sample_data\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# 1. # **ONLY IF** the directory cerebros-core-algorithm-alpha is not still\n", + "# there, clone the directory again.\n", + "# ! git clone https://github.com/david-thrower/cerebros-core-algorithm-alpha.git\n", + "\n", + "# 2. Set the working directory (in the new session) - DO run this.\n", + "%cd cerebros-core-algorithm-alpha" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NemXTsYgfE0s", + "outputId": "ca92342f-1f82-42ee-8562-980b1c8dd849" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/cerebros-core-algorithm-alpha\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Verify we are in the right place:\n", + "! 
pwd" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "D3K4dSVQhrIc", + "outputId": "5a45fa94-1bb3-46ce-c362-27f456221fd6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/cerebros-core-algorithm-alpha\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Standard library imports\n", + "import subprocess\n", + "import time\n", + "from gc import collect\n", + "\n", + "# Third-party library imports\n", + "import tensorflow as tf\n", + "import pandas as pd\n", + "import pendulum\n", + "from transformers import AutoTokenizer\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Cerebros specific imports\n", + "from cerebros.units.units import DenseUnit\n", + "from cerebros.simplecerebrosrandomsearch.simple_cerebros_random_search import SimpleCerebrosRandomSearch\n", + "from cerebros.denseautomlstructuralcomponent.dense_automl_structural_component import (\n", + " zero_7_exp_decay,\n", + " zero_95_exp_decay,\n", + " simple_sigmoid\n", + ")\n", + "from cerebrosllmutils.llm_utils import (\n", + " prepare_data,\n", + " InterleavedRoPE,\n", + " Perplexity,\n", + " CerebrosNotGPTConfig,\n", + " CerebrosNotGPT,\n", + " WarmupCosineDecayRestarts\n", + ")\n", + "\n", + "# Import the data source: Format List[str]\n", + "from vanilladatasets.web_english_bible import samples as bible\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WKCdCv96X4YX", + "outputId": "875f6626-4f4b-426c-c697-da9f186e440a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/jaxlib/plugin_support.py:71: RuntimeWarning: JAX plugin jax_cuda12_plugin version 0.7.2 is installed, but it is not compatible with the installed jaxlib version 0.5.3, so it will not be used.\n", + " warnings.warn(\n" + ] + } + ] + }, + { + 
"cell_type": "markdown", + "source": [ + "# Data and Training Constants\n", + "\n", + "These parameters control the amount of data used and the behavior of the training stages.\n", + "\n", + "- **PHASE_I_A_SAMPLES_TO_CREATE**: Size of the subset of the dataset used for the NAS (Neural Architecture Search) stage (number of text samples).\n", + "- **PHASE_I_B_SAMPLES_TO_CREATE**: Number of samples to use for the main training task stage after Neural Architecture Search is completed.\n", + "- **PHASE_I_B_VAL_SPLIT**: Fraction of data for validation in Phase I-b.\n", + "- **PHASE_I_B_SAMPLE_EXPANSION_BATCH_SIZE**: Batch size for preprocessing in Phase I-b to manage RAM.\n", + "- **PROMPT_LENGTH**: Number of tokens provided to the model to predict the next token. It is recommended to keep this as 1.\n" + ], + "metadata": { + "id": "rK0LZP7KbQqm" + } + }, + { + "cell_type": "code", + "source": [ + "# Samples to use for the neural architecture search stage\n", + "PHASE_I_A_SAMPLES_TO_CREATE = 10\n", + "\n", + "# Samples to use for the main training stage\n", + "PHASE_I_B_SAMPLES_TO_CREATE = 20\n", + "PHASE_I_B_VAL_SPLIT = 0.15\n", + "\n", + "# For Stage I-b, we preprocess in batches to avoid high RAM usage.\n", + "PHASE_I_B_SAMPLE_EXPANSION_BATCH_SIZE = 10\n", + "\n", + "# How many tokens to provide before expecting the next token to be predicted.\n", + "PROMPT_LENGTH = 1\n" + ], + "metadata": { + "id": "vywbZQxAZC9R" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Model and Embedding Constants\n", + "\n", + "These constants define the size and shape of the model's text processing components.\n", + "\n", + "- **MAX_SEQ_LENGTH**: The maximum sequence length the model will handle. 
This has a linear relationship with RAM/CPU usage.\n", + "- **tokenizer_checkpoint**: The Hugging Face model to use for tokenization.\n", + "- **EMBEDDING_N**: A factor to determine the embedding dimensionality (EMBEDDING_DIM = EMBEDDING_N * 2). The resulting embedding dimensionality (EMBEDDING_DIM) for InterleavedRoPE must be an even number. Using this parameter as a proxy, rather than setting EMBEDDING_DIM directly, acts as a guard rail to ensure this constraint is met.\n", + "- **PROJECTION_N**: Controls the size of a projection layer after embedding. Increasing this value can significantly increase RAM usage.\n" + ], + "metadata": { + "id": "5jK5wbA5b8se" + } + }, + { + "cell_type": "code", + "source": [ + "# Text encoding / embedding related constants\n", + "MAX_SEQ_LENGTH = 40\n", + "\n", + "# Tokenization\n", + "tokenizer_checkpoint = \"HuggingFaceTB/SmolLM3-3B\"\n", + "tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)\n", + "\n", + "# Add special tokens for potential instruction-following formats\n", + "special_tokens = {\n", + " \"additional_special_tokens\": [\"\", \"\", \"\", \"\"]\n", + "}\n", + "tokenizer.add_special_tokens(special_tokens)\n", + "\n", + "VOCABULARY_SIZE = len(tokenizer)\n", + "\n", + "# For InterleavedRoPE, the embedding output dim must be an even number.\n", + "EMBEDDING_N = 6\n", + "EMBEDDING_DIM = int(EMBEDDING_N * 2)\n", + "\n", + "# Size of the projection layer. 
Keep low to manage RAM.\n", + "PROJECTION_N = 1\n" + ], + "metadata": { + "id": "4Kka_A4tb3aJ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "6c85d1ae-52f4-4ddf-d768-ea5781b1b7da" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Stage I-a (NAS) Hyperparameters\n", + "\n", + "These parameters control the Neural Architecture Search process.\n", + "\n", + "- **moities_to_try**: Number of different layer permutations to try.\n", + "- **tries_per_moity**: Number of topologies to try for each permutation.\n", + "- **epochs, batch_size, learning_rate**: Standard training parameters for the NAS stage.\n", + "- **predecessor_level_connection_affinity_factor_first**: Controls connectivity density between the Input layer and the first level of Dense layers.\n", + "- **predecessor_level_connection_affinity_factor_main**: Controls connectivity density between the first level of Dense layers and the subsequent level of Dense layers, as well as all subsequent vertical connectivity.\n", + "- **p_lateral_connection, num_lateral_connection_tries_per_unit**: Control the density of lateral connectivity between Dense layers on the same row.\n", + "- **minimum_levels, maximum_levels**: Number of **rows of** Dense layers in the architecture grid.\n", + "- 
**minimum_units_per_level, maximum_units_per_level**: Number of Dense layers per row.\n", + "- **minimum_neurons_per_unit, maximum_neurons_per_unit**: The number of neurons for each Dense layer unit.\n" + ], + "metadata": { + "id": "MeoWtePacWz_" + } + }, + { + "cell_type": "code", + "source": [ + "# Cerebros [non-HP-tunable] configurables for NAS\n", + "moities_to_try = 3\n", + "tries_per_moity = 1\n", + "\n", + "### Main tunable hyperparameters for NAS ##\n", + "\n", + "POSITIONAL_EMBEDDING_DROPOUT = 0.7651951380000674\n", + "activation = 'softplus'\n", + "\n", + "# Vertical connectivity hyperparameters\n", + "predecessor_level_connection_affinity_factor_first = 17.851026458010523\n", + "predecessor_level_connection_affinity_factor_main = 21.487301631581428\n", + "\n", + "# Lateral connectivity hyperparameters\n", + "max_consecutive_lateral_connections = 7\n", + "p_lateral_connection = 0.24927354102044022\n", + "num_lateral_connection_tries_per_unit = 32\n", + "learning_rate = 0.003025583248301791\n", + "epochs = 41\n", + "batch_size = 5\n", + "gradient_accumulation_steps = 4\n", + "\n", + "# Architecture grid constraints\n", + "minimum_levels = 2\n", + "maximum_levels = 2\n", + "minimum_units_per_level = 2\n", + "maximum_units_per_level = 2\n", + "minimum_neurons_per_unit = 2\n", + "maximum_neurons_per_unit = 2\n" + ], + "metadata": { + "id": "Wbowkxnbc4Zd" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Stage I-b (Extended Training) Hyperparameters\n", + "\n", + "These parameters are for fine-tuning the best model from Stage I-a.\n", + "\n", + "- INITIAL_LR_STAGE_I_B: Initial learning rate for this phase.\n", + "- WARMUP_EPOCHS_STAGE_I_B, WARMUP_STEPS: Parameters for the learning rate scheduler.\n", + "- phase_i_b_epochs: Number of epochs for extended training.\n", + "- phase_i_b_weight_decay: Weight decay for the optimizer.\n" + ], + "metadata": { + "id": "fcGTs9ASdXps" + } + }, + { + "cell_type": "code", 
+ "source": [ + "\n", + "## Training Stage I-b parameters:\n", + "INITIAL_LR_STAGE_I_B = 0.0039295722955565125\n", + "WARMUP_EPOCHS_STAGE_I_B = 7\n", + "WARMUP_STEPS = 1140\n", + "FIRST_DECAY_STEPS_STAGE_I_B = 1900\n", + "phase_i_b_epochs = 53\n", + "phase_i_b_gradient_accumulation_steps = 7\n", + "phase_i_b_weight_decay = 0.01647018768215773 # For AdamW\n" + ], + "metadata": { + "id": "-znwaddIdiKU" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "# Generation Constants\n", + "\n", + "Parameters used during the text generation evaluation phase." + ], + "metadata": { + "id": "vy5y6OXhdvzV" + } + }, + { + "cell_type": "code", + "source": [ + "## Generation time configurables:\n", + "GENERATION_PROMPT_LEN = 25\n", + "MAX_NEW_TOKENS = MAX_SEQ_LENGTH - GENERATION_PROMPT_LEN" + ], + "metadata": { + "id": "JHjCz9qXd5Gq" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# **Data Preparation**\n", + "\n", + "Here, we load and subset the dataset for both training Stages.\n", + "\n", + "\n", + "We first split the Bible text samples into two sets: one for Phase I-a (NAS) and a larger one for Phase I-b (extended training).\n" + ], + "metadata": { + "id": "N7fJIZ1md-0Y" + } + }, + { + "cell_type": "code", + "source": [ + "# Get training data from the bible text samples\n", + "non_instruct_samples = bible[:PHASE_I_A_SAMPLES_TO_CREATE]\n", + "phase_i_b_samples = bible[PHASE_I_A_SAMPLES_TO_CREATE:PHASE_I_B_SAMPLES_TO_CREATE + PHASE_I_A_SAMPLES_TO_CREATE]\n", + "\n", + "print(f\"Samples from KJV bible consisting of {len(non_instruct_samples)} look like this (sub-sample of 3): {non_instruct_samples[:3]}\")\n" + ], + "metadata": { + "id": "jIFxWcBzeLjN", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d46f8e34-3d7d-4fb4-dddc-bf1c45bae7ee" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"Samples from KJV bible consisting of 10 look like this (sub-sample of 3): ['In the beginning God created the heavens and the earth.', \"The earth was formless and empty, with darkness over the deep and God's Spirit hovering over the waters.\", \"God said, 'Let there be light,' and there was light.\"]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Preprocess Data for Phase I-a (NAS)\n", + "\n", + "The Cerebros LLM is a single head model. This means that each time the model is called, it returns only the next token. It does not regurgitate the cumulative sequence, nor does it have a separate head for each position in the sequence.\n", + "\n", + "For both training stages, each text sample is expanded into multiple input/label pairs, which we call \"sub-samples.\" There is one \"sub-sample\" for each token in the range between the first token and the first occurrence of a padding token or the end of the sequence, whichever comes first.\n", + "\n", + "For example, the sequence [t1, t2, t3] becomes:\n", + "\n", + " Input: [t1, 2, 2, 2] Label: [t2] # One hot encoded to VOCABULARY_SIZE\n", + " Input: [t1, t2, 2, 2], Label: [t3]\n", + " Input: [t1, t2, t3, 2], Label: [2]\n", + "\n", + "For training Stage 1-a, we perform the entire expansion for its training data in memory. This is because the NAS does not yet support a tf.data.Dataset object. In the future, we may retrofit the NAS algorithm to support streaming preprocessing as well, allowing us to use a larger dataset for the NAS.\n", + "\n", + "For stage I-b, the extended training stage, the same operation is done in batches. This is because this operation significantly increases the amount of memory required. The main reason for this is the one-hot encoded label, where the vocabulary size is 128,260. 
Since we do this in batches, this allows for a virtually unlimited number of samples to be processed.\n", + "\n", + "For reference, this is the preprocessing being applied:\n", + "\n", + "```python\n", + "def prepare_data(\n", + " data_0: List[str],\n", + " tokenizer_0: Any,\n", + " max_seq_length: int = 1024,\n", + " prompt_length: int = 1) -> Tuple[List[List[int]], List[List[int]], int]:\n", + "\n", + "\n", + " all_input_ids = []\n", + " all_labels = []\n", + "\n", + " pad_token_id = tokenizer_0.pad_token_id\n", + "\n", + " # Tokenize all data at once for efficiency\n", + " tokenized_data = tokenizer_0(\n", + " data_0,\n", + " max_length=max_seq_length,\n", + " padding='max_length',\n", + " truncation=True,\n", + " add_special_tokens=False # We'll handle special tokens manually\n", + " )\n", + " vocab_size = len(tokenizer_0)\n", + "\n", + " # Get the token ID for \n", + " end_prompt_token_id = tokenizer_0.encode(\"\", add_special_tokens=False)[0]\n", + "\n", + " # Process each sample\n", + " for sample_tokens in tokenized_data['input_ids']:\n", + " # Find the index of token\n", + " try:\n", + " end_prompt_index = sample_tokens.index(end_prompt_token_id)\n", + " except ValueError:\n", + " # If not found, treat sample as a non-instruct sample\n", + " end_prompt_index = (\n", + " prompt_length - 1) # int(np.ceil(len(sample_tokens) * (1/3))) # 0 ## 1. Give it a fair starting place to predict the next word 2. 
reduce the number of expanded samples\n", + "\n", + " # Find first pad token after \n", + " first_pad_index = None\n", + " for i in range(end_prompt_index + 1, len(sample_tokens)):\n", + " if sample_tokens[i] == pad_token_id:\n", + " first_pad_index = i\n", + " break\n", + "\n", + " # If no pad token found, use the end of sequence\n", + " if first_pad_index is None:\n", + " first_pad_index = len(sample_tokens)\n", + "\n", + " # Apply sliding window from after to first pad token\n", + " # Start from end_prompt_index + 1 (first token to predict)\n", + " # End at first_pad_index - 1 (last token to predict)\n", + " for i in range(end_prompt_index + 1, first_pad_index):\n", + " # Input: from start up to (but not including) token i\n", + " input_ids = sample_tokens[:i]\n", + "\n", + " # Pad or truncate to max_seq_length\n", + " if len(input_ids) > max_seq_length:\n", + " input_ids = input_ids[:max_seq_length]\n", + " else:\n", + " input_ids = input_ids + [pad_token_id] * (max_seq_length - len(input_ids))\n", + "\n", + " # Label: one-hot encoding of token at position i\n", + " next_token = sample_tokens[i]\n", + " label = [0] * vocab_size\n", + " label[next_token] = 1\n", + "\n", + " all_input_ids.append(input_ids)\n", + " all_labels.append(label)\n", + "\n", + " # Add final sample with pad token as label to indicate termination\n", + " if first_pad_index < len(sample_tokens): # Only if there's actually a pad token\n", + " input_ids = sample_tokens[:first_pad_index]\n", + "\n", + " # Pad or truncate to max_seq_length\n", + " if len(input_ids) > max_seq_length:\n", + " input_ids = input_ids[:max_seq_length]\n", + " else:\n", + " input_ids = input_ids + [pad_token_id] * (max_seq_length - len(input_ids))\n", + "\n", + " # Label: one-hot encoding of pad token\n", + " label = [0] * vocab_size\n", + " label[pad_token_id] = 1\n", + "\n", + " all_input_ids.append(input_ids)\n", + " all_labels.append(label)\n", + "\n", + " return all_input_ids, all_labels, vocab_size\n", + "```\n" 
+ ], + "metadata": { + "id": "8Tu8X9cVeQVD" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "# Preprocess data for Stage I-a training\n", + "x, y, vocab_size = prepare_data(data_0=non_instruct_samples,\n", + " tokenizer_0=tokenizer,\n", + " max_seq_length=MAX_SEQ_LENGTH,\n", + " prompt_length=PROMPT_LENGTH)\n", + "\n", + "# Split the preprocessed data for NAS training and validation\n", + "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.85, shuffle=False)\n", + "\n", + "# Package data into lists for the Cerebros AutoML component\n", + "x_train_tf = tf.constant(X_train, tf.int32)\n", + "y_train_tf = tf.constant(y_train, tf.float32)\n", + "x_train_packaged = [x_train_tf]\n", + "y_train_packaged = [y_train_tf]\n", + "\n", + "# Do the same for the validation data\n", + "x_test_tf = tf.constant(X_test, tf.int32)\n", + "y_test_tf = tf.constant(y_test, tf.float32)\n", + "x_test_packaged = [x_test_tf]\n", + "y_test_packaged = [y_test_tf]\n", + "\n", + "# Define input and output shapes for the AutoML model\n", + "INPUT_SHAPES = [(MAX_SEQ_LENGTH,)]\n", + "OUTPUT_SHAPES = [(VOCABULARY_SIZE)]\n" + ], + "metadata": { + "id": "EDyuTMLufYvs" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Train, Test Split of the Data for Stage I-b training\n", + "\n", + "We split the larger Phase I-b dataset into training and validation sets. Again, this dataset will be processed by a streaming generator in batches to avoid memory saturation and make the training more scalable. We will revisit that later." 
+ ], + "metadata": { + "id": "zX60zcpykasl" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "# Split the phase I-b data set for training and validation\n", + "phase_i_b_train_samples, phase_i_b_val_samples = train_test_split(\n", + " phase_i_b_samples,\n", + " test_size=PHASE_I_B_VAL_SPLIT,\n", + " shuffle=False\n", + ")\n" + ], + "metadata": { + "id": "SMSdkFRPkg7D" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "phase_i_b_train_samples[:3]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Oqw-T7bOo1GD", + "outputId": "2e8f24fc-24c2-4a06-babb-550b676b7751" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[\"God said, 'Let the earth produce vegetation, seed-bearing plants, and fruit trees, each according to its kind,' and it was so.\",\n", + " 'The earth brought forth grass, seed-bearing herbs, and fruit trees, each with its seed, and God saw that it was good.',\n", + " 'There was evening and morning, the third day.']" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "X_train[:2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Hv_52izIjOQ7", + "outputId": "e2972924-0190-4f16-9317-c00100486203" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[[644,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 
128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012],\n", + " [644,\n", + " 279,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012]]" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Base Text Embedding Model Definition\n", + "\n", + "- Before we run the NAS, we define a base model that handles token embeddings and positional embeddings.\n", + "- The SimpleCerebrosRandomSearch will then attach its auto-generated lattice of dense layers on top of this base model.\n", + "- The Cerebros NAS takes an init parameter base_models: List[tf.keras.Model]\n" + ], + "metadata": { + "id": "11Ri4PtKktih" + } + }, + { + "cell_type": "code", + "source": [ + "####### Text embedding base model #####################\n", + "\n", + "inp = tf.keras.layers.Input(shape=(MAX_SEQ_LENGTH,), dtype=tf.int32)\n", + "\n", + "# Token embedding layer\n", + "embedded = tf.keras.layers.Embedding(\n", + " input_dim=VOCABULARY_SIZE,\n", + " output_dim=EMBEDDING_DIM,\n", + " input_length=MAX_SEQ_LENGTH,\n", + " mask_zero=False\n", + ")(inp)\n", + "\n", + "# Interleaved Rotary Positional Embedding (iRoPE)\n", + "position_embedding = InterleavedRoPE(\n", + " dim=EMBEDDING_DIM,\n", + " max_seq_len=MAX_SEQ_LENGTH,\n", + ")(embedded)\n", + "\n", + "# Concatenate token and positional embeddings\n", + "x = 
tf.keras.layers.Concatenate()([embedded, position_embedding])\n", + "x = tf.keras.layers.Dropout(POSITIONAL_EMBEDDING_DROPOUT)(x)\n", + "\n", + "# Flatten and project to the desired dimension\n", + "flattened = tf.keras.layers.Flatten()(x)\n", + "projected = tf.keras.layers.Dense(EMBEDDING_DIM * PROJECTION_N)(flattened)\n", + "\n", + "# Create the base Keras model\n", + "cerebros_base_model = tf.keras.Model(\n", + " inputs=inp,\n", + " outputs=projected\n", + ")\n" + ], + "metadata": { + "id": "tn1qrGISn_Pe", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "e76e091c-6e7f-4820-ef79-15143f1e6b64" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/keras/src/layers/core/embedding.py:97: UserWarning: Argument `input_length` is deprecated. Just remove it.\n", + " warnings.warn(\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## FYI: The iRoPE Embedding:\n", + "\n", + "The RoPE embedding, and helper functions it depends on (previously imported from the local package cerebrosllmutils):\n", + "\n", + "- iRoPE: Interleaved Rotary Positional Embedding\n", + "- RoPE: Rotary Positional Embedding\n", + "- The Rotary Positional Embedding expresses positional relationships as angles, extends feasible context window.\n", + "- iRoPE: iRoPE applies the rotation in an interleaved manner and enables capturing more nuance and extending context windows feasible to around 2 million tokens.\n", + "\n", + "```python\n", + "# --- Base Rotary Positional Embedding\n", + "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='RotaryEmbedding')\n", + "class RotaryEmbedding(tf.keras.layers.Layer):\n", + " def __init__(self, dim, max_seq_len=1024, temperature=10000.0, **kwargs):\n", + " super().__init__(**kwargs)\n", + " self.dim = dim\n", + " # Ensure dim is even right at initialization\n", + " if self.dim % 2 != 0:\n", + " raise 
ValueError(f\"Embedding dimension `dim` ({self.dim}) must be even for RotaryEmbedding.\")\n", + " self.max_seq_len = max_seq_len\n", + " self.temperature = temperature\n", + " # *** No calculation or storage of inv_freq here or in build ***\n", + "\n", + " def build(self, input_shape):\n", + " # Build should primarily be for creating trainable weights, which we don't have.\n", + " # Call super().build() for Keras compatibility.\n", + " super().build(input_shape)\n", + "\n", + " def call(self, x): # Removed seq_len argument, calculate from x\n", + " shape = tf.shape(x)\n", + " batch_size = shape[0]\n", + " actual_seq_len = shape[1]\n", + "\n", + " # *** Calculate inv_freq inside call ***\n", + " inv_freq_base = tf.range(0, self.dim, 2, dtype=tf.float32)\n", + " inv_freq = 1.0 / (self.temperature ** (inv_freq_base / self.dim))\n", + " # Ensure inv_freq has the correct shape [dim/2]\n", + " inv_freq = tf.cast(inv_freq, dtype=x.dtype) # Match dtype early\n", + "\n", + " # Use actual_seq_len for calculations\n", + " position = tf.range(actual_seq_len, dtype=x.dtype) # Match dtype\n", + "\n", + " # Calculate sinusoid input using einsum or broadcasting\n", + " # Einsum approach: Ensure correct dimensions [seq_len, dim/2]\n", + " sinusoid_inp = tf.einsum(\"i,j->ij\", position, inv_freq)\n", + "\n", + " # Calculate sin and cos based on the actual sequence length\n", + " sin = tf.sin(sinusoid_inp)\n", + " cos = tf.cos(sinusoid_inp)\n", + "\n", + " # Repeat sin/cos for interleaving: [a, b] -> [a, a, b, b]\n", + " # Result needs shape [actual_seq_len, dim]\n", + " sin = tf.repeat(sin, 2, axis=-1)\n", + " cos = tf.repeat(cos, 2, axis=-1)\n", + "\n", + " # Expand dims for batch and tile\n", + " # Output shape needs to be [batch_size, actual_seq_len, dim]\n", + " # Add batch dimension: [1, actual_seq_len, dim]\n", + " sin = tf.expand_dims(sin, axis=0)\n", + " cos = tf.expand_dims(cos, axis=0)\n", + "\n", + " # Tile to match the batch size: [batch_size, actual_seq_len, dim]\n", + 
" sin = tf.tile(sin, [batch_size, 1, 1])\n", + " cos = tf.tile(cos, [batch_size, 1, 1])\n", + "\n", + " # Casting to x.dtype was already done for inv_freq, sin/cos will inherit\n", + " # sin = tf.cast(sin, x.dtype) # Already done via calculation chain\n", + " # cos = tf.cast(cos, x.dtype) # Already done via calculation chain\n", + "\n", + " # Return sin and cos needed by InterleavedRoPE\n", + " return sin, cos\n", + "\n", + " def get_config(self):\n", + " config = super().get_config()\n", + " config.update({\n", + " \"dim\": self.dim,\n", + " \"max_seq_len\": self.max_seq_len,\n", + " \"temperature\": self.temperature,\n", + " })\n", + " return config\n", + "\n", + " @classmethod\n", + " def from_config(cls, config):\n", + " return cls(**config)\n", + "\n", + "\n", + "# iRoPE helper functions\n", + "\n", + "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='split_alternate')\n", + "def split_alternate(x):\n", + " shape = tf.shape(x)\n", + " x = tf.reshape(x, [shape[0], shape[1], shape[2] // 2, 2])\n", + " x = tf.transpose(x, [0, 1, 3, 2])\n", + " x = tf.reshape(x, [shape[0], shape[1], -1])\n", + " return x\n", + "\n", + "\n", + "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='rotate_half')\n", + "def rotate_half(x):\n", + " x = split_alternate(x)\n", + " d = tf.shape(x)[-1]\n", + " rotated_x = tf.concat([-x[..., d // 2:], x[..., :d // 2]], axis=-1)\n", + " return tf.reshape(rotated_x, tf.shape(x))\n", + "\n", + "\n", + "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='apply_rotary_pos_emb')\n", + "def apply_rotary_pos_emb(x, sin, cos):\n", + " cos = tf.reshape(cos, [tf.shape(cos)[0], tf.shape(cos)[1], -1])\n", + " sin = tf.reshape(sin, [tf.shape(sin)[0], tf.shape(sin)[1], -1])\n", + " x_rotated = x * cos + rotate_half(x) * sin\n", + " return x_rotated\n", + "\n", + "\n", + "# interleaved Rotary Postional Embedding (iRoPE)\n", + 
"@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='InterleavedRoPE')\n", + "class InterleavedRoPE(tf.keras.layers.Layer):\n", + " def __init__(self, dim, max_seq_len=1024, **kwargs):\n", + " super().__init__(**kwargs)\n", + " if dim % 2 != 0:\n", + " raise ValueError(f\"Embedding dimension `dim` ({dim}) must be even for InterleavedRoPE.\")\n", + " self.dim = dim\n", + " self.max_seq_len = max_seq_len\n", + " # Instantiate the RotaryEmbedding layer\n", + " # Ensure the name is consistent if needed for saving/loading\n", + " self.rotary_emb = RotaryEmbedding(dim, max_seq_len, name=\"rotary_embedding\")\n", + "\n", + " def call(self, x):\n", + " # Get sin and cos from the RotaryEmbedding layer's call method\n", + " # *** Pass only 'x'. RotaryEmbedding calculates seq_len internally. ***\n", + " sin, cos = self.rotary_emb(x)\n", + "\n", + " # Apply the positional embeddings\n", + " x_embedded = apply_rotary_pos_emb(x, sin, cos)\n", + " return x_embedded\n", + "\n", + " def get_config(self):\n", + " config = super().get_config()\n", + " config.update({\n", + " \"dim\": self.dim,\n", + " \"max_seq_len\": self.max_seq_len,\n", + " })\n", + " # Keras handles nested layer serialization automatically\n", + " return config\n", + "\n", + " @classmethod\n", + " def from_config(cls, config):\n", + " # Keras handles nested layer restoration automatically\n", + " return cls(**config)\n", + "```" + ], + "metadata": { + "id": "CXtYv20vpkMY" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Custom metric Perplexity (previously imported from the local package cerebrosllmutils):\n", + "\n", + "Since there is not a Perplexity metric in tensorflow.keras.metrics, we created our own, and one designed for this single - head model.\n", + "\n", + "## This is what it looks like:\n", + "\n", + "```python\n", + "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='Perplexity')\n", + "class Perplexity(tf.keras.metrics.Metric):\n", + " 
\"\"\"\n", + " Computes perplexity, defined as e^(categorical crossentropy).\n", + " \"\"\"\n", + "\n", + " def __init__(self, name='perplexity', **kwargs):\n", + " super().__init__(name=name, **kwargs)\n", + " self.total_crossentropy = self.add_weight(name='total_crossentropy', initializer='zeros')\n", + " self.count = self.add_weight(name='count', initializer='zeros')\n", + "\n", + " def update_state(self, y_true, y_pred, sample_weight=None):\n", + " # Calculate categorical crossentropy\n", + " crossentropy = tf.keras.losses.categorical_crossentropy(y_true, y_pred)\n", + "\n", + " # Update the running sum of crossentropy and the count of samples\n", + " self.total_crossentropy.assign_add(tf.reduce_sum(crossentropy))\n", + " self.count.assign_add(tf.cast(tf.shape(y_true)[0], dtype=tf.float32))\n", + "\n", + " def result(self):\n", + " # Compute the average crossentropy\n", + " average_crossentropy = self.total_crossentropy / self.count\n", + " # Compute perplexity as e^(average crossentropy)\n", + " return tf.exp(average_crossentropy)\n", + "\n", + " def reset_state(self):\n", + " # Reset the state variables\n", + " self.total_crossentropy.assign(0.0)\n", + " self.count.assign(0.0)\n", + "```\n" + ], + "metadata": { + "id": "uN3adqRLo61X" + } + }, + { + "cell_type": "code", + "source": [ + "# Custom metric: Perplexity\n", + "perplexity_metric = Perplexity()" + ], + "metadata": { + "id": "_8uTBW_to7iQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "# Stage I-a training: Neural Architecture Search (NAS)\n", + "\n", + "We now run the SimpleCerebrosRandomSearch to find the best performing architecture based on the training data and the base model. The search aims to minimize the perplexity in the train set. The search aims to minimize the perplexity in the training set. 
Obviously, in a full-scale run, we would use the validation set's value.\n", + "\n", + "- The Cerebros NAS will parse a block composed of rows (Levels) of multiple Dense layers (Units) with an overlapping, interleaved, interwoven topology both laterally between Dense layers on the same row and vertically between layers on different levels.\n", + "- This topology emulates the neuroscience principle of modularity.\n", + "- This topology allows local clusters of densely connected neurons to learn specialized fragments of a problem, while allowing efficient communication between these clusters to coordinate among themselves to compose a solution to a complex problem.\n", + "\n", + "For the deep technical details of how Cerebros NAS works: [How Cerebros NAS Works](https://github.com/david-thrower/cerebros-core-algorithm-alpha/blob/277-attempt-to-imporve-parameters-on--dev-branch-275/documentation/cerebros-technical-details.md)\n", + "\n", + "## This is what a neural network parsed by Cerebros looks like:\n", + "\n", + "- Green triangles: Input layers\n", + "- Blue squares: Concatenate layer -> [BatchNormalization | Dropout]\n", + "- Pink ovals: Hidden Dense layers\n", + "- Red oval: Output Dense layer\n" + ], + "metadata": { + "id": "tWjbHiHRMhR4" + } + }, + { + "cell_type": "markdown", + "source": [ + "![Brain-lookalike1.png]()" + ], + "metadata": { + "id": "1wR8EVItNNh_" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "## For a more readable view of what this looks like\n", + "\n", + "![image.png]()\n" + ], + "metadata": { + "id": "_bXR1QxaLPiq" + } + }, + { + "cell_type": "code", + "source": [ + "######## Instantiate Cerebros Neural Architecture Search #######\n", + "\n", + "# Project metadata\n", + "TIME = pendulum.now(tz='America/New_York').__str__()[:16].replace('T', '_').replace(':', '_').replace('-', '_')\n", + "PROJECT_NAME = f'{TIME}_cerebros_not-gpt'\n", + "meta_trial_number = 42\n", + "\n", + "# Initialize the AutoML search\n", + 
"cerebros_automl = SimpleCerebrosRandomSearch(\n", + " unit_type=DenseUnit,\n", + " input_shapes=INPUT_SHAPES,\n", + " output_shapes=OUTPUT_SHAPES,\n", + " training_data=x_train_packaged,\n", + " labels=y_train_packaged,\n", + " validation_split=0.2,\n", + " direction='minimize',\n", + " metric_to_rank_by=\"perplexity\",\n", + " minimum_levels=minimum_levels,\n", + " maximum_levels=maximum_levels,\n", + " minimum_units_per_level=minimum_units_per_level,\n", + " maximum_units_per_level=maximum_units_per_level,\n", + " minimum_neurons_per_unit=minimum_neurons_per_unit,\n", + " maximum_neurons_per_unit=maximum_neurons_per_unit,\n", + " activation=activation,\n", + " final_activation='softmax',\n", + " number_of_architecture_moities_to_try=moities_to_try,\n", + " number_of_tries_per_architecture_moity=tries_per_moity,\n", + " predecessor_level_connection_affinity_factor_first=predecessor_level_connection_affinity_factor_first,\n", + " predecessor_level_connection_affinity_factor_main=predecessor_level_connection_affinity_factor_main,\n", + " predecessor_level_connection_affinity_factor_decay_main=zero_7_exp_decay,\n", + " max_consecutive_lateral_connections=max_consecutive_lateral_connections,\n", + " p_lateral_connection=p_lateral_connection,\n", + " p_lateral_connection_decay=zero_95_exp_decay,\n", + " num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit,\n", + " learning_rate=learning_rate,\n", + " loss=tf.keras.losses.CategoricalCrossentropy(),\n", + " metrics=[tf.keras.metrics.CategoricalAccuracy(), perplexity_metric],\n", + " epochs=epochs,\n", + " project_name=f\"{PROJECT_NAME}_meta_{meta_trial_number}\",\n", + " model_graphs='model_graphs',\n", + " batch_size=batch_size,\n", + " gradient_accumulation_steps=gradient_accumulation_steps,\n", + " meta_trial_number=meta_trial_number,\n", + " base_models=[cerebros_base_model],\n", + " train_data_dtype=tf.int32\n", + ")" + ], + "metadata": { + "id": "XV2q_5WEwBJ0" + }, + "execution_count": null, 
+ "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Run the Cerebros Neural Architecture Search\n" + ], + "metadata": { + "id": "TJVLfmJ2virA" + } + }, + { + "cell_type": "code", + "source": [ + "cerebros_t0 = time.time()\n", + "phase_i_a_result_0 = cerebros_automl.run_random_search()\n", + "cerebros_t1 = time.time()\n", + "\n", + "# Report results\n", + "cerebros_time_all_models_min = (cerebros_t1 - cerebros_t0) / 60\n", + "models_tried = moities_to_try * tries_per_moity\n", + "cerebros_time_per_model = cerebros_time_all_models_min / models_tried\n", + "phase_i_a_result = float(phase_i_a_result_0)\n", + "\n", + "print(f\"Cerebros trained {models_tried} models in {cerebros_time_all_models_min:.2f} min. Average time per model: {cerebros_time_per_model:.2f} min.\")\n", + "print(f'Cerebros best perplexity achieved in Phase I-a is {phase_i_a_result}')" + ], + "metadata": { + "id": "ulL0EGnow5L7", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "d56dd1ec-2f7b-4a3c-ecc6-75e595910367" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\rGlobal task progress: 0%|\u001b[38;2;22;206;235m \u001b[0m| 0/3 [00:00nnf>ceil\n", + "k is: 0 value is: [{'1': }]\n", + "0\n", + "k is: 1 value is: [{'2': }, {'2': }]\n", + "1\n", + "Trying to create level 1\n", + "We think level 1's predecessors are: [0]\n", + "k is: 2 value is: [{'128260': }]\n", + "2\n", + "Trying to create Final level 2\n", + "Trying to create level 2\n", + "We think level final level 2's predecessors are: [0, 1]\n", + "levels:\n", + "[0, 1, 2]\n", + "{'0': 'InputUnitModule'}\n", + "InputLevel.input_shapes [(40,)]\n", + "{'2': }\n", + "{'2': }\n", + "Debug: I am 2 selecting 1\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "Setting levels_unmaterialized[0] level_number 0 to have first successor: levels_unmaterialized[:1], having level_numbers of 
[1, 2]\n", + "Setting levels_unmaterialized[1] level_number 1 to have first successor: levels_unmaterialized[:2], having level_numbers of [2]\n", + "Debug: successor_connectivity_errors_2d []\n", + "$$$$$$>>>>> Base model: \n", + "InputUnit.input_shape: (40,)\n", + "{'2': }\n", + "{'2': }\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "Debug: successor_connectivity_errors_2d []\n", + "Debug: successor_connectivity_errors_2d []\n", + "materialize:_NeuralNetworkFuture_0000000000000nan_tr_0_DenseLevel_0000000000000001_tr_0_DenseUnit_0000000000000001_tr_0_0 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialize:_NeuralNetworkFuture_0000000000000nan_tr_0_DenseLevel_0000000000000001_tr_0_DenseUnit_0000000000000001_tr_0_1 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "{'128260': }\n", + "debug: meta_level_number\n", + "Debug: successor_connectivity_errors_2d []\n", + "materialize:_NeuralNetworkFuture_0000000000000nan_tr_0_FinalDenseLevel_0000000000000002_tr_0_FinalDenseUnit_0000000000000002_tr_0_0 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "inputs\n", + "\n", + "\n", + "outputs\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + 
"\u001b[1mModel: \"NeuralNetworkFuture_0000000000000nan_tr_0_nn_materialized\"\u001b[0m\n" + ], + "text/html": [ + "
Model: \"NeuralNetworkFuture_0000000000000nan_tr_0_nn_materialized\"\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n", + "โ”ƒ\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0mโ”ƒ\n", + "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m40\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ - โ”‚\n", + "โ”‚ (\u001b[38;5;33mInputLayer\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ functional โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m12\u001b[0m) โ”‚ \u001b[38;5;34m1,550,652\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mFunctional\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ 
(\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ 
โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ 
(\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], 
โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ 
โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m1,584\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128260\u001b[0m) โ”‚ \u001b[38;5;34m50,919,220\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n" + ], + "text/html": [ + "
โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n",
+              "โ”ƒ Layer (type)        โ”ƒ Output Shape      โ”ƒ    Param # โ”ƒ Connected to      โ”ƒ\n",
+              "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 40)        โ”‚          0 โ”‚ -                 โ”‚\n",
+              "โ”‚ (InputLayer)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ functional          โ”‚ (None, 12)        โ”‚  1,550,652 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Functional)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0]  โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0]  โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚          0 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚      1,584 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 128260)    โ”‚ 50,919,220 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m52,476,644\u001b[0m (200.18 MB)\n" + ], + "text/html": [ + "
 Total params: 52,476,644 (200.18 MB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m52,474,124\u001b[0m (200.17 MB)\n" + ], + "text/html": [ + "
 Trainable params: 52,474,124 (200.17 MB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m2,520\u001b[0m (9.84 KB)\n" + ], + "text/html": [ + "
 Non-trainable params: 2,520 (9.84 KB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "None\n", + "Epoch 1/41\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/keras/src/models/functional.py:241: UserWarning: The structure of `inputs` doesn't match the expected structure.\n", + "Expected: NeuralNetworkFuture_0000000000000nan_tr_0_InputLevel_0000000000000000_tr_0_InputUnit_0000000000000000_tr_0_0_inp\n", + "Received: inputs=('Tensor(shape=(None, 40))',)\n", + " warnings.warn(msg)\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m10s\u001b[0m 752ms/step - categorical_accuracy: 0.0000e+00 - loss: 11.7672 - perplexity: 128956.3438 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7557 - val_perplexity: 127482.9922\n", + "Epoch 2/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 547ms/step - categorical_accuracy: 0.0000e+00 - loss: 11.6423 - perplexity: 113970.0938 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7555 - val_perplexity: 127447.9844\n", + "Epoch 3/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 785ms/step - categorical_accuracy: 0.1574 - loss: 11.5549 - perplexity: 104629.7031 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7555 - val_perplexity: 127457.9531\n", + "Epoch 4/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 510ms/step - categorical_accuracy: 0.1518 - loss: 11.2911 - perplexity: 80904.3125 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7396 - val_perplexity: 125437.5078\n", + "Epoch 
5/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 594ms/step - categorical_accuracy: 0.0000e+00 - loss: 11.7358 - perplexity: 125450.3906 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7296 - val_perplexity: 124199.8984\n", + "Epoch 6/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 869ms/step - categorical_accuracy: 0.1185 - loss: 11.0556 - perplexity: 65319.5391 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7240 - val_perplexity: 123501.3828\n", + "Epoch 7/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 589ms/step - categorical_accuracy: 0.0506 - loss: 11.3671 - perplexity: 90319.2578 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7149 - val_perplexity: 122378.4219\n", + "Epoch 8/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 539ms/step - categorical_accuracy: 0.0000e+00 - loss: 11.1610 - perplexity: 70926.6328 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7186 - val_perplexity: 122839.8203\n", + "Epoch 9/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 629ms/step - categorical_accuracy: 0.1496 - loss: 10.9728 - perplexity: 66133.8672 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7250 - val_perplexity: 123618.0391\n", + "Epoch 10/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 561ms/step - categorical_accuracy: 0.1475 - loss: 10.0717 - perplexity: 24002.7051 - val_categorical_accuracy: 0.1667 - val_loss: 11.7307 - 
val_perplexity: 124332.1562\n", + "Epoch 11/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 558ms/step - categorical_accuracy: 0.2024 - loss: 10.4731 - perplexity: 36918.5938 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7395 - val_perplexity: 125429.9766\n", + "Epoch 12/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 530ms/step - categorical_accuracy: 0.0000e+00 - loss: 10.1507 - perplexity: 27366.1113 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7502 - val_perplexity: 126783.1797\n", + "Epoch 13/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 750ms/step - categorical_accuracy: 0.0734 - loss: 10.4913 - perplexity: 43854.1094 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7526 - val_perplexity: 127089.4531\n", + "Epoch 14/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 583ms/step - categorical_accuracy: 0.3086 - loss: 9.0654 - perplexity: 9803.9824 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7553 - val_perplexity: 127423.6797\n", + "Epoch 15/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 528ms/step - categorical_accuracy: 0.2697 - loss: 9.0867 - perplexity: 10961.3613 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7623 - val_perplexity: 128316.8125\n", + "Epoch 16/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 582ms/step - categorical_accuracy: 0.0685 - loss: 9.1616 - perplexity: 10116.4492 - 
val_categorical_accuracy: 0.0000e+00 - val_loss: 11.8354 - val_perplexity: 138047.7344\n", + "Epoch 17/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 797ms/step - categorical_accuracy: 0.1518 - loss: 7.9130 - perplexity: 2808.9939 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.8843 - val_perplexity: 144976.3594\n", + "Epoch 18/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 1s/step - categorical_accuracy: 0.2169 - loss: 7.4165 - perplexity: 1843.1222 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.9479 - val_perplexity: 154489.3906\n", + "Epoch 19/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 510ms/step - categorical_accuracy: 0.1996 - loss: 8.1748 - perplexity: 4106.3154 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.0354 - val_perplexity: 168615.2344\n", + "Epoch 20/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 742ms/step - categorical_accuracy: 0.0839 - loss: 7.6041 - perplexity: 2107.0347 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.1744 - val_perplexity: 193765.5312\n", + "Epoch 21/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 601ms/step - categorical_accuracy: 0.2080 - loss: 7.4821 - perplexity: 1883.4858 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.2587 - val_perplexity: 210814.2656\n", + "Epoch 22/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 526ms/step - categorical_accuracy: 0.2036 - loss: 7.1881 
- perplexity: 1867.2930 - val_categorical_accuracy: 0.1667 - val_loss: 12.3405 - val_perplexity: 228771.9219\n", + "Epoch 23/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 792ms/step - categorical_accuracy: 0.1919 - loss: 7.0572 - perplexity: 1222.1584 - val_categorical_accuracy: 0.1667 - val_loss: 12.4140 - val_perplexity: 246219.7031\n", + "Epoch 24/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 688ms/step - categorical_accuracy: 0.1685 - loss: 5.6640 - perplexity: 308.2304 - val_categorical_accuracy: 0.1667 - val_loss: 12.5863 - val_perplexity: 292515.5625\n", + "Epoch 25/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 609ms/step - categorical_accuracy: 0.1407 - loss: 6.4666 - perplexity: 751.4036 - val_categorical_accuracy: 0.1667 - val_loss: 12.6761 - val_perplexity: 320013.0000\n", + "Epoch 26/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 895ms/step - categorical_accuracy: 0.0839 - loss: 5.3843 - perplexity: 352.6757 - val_categorical_accuracy: 0.1667 - val_loss: 12.7511 - val_perplexity: 344943.8125\n", + "Epoch 27/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 516ms/step - categorical_accuracy: 0.2120 - loss: 5.6307 - perplexity: 300.5551 - val_categorical_accuracy: 0.1667 - val_loss: 12.8756 - val_perplexity: 390664.9688\n", + "Epoch 28/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 614ms/step - categorical_accuracy: 0.1685 - loss: 4.4140 
- perplexity: 99.5634 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.1954 - val_perplexity: 537862.5000\n", + "Epoch 29/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 517ms/step - categorical_accuracy: 0.0568 - loss: 5.8209 - perplexity: 412.2969 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.3590 - val_perplexity: 633498.9375\n", + "Epoch 30/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 748ms/step - categorical_accuracy: 0.1864 - loss: 4.9144 - perplexity: 157.2443 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.5253 - val_perplexity: 748103.1875\n", + "Epoch 31/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 507ms/step - categorical_accuracy: 0.1052 - loss: 8.2503 - perplexity: 22384.6094 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.6228 - val_perplexity: 824754.3750\n", + "Epoch 32/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 612ms/step - categorical_accuracy: 0.4431 - loss: 4.0581 - perplexity: 75.1973 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.0377 - val_perplexity: 1248790.8750\n", + "Epoch 33/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 766ms/step - categorical_accuracy: 0.2086 - loss: 5.6123 - perplexity: 301.7467 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.2131 - val_perplexity: 1488169.6250\n", + "Epoch 34/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 569ms/step - 
categorical_accuracy: 0.2919 - loss: 4.4319 - perplexity: 154.5172 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.2928 - val_perplexity: 1611684.3750\n", + "Epoch 35/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 585ms/step - categorical_accuracy: 0.1802 - loss: 5.1381 - perplexity: 190.7273 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.4868 - val_perplexity: 1956789.0000\n", + "Epoch 36/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 639ms/step - categorical_accuracy: 0.1719 - loss: 4.6314 - perplexity: 111.0518 - val_categorical_accuracy: 0.1667 - val_loss: 14.5656 - val_perplexity: 2117109.5000\n", + "Epoch 37/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 693ms/step - categorical_accuracy: 0.0839 - loss: 6.8925 - perplexity: 1205.9113 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.6420 - val_perplexity: 2285232.2500\n", + "Epoch 38/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 594ms/step - categorical_accuracy: 0.2530 - loss: 5.8083 - perplexity: 927.8478 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.6140 - val_perplexity: 2222210.0000\n", + "Epoch 39/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 557ms/step - categorical_accuracy: 0.1913 - loss: 4.0802 - perplexity: 62.5591 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.5886 - val_perplexity: 2166540.2500\n", + "Epoch 40/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m 
\u001b[1m4s\u001b[0m 757ms/step - categorical_accuracy: 0.2987 - loss: 4.2323 - perplexity: 90.9604 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.6538 - val_perplexity: 2312421.2500\n", + "Epoch 41/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 529ms/step - categorical_accuracy: 0.2453 - loss: 3.7488 - perplexity: 50.1163 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.6478 - val_perplexity: 2298534.5000\n", + "this is neural_network_spec_file 2025_11_23 16_55_cerebros_not-gpt_meta_42/model_architectures/tr_0000000000000000_subtrial_0000000000000000.txt\n", + "returning trial 0 oracles\n", + " categorical_accuracy loss perplexity val_categorical_accuracy \\\n", + "0 0.000000 11.769061 129192.796875 0.000000 \n", + "1 0.000000 11.635833 113077.960938 0.000000 \n", + "2 0.130435 11.652204 114944.367188 0.000000 \n", + "3 0.130435 11.464634 95285.593750 0.000000 \n", + "4 0.000000 11.768666 129141.796875 0.000000 \n", + "5 0.130435 10.994949 59572.500000 0.000000 \n", + "6 0.043478 11.276978 78982.257812 0.000000 \n", + "7 0.000000 11.120511 67542.414062 0.000000 \n", + "8 0.173913 10.726726 45557.273438 0.000000 \n", + "9 0.217391 10.059676 23380.933594 0.166667 \n", + "10 0.173913 10.355123 31417.570312 0.000000 \n", + "11 0.000000 10.472779 35340.300781 0.000000 \n", + "12 0.086957 10.171259 26140.964844 0.000000 \n", + "13 0.217391 9.254299 10449.392578 0.000000 \n", + "14 0.217391 8.896774 7308.360840 0.000000 \n", + "15 0.130435 9.018457 8254.035156 0.000000 \n", + "16 0.130435 8.039083 3099.770996 0.000000 \n", + "17 0.217391 7.848331 2561.456787 0.000000 \n", + "18 0.173913 7.948806 2832.192139 0.000000 \n", + "19 0.043478 7.698378 2204.769043 0.000000 \n", + "20 0.173913 7.669386 2141.766846 0.000000 \n", + "21 0.260870 6.773150 874.061218 0.166667 \n", + "22 0.217391 7.382279 1607.248413 0.166667 \n", + "23 0.130435 6.034015 417.387543 
0.166667 \n", + "24 0.130435 6.000526 403.641022 0.166667 \n", + "25 0.043478 6.586512 725.246826 0.166667 \n", + "26 0.260870 5.741646 311.576935 0.166667 \n", + "27 0.130435 5.138083 170.388733 0.000000 \n", + "28 0.086957 5.670679 290.231415 0.000000 \n", + "29 0.217391 5.602477 271.096985 0.000000 \n", + "30 0.173913 6.986033 1081.422852 0.000000 \n", + "31 0.304348 4.127844 62.044033 0.000000 \n", + "32 0.217391 5.934126 377.709869 0.000000 \n", + "33 0.217391 5.564253 260.930054 0.000000 \n", + "34 0.173913 5.642823 282.258331 0.000000 \n", + "35 0.173913 4.475579 87.845474 0.166667 \n", + "36 0.043478 6.194771 490.179321 0.000000 \n", + "37 0.217391 5.472395 238.029572 0.000000 \n", + "38 0.173913 4.001881 54.700928 0.000000 \n", + "39 0.304348 3.707729 40.761116 0.000000 \n", + "40 0.260870 4.130568 62.213223 0.000000 \n", + "\n", + " val_loss val_perplexity trial_number subtrial_number \\\n", + "0 11.755738 1.274830e+05 0 0 \n", + "1 11.755464 1.274480e+05 0 0 \n", + "2 11.755542 1.274580e+05 0 0 \n", + "3 11.739563 1.254375e+05 0 0 \n", + "4 11.729648 1.241999e+05 0 0 \n", + "5 11.724008 1.235014e+05 0 0 \n", + "6 11.714873 1.223784e+05 0 0 \n", + "7 11.718637 1.228398e+05 0 0 \n", + "8 11.724952 1.236180e+05 0 0 \n", + "9 11.730713 1.243322e+05 0 0 \n", + "10 11.739503 1.254300e+05 0 0 \n", + "11 11.750234 1.267832e+05 0 0 \n", + "12 11.752646 1.270895e+05 0 0 \n", + "13 11.755273 1.274237e+05 0 0 \n", + "14 11.762258 1.283168e+05 0 0 \n", + "15 11.835355 1.380477e+05 0 0 \n", + "16 11.884326 1.449764e+05 0 0 \n", + "17 11.947881 1.544894e+05 0 0 \n", + "18 12.035375 1.686152e+05 0 0 \n", + "19 12.174404 1.937655e+05 0 0 \n", + "20 12.258733 2.108143e+05 0 0 \n", + "21 12.340481 2.287719e+05 0 0 \n", + "22 12.413980 2.462197e+05 0 0 \n", + "23 12.586273 2.925156e+05 0 0 \n", + "24 12.676117 3.200130e+05 0 0 \n", + "25 12.751137 3.449438e+05 0 0 \n", + "26 12.875606 3.906650e+05 0 0 \n", + "27 13.195358 5.378625e+05 0 0 \n", + "28 13.359014 6.334989e+05 0 
0 \n", + "29 13.525296 7.481032e+05 0 0 \n", + "30 13.622841 8.247544e+05 0 0 \n", + "31 14.037686 1.248791e+06 0 0 \n", + "32 14.213058 1.488170e+06 0 0 \n", + "33 14.292789 1.611684e+06 0 0 \n", + "34 14.486815 1.956789e+06 0 0 \n", + "35 14.565562 2.117110e+06 0 0 \n", + "36 14.641978 2.285232e+06 0 0 \n", + "37 14.614013 2.222210e+06 0 0 \n", + "38 14.588642 2.166540e+06 0 0 \n", + "39 14.653806 2.312421e+06 0 0 \n", + "40 14.647781 2.298534e+06 0 0 \n", + "\n", + " model_name \n", + "0 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "1 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "2 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "3 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "4 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "5 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "6 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "7 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "8 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "9 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "10 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "11 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "12 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "13 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "14 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "15 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "16 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "17 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "18 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "19 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "20 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "21 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "22 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "23 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... 
\n", + "24 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "25 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "26 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "27 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "28 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "29 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "30 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "31 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "32 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "33 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "34 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "35 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "36 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "37 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "38 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "39 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "40 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", + " self.pid = os.fork()\n", + "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. 
os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", + " self.pid = os.fork()\n", + "Global task progress: 33%|\u001b[38;2;22;206;235mโ–ˆโ–ˆโ–ˆโ–Ž \u001b[0m| 1/3 [03:54<07:49, 234.85s/it]" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "SimpleCerebrosRandomSearch.input_shapes: [(40,)]\n", + "nan\n", + ">nnf>ceil\n", + "k is: 0 value is: [{'1': }]\n", + "0\n", + "k is: 1 value is: [{'2': }, {'2': }]\n", + "1\n", + "Trying to create level 1\n", + "We think level 1's predecessors are: [0]\n", + "k is: 2 value is: [{'128260': }]\n", + "2\n", + "Trying to create Final level 2\n", + "Trying to create level 2\n", + "We think level final level 2's predecessors are: [0, 1]\n", + "levels:\n", + "[0, 1, 2]\n", + "{'0': 'InputUnitModule'}\n", + "InputLevel.input_shapes [(40,)]\n", + "{'2': }\n", + "{'2': }\n", + "Debug: I am 2 selecting 1\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "Setting levels_unmaterialized[0] level_number 0 to have first successor: levels_unmaterialized[:1], having level_numbers of [1, 2]\n", + "Setting levels_unmaterialized[1] level_number 1 to have first successor: levels_unmaterialized[:2], having level_numbers of [2]\n", + "Debug: successor_connectivity_errors_2d []\n", + "$$$$$$>>>>> Base model: \n", + "InputUnit.input_shape: (40,)\n", + "{'2': }\n", + "{'2': }\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "Debug: successor_connectivity_errors_2d []\n", + "Debug: successor_connectivity_errors_2d []\n", + "materialize:_NeuralNetworkFuture_0000000000000nan_tr_1_DenseLevel_0000000000000001_tr_1_DenseUnit_0000000000000001_tr_1_0 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + 
"materialize:_NeuralNetworkFuture_0000000000000nan_tr_1_DenseLevel_0000000000000001_tr_1_DenseUnit_0000000000000001_tr_1_1 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "{'128260': }\n", + "debug: meta_level_number\n", + "Debug: successor_connectivity_errors_2d []\n", + "materialize:_NeuralNetworkFuture_0000000000000nan_tr_1_FinalDenseLevel_0000000000000002_tr_1_FinalDenseUnit_0000000000000002_tr_1_0 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "inputs\n", + "\n", + "\n", + "outputs\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1mModel: \"NeuralNetworkFuture_0000000000000nan_tr_1_nn_materialized\"\u001b[0m\n" + ], + "text/html": [ + "
Model: \"NeuralNetworkFuture_0000000000000nan_tr_1_nn_materialized\"\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n", + "โ”ƒ\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0mโ”ƒ\n", + "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m40\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ - โ”‚\n", + "โ”‚ (\u001b[38;5;33mInputLayer\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ functional โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m12\u001b[0m) โ”‚ \u001b[38;5;34m1,550,652\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mFunctional\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ 
(\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ 
โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ 
(\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], 
โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ 
โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m1,584\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128260\u001b[0m) โ”‚ \u001b[38;5;34m50,919,220\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n" + ], + "text/html": [ + "
โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n",
+              "โ”ƒ Layer (type)        โ”ƒ Output Shape      โ”ƒ    Param # โ”ƒ Connected to      โ”ƒ\n",
+              "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 40)        โ”‚          0 โ”‚ -                 โ”‚\n",
+              "โ”‚ (InputLayer)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ functional          โ”‚ (None, 12)        โ”‚  1,550,652 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Functional)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0]  โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0]  โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚          0 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚      1,584 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 128260)    โ”‚ 50,919,220 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m52,476,644\u001b[0m (200.18 MB)\n" + ], + "text/html": [ + "
 Total params: 52,476,644 (200.18 MB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m52,474,124\u001b[0m (200.17 MB)\n" + ], + "text/html": [ + "
 Trainable params: 52,474,124 (200.17 MB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m2,520\u001b[0m (9.84 KB)\n" + ], + "text/html": [ + "
 Non-trainable params: 2,520 (9.84 KB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "None\n", + "Epoch 1/41\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/keras/src/models/functional.py:241: UserWarning: The structure of `inputs` doesn't match the expected structure.\n", + "Expected: NeuralNetworkFuture_0000000000000nan_tr_1_InputLevel_0000000000000000_tr_1_InputUnit_0000000000000000_tr_1_0_inp\n", + "Received: inputs=('Tensor(shape=(None, 40))',)\n", + " warnings.warn(msg)\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 1s/step - categorical_accuracy: 0.0000e+00 - loss: 11.7384 - perplexity: 329529.3125 - val_categorical_accuracy: 0.1667 - val_loss: 11.7688 - val_perplexity: 129164.8281\n", + "Epoch 2/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 595ms/step - categorical_accuracy: 0.1913 - loss: 11.2528 - perplexity: 77375.8594 - val_categorical_accuracy: 0.1667 - val_loss: 11.7502 - val_perplexity: 126778.7031\n", + "Epoch 3/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 518ms/step - categorical_accuracy: 0.2191 - loss: 10.8135 - perplexity: 50491.5156 - val_categorical_accuracy: 0.1667 - val_loss: 11.7425 - val_perplexity: 125805.1797\n", + "Epoch 4/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 522ms/step - categorical_accuracy: 0.1864 - loss: 10.2940 - perplexity: 30868.9629 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7451 - val_perplexity: 126128.5781\n", + "Epoch 5/41\n", + 
"\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 820ms/step - categorical_accuracy: 0.1913 - loss: 9.7216 - perplexity: 16997.6719 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7362 - val_perplexity: 125020.5859\n", + "Epoch 6/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 645ms/step - categorical_accuracy: 0.1407 - loss: 8.9741 - perplexity: 8181.5312 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7171 - val_perplexity: 122652.5234\n", + "Epoch 7/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 593ms/step - categorical_accuracy: 0.1830 - loss: 8.4567 - perplexity: 4759.8066 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.6908 - val_perplexity: 119465.5703\n", + "Epoch 8/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 685ms/step - categorical_accuracy: 0.0506 - loss: 8.2385 - perplexity: 4355.3149 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.6441 - val_perplexity: 114018.0000\n", + "Epoch 9/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 651ms/step - categorical_accuracy: 0.2141 - loss: 7.1757 - perplexity: 1335.3220 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.6267 - val_perplexity: 112051.2734\n", + "Epoch 10/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 545ms/step - categorical_accuracy: 0.1830 - loss: 7.3339 - perplexity: 1963.8916 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.6375 - val_perplexity: 
113263.3828\n", + "Epoch 11/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 565ms/step - categorical_accuracy: 0.2669 - loss: 6.6371 - perplexity: 870.7467 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.6597 - val_perplexity: 115809.4375\n", + "Epoch 12/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 653ms/step - categorical_accuracy: 0.1719 - loss: 5.9232 - perplexity: 380.9991 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7503 - val_perplexity: 126796.4766\n", + "Epoch 13/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 626ms/step - categorical_accuracy: 0.0839 - loss: 7.4954 - perplexity: 2688.5974 - val_categorical_accuracy: 0.1667 - val_loss: 11.8025 - val_perplexity: 133587.0156\n", + "Epoch 14/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 555ms/step - categorical_accuracy: 0.0963 - loss: 6.5658 - perplexity: 758.4783 - val_categorical_accuracy: 0.1667 - val_loss: 11.8975 - val_perplexity: 146902.5625\n", + "Epoch 15/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 599ms/step - categorical_accuracy: 0.2419 - loss: 4.4233 - perplexity: 101.5967 - val_categorical_accuracy: 0.1667 - val_loss: 11.9977 - val_perplexity: 162383.4688\n", + "Epoch 16/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 597ms/step - categorical_accuracy: 0.2018 - loss: 4.8811 - perplexity: 147.2505 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.2702 - 
val_perplexity: 213244.7812\n", + "Epoch 17/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 514ms/step - categorical_accuracy: 0.2419 - loss: 4.8847 - perplexity: 212.5692 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.4334 - val_perplexity: 251053.4531\n", + "Epoch 18/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 516ms/step - categorical_accuracy: 0.1725 - loss: 5.0510 - perplexity: 216.2864 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.5886 - val_perplexity: 293192.5625\n", + "Epoch 19/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 1s/step - categorical_accuracy: 0.3348 - loss: 4.1482 - perplexity: 66.5400 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.7669 - val_perplexity: 350434.6875\n", + "Epoch 20/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 518ms/step - categorical_accuracy: 0.2364 - loss: 6.1440 - perplexity: 556.4460 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.1211 - val_perplexity: 499357.5000\n", + "Epoch 21/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 566ms/step - categorical_accuracy: 0.2752 - loss: 4.0937 - perplexity: 103.8000 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.2722 - val_perplexity: 580840.3125\n", + "Epoch 22/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 584ms/step - categorical_accuracy: 0.3582 - loss: 3.5086 - perplexity: 42.0227 - val_categorical_accuracy: 0.0000e+00 - 
val_loss: 13.3929 - val_perplexity: 655350.3750\n", + "Epoch 23/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 574ms/step - categorical_accuracy: 0.2357 - loss: 3.6651 - perplexity: 42.0124 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.5131 - val_perplexity: 739037.1875\n", + "Epoch 24/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 555ms/step - categorical_accuracy: 0.3743 - loss: 4.2759 - perplexity: 78.8337 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.6073 - val_perplexity: 812073.0625\n", + "Epoch 25/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 619ms/step - categorical_accuracy: 0.2814 - loss: 6.1106 - perplexity: 702.3881 - val_categorical_accuracy: 0.1667 - val_loss: 13.6209 - val_perplexity: 823172.5625\n", + "Epoch 26/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 689ms/step - categorical_accuracy: 0.2647 - loss: 6.2123 - perplexity: 835.9423 - val_categorical_accuracy: 0.1667 - val_loss: 13.5922 - val_perplexity: 799900.9375\n", + "Epoch 27/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 824ms/step - categorical_accuracy: 0.3014 - loss: 3.9091 - perplexity: 57.1766 - val_categorical_accuracy: 0.1667 - val_loss: 13.5968 - val_perplexity: 803528.0625\n", + "Epoch 28/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 594ms/step - categorical_accuracy: 0.2864 - loss: 5.1544 - perplexity: 186.2288 - val_categorical_accuracy: 
0.1667 - val_loss: 13.5879 - val_perplexity: 796426.8750\n", + "Epoch 29/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 973ms/step - categorical_accuracy: 0.2314 - loss: 5.0346 - perplexity: 261.9535 - val_categorical_accuracy: 0.1667 - val_loss: 13.5785 - val_perplexity: 788957.9375\n", + "Epoch 30/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m8s\u001b[0m 1s/step - categorical_accuracy: 0.3508 - loss: 3.9460 - perplexity: 55.9352 - val_categorical_accuracy: 0.1667 - val_loss: 13.5878 - val_perplexity: 796315.2500\n", + "Epoch 31/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 684ms/step - categorical_accuracy: 0.2141 - loss: 3.3061 - perplexity: 29.5618 - val_categorical_accuracy: 0.1667 - val_loss: 13.5959 - val_perplexity: 802850.9375\n", + "Epoch 32/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 659ms/step - categorical_accuracy: 0.1719 - loss: 4.1759 - perplexity: 72.8835 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.7057 - val_perplexity: 896031.1250\n", + "Epoch 33/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 725ms/step - categorical_accuracy: 0.1302 - loss: 5.0193 - perplexity: 177.1105 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.7885 - val_perplexity: 973393.5000\n", + "Epoch 34/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 632ms/step - categorical_accuracy: 0.2919 - loss: 2.9201 - perplexity: 24.4465 - val_categorical_accuracy: 
0.0000e+00 - val_loss: 13.9237 - val_perplexity: 1114295.3750\n", + "Epoch 35/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 639ms/step - categorical_accuracy: 0.3197 - loss: 3.6359 - perplexity: 60.7448 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.0890 - val_perplexity: 1314598.2500\n", + "Epoch 36/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 527ms/step - categorical_accuracy: 0.2364 - loss: 3.6853 - perplexity: 93.4177 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.1742 - val_perplexity: 1431418.1250\n", + "Epoch 37/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 605ms/step - categorical_accuracy: 0.2731 - loss: 3.3295 - perplexity: 31.0892 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.2398 - val_perplexity: 1528469.6250\n", + "Epoch 38/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 755ms/step - categorical_accuracy: 0.5054 - loss: 4.2462 - perplexity: 96.5757 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.3218 - val_perplexity: 1659098.5000\n", + "Epoch 39/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 606ms/step - categorical_accuracy: 0.3638 - loss: 3.2328 - perplexity: 26.5526 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.3728 - val_perplexity: 1745870.1250\n", + "Epoch 40/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 745ms/step - categorical_accuracy: 0.5727 - loss: 1.9158 - perplexity: 9.5471 - 
val_categorical_accuracy: 0.0000e+00 - val_loss: 14.5209 - val_perplexity: 2024707.8750\n", + "Epoch 41/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 671ms/step - categorical_accuracy: 0.1068 - loss: 5.0107 - perplexity: 172.7614 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.5204 - val_perplexity: 2023570.8750\n", + "this is neural_network_spec_file 2025_11_23 16_55_cerebros_not-gpt_meta_42/model_architectures/tr_0000000000000001_subtrial_0000000000000000.txt\n", + "returning trial 1 oracles\n", + " categorical_accuracy loss perplexity val_categorical_accuracy \\\n", + "0 0.000000 11.719700 225372.531250 0.166667 \n", + "1 0.173913 11.155995 69982.140625 0.166667 \n", + "2 0.173913 10.995764 59621.039062 0.166667 \n", + "3 0.217391 10.042144 22974.583984 0.000000 \n", + "4 0.173913 9.805058 18125.181641 0.000000 \n", + "5 0.130435 9.198784 9885.100586 0.000000 \n", + "6 0.173913 8.641828 5663.671387 0.000000 \n", + "7 0.043478 8.808529 6691.075195 0.000000 \n", + "8 0.217391 7.256882 1417.828491 0.000000 \n", + "9 0.173913 6.904544 996.794250 0.000000 \n", + "10 0.217391 6.873430 966.256958 0.000000 \n", + "11 0.173913 5.982946 396.607025 0.000000 \n", + "12 0.043478 6.824471 920.089539 0.166667 \n", + "13 0.130435 6.259269 522.836731 0.166667 \n", + "14 0.217391 5.205779 182.322769 0.166667 \n", + "15 0.130435 5.462027 235.574463 0.000000 \n", + "16 0.217391 6.074162 434.485474 0.000000 \n", + "17 0.217391 5.354462 211.550262 0.000000 \n", + "18 0.304348 4.318021 75.040001 0.000000 \n", + "19 0.217391 5.875260 356.117035 0.000000 \n", + "20 0.217391 5.246053 189.815536 0.000000 \n", + "21 0.391304 4.035575 56.575462 0.000000 \n", + "22 0.173913 3.672752 39.360092 0.000000 \n", + "23 0.347826 4.800797 121.607239 0.000000 \n", + "24 0.260870 6.058529 427.745911 0.166667 \n", + "25 0.260870 6.874752 967.535400 0.166667 \n", + "26 0.304348 3.871903 
48.033691 0.166667 \n", + "27 0.217391 5.597022 269.622284 0.166667 \n", + "28 0.260870 4.006342 54.945507 0.166667 \n", + "29 0.260870 4.286894 72.740173 0.166667 \n", + "30 0.217391 3.180355 24.055300 0.166667 \n", + "31 0.173913 4.073040 58.735218 0.000000 \n", + "32 0.173913 5.302594 200.857193 0.000000 \n", + "33 0.217391 3.763384 43.094006 0.000000 \n", + "34 0.217391 4.363249 78.511826 0.000000 \n", + "35 0.217391 5.450110 232.783875 0.000000 \n", + "36 0.260870 3.634080 37.866989 0.000000 \n", + "37 0.391304 5.082735 161.214310 0.000000 \n", + "38 0.391304 3.312840 27.463017 0.000000 \n", + "39 0.434783 2.846823 17.232950 0.000000 \n", + "40 0.086957 5.169964 175.908478 0.000000 \n", + "\n", + " val_loss val_perplexity trial_number subtrial_number \\\n", + "0 11.768844 1.291648e+05 1 0 \n", + "1 11.750198 1.267787e+05 1 0 \n", + "2 11.742490 1.258052e+05 1 0 \n", + "3 11.745057 1.261286e+05 1 0 \n", + "4 11.736234 1.250206e+05 1 0 \n", + "5 11.717111 1.226525e+05 1 0 \n", + "6 11.690784 1.194656e+05 1 0 \n", + "7 11.644112 1.140180e+05 1 0 \n", + "8 11.626712 1.120513e+05 1 0 \n", + "9 11.637473 1.132634e+05 1 0 \n", + "10 11.659701 1.158094e+05 1 0 \n", + "11 11.750339 1.267965e+05 1 0 \n", + "12 11.802508 1.335870e+05 1 0 \n", + "13 11.897525 1.469026e+05 1 0 \n", + "14 11.997716 1.623835e+05 1 0 \n", + "15 12.270196 2.132448e+05 1 0 \n", + "16 12.433421 2.510535e+05 1 0 \n", + "17 12.588585 2.931926e+05 1 0 \n", + "18 12.766930 3.504347e+05 1 0 \n", + "19 13.121078 4.993575e+05 1 0 \n", + "20 13.272231 5.808403e+05 1 0 \n", + "21 13.392925 6.553504e+05 1 0 \n", + "22 13.513103 7.390372e+05 1 0 \n", + "23 13.607346 8.120731e+05 1 0 \n", + "24 13.620921 8.231726e+05 1 0 \n", + "25 13.592243 7.999009e+05 1 0 \n", + "26 13.596767 8.035281e+05 1 0 \n", + "27 13.587891 7.964269e+05 1 0 \n", + "28 13.578468 7.889579e+05 1 0 \n", + "29 13.587750 7.963152e+05 1 0 \n", + "30 13.595924 8.028509e+05 1 0 \n", + "31 13.705730 8.960311e+05 1 0 \n", + "32 13.788544 
9.733935e+05 1 0 \n", + "33 13.923733 1.114295e+06 1 0 \n", + "34 14.089040 1.314598e+06 1 0 \n", + "35 14.174176 1.431418e+06 1 0 \n", + "36 14.239779 1.528470e+06 1 0 \n", + "37 14.321785 1.659098e+06 1 0 \n", + "38 14.372764 1.745870e+06 1 0 \n", + "39 14.520935 2.024708e+06 1 0 \n", + "40 14.520374 2.023571e+06 1 0 \n", + "\n", + " model_name \n", + "0 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "1 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "2 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "3 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "4 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "5 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "6 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "7 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "8 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "9 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "10 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "11 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "12 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "13 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "14 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "15 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "16 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "17 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "18 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "19 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "20 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "21 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "22 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "23 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "24 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "25 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "26 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... 
\n", + "27 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "28 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "29 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "30 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "31 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "32 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "33 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "34 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "35 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "36 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "37 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "38 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "39 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "40 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", + " self.pid = os.fork()\n", + "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. 
os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", + " self.pid = os.fork()\n", + "Global task progress: 67%|\u001b[38;2;22;206;235mโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ \u001b[0m| 2/3 [07:42<03:50, 230.58s/it]" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "SimpleCerebrosRandomSearch.input_shapes: [(40,)]\n", + "nan\n", + ">nnf>ceil\n", + "k is: 0 value is: [{'1': }]\n", + "0\n", + "k is: 1 value is: [{'2': }, {'2': }]\n", + "1\n", + "Trying to create level 1\n", + "We think level 1's predecessors are: [0]\n", + "k is: 2 value is: [{'128260': }]\n", + "2\n", + "Trying to create Final level 2\n", + "Trying to create level 2\n", + "We think level final level 2's predecessors are: [0, 1]\n", + "levels:\n", + "[0, 1, 2]\n", + "{'0': 'InputUnitModule'}\n", + "InputLevel.input_shapes [(40,)]\n", + "{'2': }\n", + "{'2': }\n", + "Debug: I am 2 selecting 1\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "Setting levels_unmaterialized[0] level_number 0 to have first successor: levels_unmaterialized[:1], having level_numbers of [1, 2]\n", + "Setting levels_unmaterialized[1] level_number 1 to have first successor: levels_unmaterialized[:2], having level_numbers of [2]\n", + "Debug: successor_connectivity_errors_2d []\n", + "$$$$$$>>>>> Base model: \n", + "InputUnit.input_shape: (40,)\n", + "{'2': }\n", + "{'2': }\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "Debug: successor_connectivity_errors_2d []\n", + "Debug: successor_connectivity_errors_2d []\n", + "materialize:_NeuralNetworkFuture_0000000000000nan_tr_2_DenseLevel_0000000000000001_tr_2_DenseUnit_0000000000000001_tr_2_0 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + 
"materialize:_NeuralNetworkFuture_0000000000000nan_tr_2_DenseLevel_0000000000000001_tr_2_DenseUnit_0000000000000001_tr_2_1 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "{'128260': }\n", + "debug: meta_level_number\n", + "Debug: successor_connectivity_errors_2d []\n", + "materialize:_NeuralNetworkFuture_0000000000000nan_tr_2_FinalDenseLevel_0000000000000002_tr_2_FinalDenseUnit_0000000000000002_tr_2_0 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "inputs\n", + "\n", + "\n", + "outputs\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1mModel: \"NeuralNetworkFuture_0000000000000nan_tr_2_nn_materialized\"\u001b[0m\n" + ], + "text/html": [ + "
Model: \"NeuralNetworkFuture_0000000000000nan_tr_2_nn_materialized\"\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n", + "โ”ƒ\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0mโ”ƒ\n", + "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m40\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ - โ”‚\n", + "โ”‚ (\u001b[38;5;33mInputLayer\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ functional โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m12\u001b[0m) โ”‚ \u001b[38;5;34m1,550,652\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mFunctional\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ 
(\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ 
โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ 
(\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], 
โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ 
โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m1,584\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128260\u001b[0m) โ”‚ \u001b[38;5;34m50,919,220\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n" + ], + "text/html": [ + "
โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n",
+              "โ”ƒ Layer (type)        โ”ƒ Output Shape      โ”ƒ    Param # โ”ƒ Connected to      โ”ƒ\n",
+              "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 40)        โ”‚          0 โ”‚ -                 โ”‚\n",
+              "โ”‚ (InputLayer)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ functional          โ”‚ (None, 12)        โ”‚  1,550,652 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Functional)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0]  โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0]  โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚          0 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚      1,584 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 128260)    โ”‚ 50,919,220 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m52,476,644\u001b[0m (200.18 MB)\n" + ], + "text/html": [ + "
 Total params: 52,476,644 (200.18 MB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m52,474,124\u001b[0m (200.17 MB)\n" + ], + "text/html": [ + "
 Trainable params: 52,474,124 (200.17 MB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m2,520\u001b[0m (9.84 KB)\n" + ], + "text/html": [ + "
 Non-trainable params: 2,520 (9.84 KB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "None\n", + "Epoch 1/41\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/keras/src/models/functional.py:241: UserWarning: The structure of `inputs` doesn't match the expected structure.\n", + "Expected: NeuralNetworkFuture_0000000000000nan_tr_2_InputLevel_0000000000000000_tr_2_InputUnit_0000000000000000_tr_2_0_inp\n", + "Received: inputs=('Tensor(shape=(None, 40))',)\n", + " warnings.warn(msg)\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 1s/step - categorical_accuracy: 0.0000e+00 - loss: 11.7705 - perplexity: 321629.5625 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7229 - val_perplexity: 123359.4219\n", + "Epoch 2/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m8s\u001b[0m 553ms/step - categorical_accuracy: 0.2203 - loss: 11.1997 - perplexity: 73499.6797 - val_categorical_accuracy: 0.1667 - val_loss: 11.6443 - val_perplexity: 114043.3438\n", + "Epoch 3/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 810ms/step - categorical_accuracy: 0.0568 - loss: 10.8859 - perplexity: 55947.3047 - val_categorical_accuracy: 0.1667 - val_loss: 11.6099 - val_perplexity: 110179.2891\n", + "Epoch 4/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 764ms/step - categorical_accuracy: 0.0857 - loss: 10.3024 - perplexity: 32385.9180 - val_categorical_accuracy: 0.1667 - val_loss: 11.5822 - val_perplexity: 107167.9375\n", + "Epoch 5/41\n", + 
"\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 804ms/step - categorical_accuracy: 0.4326 - loss: 9.0836 - perplexity: 8933.4072 - val_categorical_accuracy: 0.1667 - val_loss: 11.5889 - val_perplexity: 107891.9219\n", + "Epoch 6/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 748ms/step - categorical_accuracy: 0.2370 - loss: 8.2757 - perplexity: 3973.7712 - val_categorical_accuracy: 0.1667 - val_loss: 11.6114 - val_perplexity: 110344.8047\n", + "Epoch 7/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 697ms/step - categorical_accuracy: 0.3215 - loss: 7.9711 - perplexity: 3110.2710 - val_categorical_accuracy: 0.1667 - val_loss: 11.6354 - val_perplexity: 113026.3203\n", + "Epoch 8/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 847ms/step - categorical_accuracy: 0.3126 - loss: 7.4265 - perplexity: 1791.7644 - val_categorical_accuracy: 0.1667 - val_loss: 11.7362 - val_perplexity: 125022.5000\n", + "Epoch 9/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 671ms/step - categorical_accuracy: 0.3715 - loss: 6.4991 - perplexity: 682.2664 - val_categorical_accuracy: 0.1667 - val_loss: 11.8366 - val_perplexity: 138215.8281\n", + "Epoch 10/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 661ms/step - categorical_accuracy: 0.0734 - loss: 6.6783 - perplexity: 959.8943 - val_categorical_accuracy: 0.1667 - val_loss: 11.9409 - val_perplexity: 153410.4375\n", + "Epoch 11/41\n", + 
"\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 672ms/step - categorical_accuracy: 0.2731 - loss: 5.4216 - perplexity: 253.3555 - val_categorical_accuracy: 0.1667 - val_loss: 12.0696 - val_perplexity: 174486.5469\n", + "Epoch 12/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 771ms/step - categorical_accuracy: 0.2197 - loss: 5.1631 - perplexity: 218.4438 - val_categorical_accuracy: 0.1667 - val_loss: 12.3783 - val_perplexity: 237591.3281\n", + "Epoch 13/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 590ms/step - categorical_accuracy: 0.0990 - loss: 6.2295 - perplexity: 540.8942 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.5687 - val_perplexity: 287426.0312\n", + "Epoch 14/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 779ms/step - categorical_accuracy: 0.1786 - loss: 6.4879 - perplexity: 5995.6064 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.7386 - val_perplexity: 340639.4062\n", + "Epoch 15/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 748ms/step - categorical_accuracy: 0.1752 - loss: 5.1388 - perplexity: 226.8788 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.8619 - val_perplexity: 385366.4375\n", + "Epoch 16/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 621ms/step - categorical_accuracy: 0.1641 - loss: 5.7236 - perplexity: 863.9962 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.1398 - val_perplexity: 508785.5938\n", + 
"Epoch 17/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 689ms/step - categorical_accuracy: 0.4921 - loss: 2.9571 - perplexity: 20.0698 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.3148 - val_perplexity: 606077.4375\n", + "Epoch 18/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 844ms/step - categorical_accuracy: 0.3659 - loss: 4.9595 - perplexity: 819.0781 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.5068 - val_perplexity: 734419.0000\n", + "Epoch 19/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 593ms/step - categorical_accuracy: 0.3014 - loss: 4.9173 - perplexity: 152.2117 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.6640 - val_perplexity: 859409.2500\n", + "Epoch 20/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 847ms/step - categorical_accuracy: 0.2308 - loss: 3.7793 - perplexity: 60.0206 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.9386 - val_perplexity: 1131028.2500\n", + "Epoch 21/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 918ms/step - categorical_accuracy: 0.3832 - loss: 3.4479 - perplexity: 80.4731 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.0840 - val_perplexity: 1307933.0000\n", + "Epoch 22/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 560ms/step - categorical_accuracy: 0.3860 - loss: 4.3510 - perplexity: 90.9878 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.1620 - 
val_perplexity: 1414104.8750\n", + "Epoch 23/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 635ms/step - categorical_accuracy: 0.4443 - loss: 2.9553 - perplexity: 23.0736 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.2588 - val_perplexity: 1557883.3750\n", + "Epoch 24/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 636ms/step - categorical_accuracy: 0.4983 - loss: 2.2404 - perplexity: 9.9262 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.3779 - val_perplexity: 1754904.3750\n", + "Epoch 25/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 623ms/step - categorical_accuracy: 0.2909 - loss: 5.2172 - perplexity: 247.5778 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.4472 - val_perplexity: 1880756.3750\n", + "Epoch 26/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 806ms/step - categorical_accuracy: 0.3048 - loss: 3.1858 - perplexity: 25.7062 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.5364 - val_perplexity: 2056196.2500\n", + "Epoch 27/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 843ms/step - categorical_accuracy: 0.1185 - loss: 3.0064 - perplexity: 25.4380 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.5902 - val_perplexity: 2169912.7500\n", + "Epoch 28/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 866ms/step - categorical_accuracy: 0.5033 - loss: 2.9283 - perplexity: 35.1612 - val_categorical_accuracy: 0.1667 - 
val_loss: 14.6578 - val_perplexity: 2321627.0000\n", + "Epoch 29/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 794ms/step - categorical_accuracy: 0.3320 - loss: 3.2803 - perplexity: 27.8907 - val_categorical_accuracy: 0.1667 - val_loss: 14.7196 - val_perplexity: 2469625.0000\n", + "Epoch 30/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 660ms/step - categorical_accuracy: 0.2752 - loss: 5.4753 - perplexity: 249.7908 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.8572 - val_perplexity: 2834024.2500\n", + "Epoch 31/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 748ms/step - categorical_accuracy: 0.2925 - loss: 5.2035 - perplexity: 302.8727 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.9761 - val_perplexity: 3191841.5000\n", + "Epoch 32/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 585ms/step - categorical_accuracy: 0.2715 - loss: 3.0830 - perplexity: 22.1130 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.0934 - val_perplexity: 3589043.2500\n", + "Epoch 33/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 837ms/step - categorical_accuracy: 0.3638 - loss: 2.0138 - perplexity: 7.6831 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.1927 - val_perplexity: 3963894.5000\n", + "Epoch 34/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 622ms/step - categorical_accuracy: 0.4165 - loss: 2.3430 - perplexity: 12.4422 - 
val_categorical_accuracy: 0.0000e+00 - val_loss: 15.2933 - val_perplexity: 4383348.5000\n", + "Epoch 35/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 730ms/step - categorical_accuracy: 0.4832 - loss: 3.8156 - perplexity: 57.3130 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.4055 - val_perplexity: 4903895.5000\n", + "Epoch 36/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 831ms/step - categorical_accuracy: 0.1641 - loss: 4.5182 - perplexity: 317.0210 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.4245 - val_perplexity: 4998003.5000\n", + "Epoch 37/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 621ms/step - categorical_accuracy: 0.2752 - loss: 2.9753 - perplexity: 25.5228 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.4590 - val_perplexity: 5173094.0000\n", + "Epoch 38/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 666ms/step - categorical_accuracy: 0.2280 - loss: 2.4058 - perplexity: 11.6680 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.4232 - val_perplexity: 4991435.0000\n", + "Epoch 39/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 565ms/step - categorical_accuracy: 0.3592 - loss: 3.6356 - perplexity: 40.6227 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.4089 - val_perplexity: 4920268.0000\n", + "Epoch 40/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 665ms/step - categorical_accuracy: 0.2691 - loss: 3.4659 
- perplexity: 47.4784 - val_categorical_accuracy: 0.1667 - val_loss: 15.3797 - val_perplexity: 4778703.0000\n", + "Epoch 41/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 693ms/step - categorical_accuracy: 0.4310 - loss: 2.7924 - perplexity: 20.8595 - val_categorical_accuracy: 0.1667 - val_loss: 15.4324 - val_perplexity: 5037467.5000\n", + "this is neural_network_spec_file 2025_11_23 16_55_cerebros_not-gpt_meta_42/model_architectures/tr_0000000000000002_subtrial_0000000000000000.txt\n", + "returning trial 2 oracles\n", + " categorical_accuracy loss perplexity val_categorical_accuracy \\\n", + "0 0.000000 11.761698 226943.984375 0.000000 \n", + "1 0.260870 11.090140 65521.941406 0.166667 \n", + "2 0.086957 10.702163 44451.890625 0.166667 \n", + "3 0.173913 9.990288 21813.576172 0.166667 \n", + "4 0.347826 9.246581 10369.053711 0.166667 \n", + "5 0.260870 8.266317 3890.594971 0.166667 \n", + "6 0.347826 7.704062 2217.337646 0.166667 \n", + "7 0.304348 7.122604 1239.675415 0.166667 \n", + "8 0.347826 6.225540 505.495789 0.166667 \n", + "9 0.086957 6.562615 708.120972 0.166667 \n", + "10 0.260870 5.511858 247.610764 0.166667 \n", + "11 0.217391 5.295715 199.480270 0.166667 \n", + "12 0.130435 6.463620 641.378784 0.000000 \n", + "13 0.260870 5.026217 152.355484 0.000000 \n", + "14 0.217391 5.910099 368.742676 0.000000 \n", + "15 0.217391 4.887461 132.616455 0.000000 \n", + "16 0.434783 3.347833 28.441027 0.000000 \n", + "17 0.347826 4.313054 74.668182 0.000000 \n", + "18 0.304348 4.665129 106.179253 0.000000 \n", + "19 0.217391 4.334057 76.253006 0.000000 \n", + "20 0.391304 2.807739 16.572411 0.000000 \n", + "21 0.391304 4.215992 67.761353 0.000000 \n", + "22 0.391304 3.522572 33.871445 0.000000 \n", + "23 0.478261 2.265072 9.631822 0.000000 \n", + "24 0.347826 5.091538 162.639801 0.000000 \n", + "25 0.347826 2.982907 19.745134 0.000000 \n", + "26 0.130435 3.861120 
47.518566 0.000000 \n", + "27 0.434783 4.315707 74.866554 0.166667 \n", + "28 0.304348 3.004366 20.173416 0.166667 \n", + "29 0.217391 5.262289 192.922501 0.000000 \n", + "30 0.260870 5.697386 298.087250 0.000000 \n", + "31 0.347826 3.149921 23.334219 0.000000 \n", + "32 0.391304 2.063896 7.876601 0.000000 \n", + "33 0.391304 3.141111 23.129541 0.000000 \n", + "34 0.391304 3.663168 38.984657 0.000000 \n", + "35 0.217391 3.455597 31.677193 0.000000 \n", + "36 0.217391 3.796592 44.549091 0.000000 \n", + "37 0.217391 2.545129 12.744876 0.000000 \n", + "38 0.260870 4.018140 55.597588 0.000000 \n", + "39 0.173913 3.072442 21.594568 0.166667 \n", + "40 0.434783 2.709372 15.019834 0.166667 \n", + "\n", + " val_loss val_perplexity trial_number subtrial_number \\\n", + "0 11.722857 1.233594e+05 2 0 \n", + "1 11.644334 1.140433e+05 2 0 \n", + "2 11.609864 1.101793e+05 2 0 \n", + "3 11.582150 1.071679e+05 2 0 \n", + "4 11.588885 1.078919e+05 2 0 \n", + "5 11.611365 1.103448e+05 2 0 \n", + "6 11.635376 1.130263e+05 2 0 \n", + "7 11.736249 1.250225e+05 2 0 \n", + "8 11.836572 1.382158e+05 2 0 \n", + "9 11.940872 1.534104e+05 2 0 \n", + "10 12.069603 1.744865e+05 2 0 \n", + "11 12.378307 2.375913e+05 2 0 \n", + "12 12.568721 2.874260e+05 2 0 \n", + "13 12.738580 3.406394e+05 2 0 \n", + "14 12.861950 3.853664e+05 2 0 \n", + "15 13.139782 5.087856e+05 2 0 \n", + "16 13.314763 6.060774e+05 2 0 \n", + "17 13.506835 7.344190e+05 2 0 \n", + "18 13.664001 8.594092e+05 2 0 \n", + "19 13.938638 1.131028e+06 2 0 \n", + "20 14.083958 1.307933e+06 2 0 \n", + "21 14.162007 1.414105e+06 2 0 \n", + "22 14.258838 1.557883e+06 2 0 \n", + "23 14.377925 1.754904e+06 2 0 \n", + "24 14.447185 1.880756e+06 2 0 \n", + "25 14.536368 2.056196e+06 2 0 \n", + "26 14.590198 2.169913e+06 2 0 \n", + "27 14.657779 2.321627e+06 2 0 \n", + "28 14.719577 2.469625e+06 2 0 \n", + "29 14.857208 2.834024e+06 2 0 \n", + "30 14.976109 3.191842e+06 2 0 \n", + "31 15.093396 3.589043e+06 2 0 \n", + "32 15.192738 
3.963894e+06 2 0 \n", + "33 15.293323 4.383348e+06 2 0 \n", + "34 15.405540 4.903896e+06 2 0 \n", + "35 15.424549 4.998004e+06 2 0 \n", + "36 15.458982 5.173094e+06 2 0 \n", + "37 15.423234 4.991435e+06 2 0 \n", + "38 15.408874 4.920268e+06 2 0 \n", + "39 15.379680 4.778703e+06 2 0 \n", + "40 15.432412 5.037468e+06 2 0 \n", + "\n", + " model_name \n", + "0 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "1 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "2 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "3 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "4 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "5 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "6 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "7 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "8 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "9 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "10 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "11 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "12 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "13 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "14 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "15 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "16 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "17 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "18 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "19 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "20 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "21 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "22 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "23 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "24 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "25 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "26 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... 
\n", + "27 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "28 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "29 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "30 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "31 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "32 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "33 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "34 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "35 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "36 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "37 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "38 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "39 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "40 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", + " self.pid = os.fork()\n", + "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. 
os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", + " self.pid = os.fork()\n", + "Global task progress: 100%|\u001b[38;2;22;206;235mโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ\u001b[0m| 3/3 [12:11<00:00, 243.86s/it]" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Index(['categorical_accuracy', 'loss', 'perplexity',\n", + " 'val_categorical_accuracy', 'val_loss', 'val_perplexity',\n", + " 'trial_number', 'subtrial_number', 'model_name'],\n", + " dtype='object')\n", + "metric_to_rank_by is: 'perplexity'\n", + "Type of metric_to_rank_by is: \n", + "metric_to_rank_by is: 'perplexity'\n", + "Type of metric_to_rank_by is: \n", + "Best result this trial was: 7.876600742340088\n", + "Type of best result: \n", + "Best model name: 2025_11_23 16_55_cerebros_not-gpt_meta_42/models/tr_0000000000000002_subtrial_0000000000000000.keras\n", + "Cerebros trained 3 models in 12.19 min. Average time per model: 4.06 min.\n", + "Cerebros best perplexity achieved in Phase I-a is 7.876600742340088\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Training Stage I-a - Model Evaluation (Subjective):\n", + "\n", + "- We retrieve the best model found during the NAS phase and test its text generation capabilities from a subjective standpoint.\n", + "- Keep in mind, this is trained on 10 text samples. 
It is impressive that it can generate anything, especially subjects and verbs that are on-topic and agree, and is otherwise sensible, despite being grammatically gibberish.\n", + "\n", + "FYI: The generative components we imported from cerebrosllmutils:\n", + "\n", + "## Model config\n", + "```python\n", + "\n", + "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='CerebrosNotGPTConfig')\n", + "class CerebrosNotGPTConfig:\n", + " def __init__(self, max_sequence_length=1536, padding_token=None):\n", + " self.max_sequence_length = max_sequence_length\n", + " self.padding_token = padding_token\n", + "\n", + " def get_config(self):\n", + " return {\n", + " 'max_sequence_length': self.max_sequence_length,\n", + " 'padding_token': self.padding_token\n", + " }\n", + "\n", + " @classmethod\n", + " def from_config(cls, config):\n", + " return cls(**config)\n", + "```\n", + "\n", + "## Model class we imported from cerebrosllmutils, having:\n", + "\n", + "- Greedy sampling\n", + "- Temperature scaling\n", + "- Top p sampling\n", + "- Top k sampling\n", + "- Presence penalty\n", + "- Frequency penalty\n", + "- Repetition penalty\n", + "\n", + "```python\n", + "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='CerebrosNotGPT')\n", + "class CerebrosNotGPT(tf.keras.Model):\n", + " def __init__(self, config: Any, model: Any = None, **kwargs):\n", + " # 1. Store the nested model argument.\n", + " self.config = config\n", + " self.model = model\n", + " \n", + " # 2. Extract and remove custom kwargs (like 'model') before calling super.\n", + " # This is important to prevent 'unrecognized keyword argument' errors.\n", + " # The nested model is already extracted and stored, so it can be safely removed.\n", + " kwargs.pop('model', None)\n", + " \n", + " # 3.
Call the parent constructor with the cleaned kwargs.\n", + " super().__init__(**kwargs)\n", + "\n", + " self.max_sequence_length = config.max_sequence_length\n", + " self.padding_token = config.padding_token\n", + "\n", + " def get_config(self):\n", + " base_config = super().get_config()\n", + " config_dict = {\n", + " 'config': self.config.get_config(),\n", + " }\n", + " \n", + " # Explicitly handle nested model serialization.\n", + " # This is required if Keras's automatic tracking fails.\n", + " if self.model is not None:\n", + " # Note: This approach might still suffer from weight loss.\n", + " # The recommended way is to let Keras handle it automatically.\n", + " config_dict['model'] = tf.keras.utils.serialize_keras_object(self.model)\n", + "\n", + " base_config.update(config_dict)\n", + " return base_config\n", + "\n", + " @classmethod\n", + " def from_config(cls, config):\n", + " # Separate the custom config.\n", + " config_obj_dict = config.pop('config')\n", + " config_obj = CerebrosNotGPTConfig.from_config(config_obj_dict)\n", + " \n", + " # Manually extract and load the nested model.\n", + " nested_model_config = config.pop('model', None)\n", + " if nested_model_config:\n", + " nested_model = tf.keras.utils.deserialize_keras_object(nested_model_config)\n", + " else:\n", + " nested_model = None\n", + " \n", + " # Reconstruct the outer model by passing the restored parts.\n", + " return cls(config=config_obj, model=nested_model, **config)\n", + "\n", + " def call(self, inputs, training=False):\n", + " if self.model is None:\n", + " raise ValueError(\"Inner model not initialized properly\")\n", + " return self.model(inputs, training=training)\n", + "\n", + " @staticmethod\n", + " def apply_top_k_probs(probs, k):\n", + " if k is None or k <= 0:\n", + " return probs\n", + " # Flatten and argsort for indices\n", + " sorted_indices = tf.argsort(probs, direction='DESCENDING')\n", + " keep_indices = sorted_indices[:k]\n", + " mask = tf.zeros_like(probs, 
dtype=tf.bool)\n", + " mask = tf.tensor_scatter_nd_update(mask, tf.reshape(keep_indices, (-1, 1)),\n", + " tf.ones((k,), dtype=tf.bool))\n", + " filtered_probs = tf.where(mask, probs, tf.zeros_like(probs))\n", + " # Renormalize\n", + " filtered_probs = filtered_probs / tf.reduce_sum(filtered_probs)\n", + " return filtered_probs\n", + "\n", + " @staticmethod\n", + " def apply_top_p_probs(probs, p):\n", + " if p is None or p >= 1.0:\n", + " return probs\n", + " sorted_indices = tf.argsort(probs, direction='DESCENDING')\n", + " sorted_probs = tf.gather(probs, sorted_indices)\n", + " cumulative_probs = tf.cumsum(sorted_probs)\n", + " mask = cumulative_probs <= p\n", + " # Always keep at least 1 token\n", + " mask = tf.concat([tf.constant([True]), mask[1:]], axis=0)\n", + " keep_indices = tf.boolean_mask(sorted_indices, mask)\n", + " filtered_probs = tf.where(\n", + " tf.reduce_any(tf.equal(tf.range(tf.shape(probs)[0])[:, None], keep_indices), axis=1), probs,\n", + " tf.zeros_like(probs))\n", + " # Renormalize\n", + " filtered_probs = filtered_probs / tf.reduce_sum(filtered_probs)\n", + " return filtered_probs\n", + "\n", + " def generate(self,\n", + " token_ids,\n", + " do_sample=False,\n", + " max_new_tokens=None,\n", + " temperature=1.0,\n", + " top_k=None,\n", + " top_p=None,\n", + " frequency_penalty=None,\n", + " presence_penalty=None,\n", + " repetition_penalty=None):\n", + " \"\"\"\n", + " Generate text autoregressively from token IDs.\n", + " Applies filtering in sequence: penalties -> temperature -> top-k -> top-p\n", + " \"\"\"\n", + " # Convert token_ids to list if it's not already\n", + " if not isinstance(token_ids, list):\n", + " token_ids = list(token_ids)\n", + "\n", + " # Determine the actual maximum number of new tokens\n", + " if max_new_tokens is None:\n", + " max_new_tokens = self.max_sequence_length - len(token_ids)\n", + " else:\n", + " max_new_tokens = min(max_new_tokens, self.max_sequence_length - len(token_ids))\n", + "\n", + " # Initialize 
the generated tokens list\n", + " generated_tokens = []\n", + " current_tokens = token_ids.copy()\n", + "\n", + " # Autoregressive generation loop\n", + " for _ in range(max_new_tokens):\n", + " # Pad or truncate to max_sequence_length\n", + " if len(current_tokens) > self.max_sequence_length:\n", + " input_tokens = current_tokens[-self.max_sequence_length:]\n", + " else:\n", + " padding_needed = self.max_sequence_length - len(current_tokens)\n", + " input_tokens = current_tokens + [self.padding_token] * padding_needed\n", + "\n", + " # Convert to tensor and get model prediction\n", + " input_tensor = tf.constant([input_tokens], dtype=tf.int32)\n", + " probs_nested = self.model(input_tensor)\n", + " probs = probs_nested[0] # Already softmax probabilities (NOT logits as comment says)\n", + " logits = tf.math.log(probs + 10 ** -20) # Convert to logits for penalty application\n", + "\n", + " if do_sample:\n", + " # Apply repetition/frequency/presence penalties to logits\n", + " if frequency_penalty is not None or presence_penalty is not None:\n", + " # Collect token counts from current_tokens\n", + " token_counts = {}\n", + " for t in current_tokens:\n", + " token_counts[t] = token_counts.get(t, 0) + 1\n", + "\n", + " # Prepare penalty tensor\n", + " vocab_size = tf.shape(logits)[0]\n", + " penalties = tf.zeros_like(logits)\n", + "\n", + " for token_id, count in token_counts.items():\n", + " if token_id >= vocab_size:\n", + " continue\n", + " penalty = 0.0\n", + " if presence_penalty is not None:\n", + " penalty += presence_penalty\n", + " if frequency_penalty is not None:\n", + " penalty += frequency_penalty * count\n", + "\n", + " penalties = tf.tensor_scatter_nd_add(\n", + " penalties,\n", + " [[token_id]],\n", + " [penalty]\n", + " )\n", + "\n", + " # Subtract penalties from logits\n", + " logits = logits - penalties\n", + "\n", + " # Apply repetition penalty (standard approach)\n", + " if repetition_penalty is not None and repetition_penalty != 1.0:\n", + " # 
Collect unique tokens that have appeared\n", + " unique_tokens = list(set(current_tokens))\n", + " vocab_size = tf.shape(logits)[0]\n", + "\n", + " for token_id in unique_tokens:\n", + " if token_id < vocab_size:\n", + " # Divide logits of repeated tokens by penalty\n", + " logits = tf.tensor_scatter_nd_update(\n", + " logits,\n", + " [[token_id]],\n", + " [logits[token_id] / repetition_penalty]\n", + " )\n", + "\n", + " # Apply temperature\n", + " if temperature != 1.0:\n", + " logits = logits / temperature\n", + "\n", + " # Convert to probabilities\n", + " probs = tf.nn.softmax(logits)\n", + "\n", + " # Apply top-k filtering (if specified)\n", + " if top_k is not None and top_k > 0:\n", + " k = min(top_k, tf.shape(probs)[0])\n", + " # Get top-k values and indices\n", + " top_k_values, top_k_indices = tf.nn.top_k(probs, k=k, sorted=False)\n", + " # Create mask for top-k positions\n", + " top_k_mask = tf.scatter_nd(\n", + " tf.expand_dims(top_k_indices, 1),\n", + " tf.ones_like(top_k_values, dtype=tf.bool),\n", + " tf.shape(probs)\n", + " )\n", + " # Zero out non-top-k probabilities\n", + " probs = tf.where(top_k_mask, probs, tf.zeros_like(probs))\n", + " # Renormalize\n", + " probs = probs / tf.reduce_sum(probs)\n", + " print(\n", + " f\">>> After top_k: {tf.shape(probs)} shape, {tf.reduce_sum(tf.cast(probs > 1e-8, tf.int32))} non-zero probs\")\n", + "\n", + " # Apply top-p filtering (if specified)\n", + " if top_p is not None and top_p < 1.0:\n", + " # Sort probabilities in descending order\n", + " sorted_indices = tf.argsort(probs, direction='DESCENDING')\n", + " sorted_probs = tf.gather(probs, sorted_indices)\n", + " cumulative_probs = tf.cumsum(sorted_probs)\n", + " # Create mask for top-p\n", + " mask = cumulative_probs <= top_p\n", + " # Always keep at least one token\n", + " mask = tf.concat([tf.constant([True]), mask[1:]], axis=0)\n", + " # Get indices to keep\n", + " keep_indices = tf.boolean_mask(sorted_indices, mask)\n", + " # Create mask for original 
indices\n", + " filter_mask = tf.scatter_nd(\n", + " tf.expand_dims(keep_indices, 1),\n", + " tf.ones_like(keep_indices, dtype=tf.bool),\n", + " tf.shape(probs)\n", + " )\n", + " # Apply mask and renormalize\n", + " probs = tf.where(filter_mask, probs, tf.zeros_like(probs))\n", + " probs = probs / tf.reduce_sum(probs)\n", + " print(\n", + " f\">>> After top_p: {tf.shape(probs)} shape, {tf.reduce_sum(tf.cast(probs > 1e-8, tf.int32))} non-zero probs\")\n", + "\n", + " # Sample from the final filtered distribution\n", + " # Get non-zero indices and their probabilities\n", + " non_zero_mask = probs > 1e-8\n", + " if tf.reduce_any(non_zero_mask):\n", + " filtered_indices = tf.where(non_zero_mask)[:, 0] # Get indices\n", + " filtered_probs = tf.boolean_mask(probs, non_zero_mask) # Get probabilities\n", + " # Sample\n", + " sampled_local_index = tf.random.categorical(tf.math.log(filtered_probs)[None, :], 1)[0, 0]\n", + " # Map back to vocabulary index\n", + " next_token_id = int(filtered_indices[sampled_local_index].numpy())\n", + " else:\n", + " # Fallback if all probabilities are zero\n", + " warn(\n", + " \"Token sampling had to revert to greedy sampling, because no probs had a value > 0, unexpected\")\n", + " next_token_id = int(tf.argmax(probs, axis=-1).numpy())\n", + "\n", + " else:\n", + " # Greedy sampling (argmax) - apply repetition penalty if needed\n", + " if repetition_penalty is not None and repetition_penalty != 1.0:\n", + " unique_tokens = list(set(current_tokens))\n", + " vocab_size = tf.shape(logits)[0]\n", + " for token_id in unique_tokens:\n", + " if token_id < vocab_size:\n", + " logits = tf.tensor_scatter_nd_update(\n", + " logits,\n", + " [[token_id]],\n", + " [logits[token_id] / repetition_penalty]\n", + " )\n", + "\n", + " next_token_id = int(tf.argmax(logits, axis=-1).numpy())\n", + "\n", + " # Check for termination condition\n", + " if next_token_id == self.padding_token:\n", + " break\n", + "\n", + " # Add to generated tokens and update current 
tokens\n", + " generated_tokens.append(int(next_token_id))\n", + " current_tokens.append(int(next_token_id))\n", + "\n", + " # Check if we've reached max sequence length\n", + " if len(current_tokens) >= self.max_sequence_length:\n", + " break\n", + "\n", + " return token_ids + generated_tokens\n", + "\n", + "```" + ], + "metadata": { + "id": "96KSf1hKoe0H" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "## How this LLM wrapper works under the hood: A Simple Overview\n", + "\n", + "- Think of a Large Language Model like the \"autocomplete\" on your cell phone's keyboard that suggests the next word.\n", + "- Now, imagine you continuously click the suggested next word.\n", + "- The model picks the mathematically most likely next word, and you just go with it, and pick the next, then the next ...\n", + "\n", + "### Here is the step-by-step flow of how it generates text.\n", + "\n", + "1. INPUT: The Prompt\n", + "\n", + "The process always starts with a piece of text from you, the user.\n", + "\n", + "\"Write a story\"\n", + "\n", + "2. STEP 1: Tokenization — From Words to Numbers\n", + "\n", + "A computer doesn't understand letters or words; it understands numbers. The first step is to convert the prompt into a sequence of numbers the model can process. The tokenizer is a specialized dictionary for this job.\n", + "\n", + " What comes in: A string of text (\"Write a story\").\n", + " What goes out: A list of numerical IDs ([92, 21, 54, 21, 63, ...]).\n", + "\n", + "To make processing consistent, the input is always padded to a fixed length (e.g., 40 tokens). Any empty slots are filled with a special ID that is assigned by the tokenizer.\n", + "\n", + "\"Write a story\" -> tokenizer -> [92, 21, 54, 21, 63, 1234, 1234, ...
(length 40)]\n", + "\n", + "For example, it may look like:\n", + "```\n", + "92 = \"Write\"\n", + "21 = \" \"\n", + "54 = \"a\"\n", + "63 = \"story\"\n", + "1234 = \"\" (the padding token, repeated until there are 40 numbers)\n", + "```\n", + "\n", + "3. The Model's Core: Going From Token IDs to the Predicted Next Token:\n", + "\n", + "This is the \"black box\" part. Inside the model, 4 basic things happen:\n", + "\n", + " 1. Embedding (Converts the discrete, high-dimensional sequence of tokens into a continuous distribution of a smaller dimensionality).\n", + " 2. Positional embedding: Takes the output of the embedding layer and represents their relative sequential order as a continuous distribution with a clear mathematical relationship.\n", + " 3. Prediction: A lattice of Dense layers, arranged as columns and rows, each having randomized lateral connectivity with other Dense layers on the same row, and randomized vertical connectivity with Dense layers on other rows. This takes the positional embedding's output and returns a numerical answer from its head layer. This element, produced by the Cerebros NAS, serves as a more computationally efficient alternative to the attention block used in other LLMs. The output is of shape (BATCH_SIZE, VOCABULARY_SIZE) as logits.\n", + " 4. Output activation (Scales the output to a valid range). In this case, the raw output is a tensor of shape (BATCH_SIZE, VOCABULARY_SIZE). The numbers need to be cast as probabilities, so the valid range is:\n", + " - Each element in the list must be in the range between 0 and 1 (inclusive).\n", + " - The entire list of numbers must add up to 1.\n", + " - Softmax is used to accomplish this.\n", + "\n", + "As mentioned before, this is a \"Single Head\" model, unlike most LLMs (like GPT-3/4). Each call returns **only** the next token expected in the sequence, expressed as a list of probabilities (probs) of shape (BATCH_SIZE, VOCABULARY_SIZE).\n", + "\n", + "\n", + "4.
Predicting the Next Word From the Output of The Final Layer:\n", + "\n", + "After the model returns a list of probabilities, we must **pick the next word** from this. There are VOCABULARY_SIZE words in the vocabulary, each assigned an index position on this list.\n", + "\n", + "5. Sampling\n", + "\n", + "- **Greedy Sampling:** The naive strategy is to just pick the highest probability in this distribution (we call this greedy sampling) and assume it is the correct next token. You then decode that token ID and use it as the next word. Naively assuming the highest probability is correct makes for a few problems, including:\n", + " - The output will be identical every time you write the same prompt.\n", + " - Common words like \"the\", \"and\", ... will be used too often and used out of place.\n", + " - The text will seem \"dry\" and lack creative appeal.\n", + "- **Beam Sampling**: The better approach is scaling, then sampling from a few of the top choices. We apply scaling to the logits and recalculate the probabilities. Then, we eliminate unlikely possibilities. This leaves a smaller set of plausible tokens, from which we randomly select the next word. The methods we use are:\n", + " - **Presence penalty:** Steeply penalizes the logit for a token that has already been used recently or as the last word in the sequence, making it very unlikely to survive sampling and be selected. **Its purpose:** Mainly prevents the same word from being used twice **in immediate succession**: \"This is **the the the** problem which **this this** scaling technique should fix.\"\n", + " - **Frequency penalty:** Mildly penalizes the logit for a token that has been **overused** in the text, but **not necessarily** the last or recent word, making it less likely to be chosen repeatedly but still possible. **For example:** \"This technique **like** fixes **like** this from **like** happening.
It's **like** really really annoying.\"\n", + " - **Repetition penalty scaling**: A penalty that balances the effects of both presence and frequency penalties, attempting to fix both problems at the same time.\n", + " - **Temperature scaling:** Temperature scaling divides logits by a number you set for 'temperature' to control output \"creativity\" vs \"precision\". Low temperatures less than 1 make the model's top choices more likely, creating predictable text. High temperatures greater than 1 give less likely words a better chance, leading to more diverse and random text. Basically, the higher you set it, the more creative and less factual the LLM's writing will be; the lower you set it, the more precise and factual.\n", + " - After applying all scaling, we convert the logits back to probabilities using softmax. We then proceed to sampling:\n", + " - **Top k sampling**: Set a number 'k'. Eliminate all but the highest k numbers on this list of scaled probabilities.\n", + " - **Top p sampling:** Set a number 'p'. Starting from the most likely token, add up the probabilities until the sum reaches or exceeds 'p'. Keep only this cumulative set of tokens.\n", + " \n", + "Now that we have scaled and filtered the list of tokens, we randomly pick one from the remaining options.\n", + "\n", + "\n", + "6. The Generation Loop: We just do this on repeat.\n", + "\n", + "The model only predicts one word at a time. To complete text, we repeat this step, each time feeding in the original prompt plus the tokens predicted so far.
We call this an **autoregressive** loop.\n", + "\n", + "\n", + " Start with a prompt: \"Write a story\"\n", + " \n", + " Input: [Write, a, story]\n", + " Model predicts the token that decodes to: \"about\"\n", + "\n", + " Repeat 1: The appended sequence is fed back into the model.\n", + " New Input: [Write, a, story, about]\n", + " Model predicts: \"a\"\n", + "\n", + " Repeat 2: The appended sequence is fed back into the model again.\n", + " New Input: [Write, a, story, about, a]\n", + " Model predicts: \"fox\"\n", + "\n", + "This loop continues until the model generates a special \"end-of-sequence\" token / pad token or it reaches its maximum length limit (40 tokens in our example).\n", + "\n", + "\n", + "\n", + "## Revisiting the analogy of the autocomplete on repeat, this is what this looks like:\n" + ], + "metadata": { + "id": "kMW_6Vrq_Yi9" + } + }, + { + "cell_type": "markdown", + "source": [ + "![image.png]()" + ], + "metadata": { + "id": "NaMi9QniKqdO" + } + }, + { + "cell_type": "code", + "source": [ + "# Get the best model from the search\n", + "best_model_found = cerebros_automl.get_best_model(purge_model_storage_files='slate')\n", + "\n", + "# Create config and generative model wrapper\n", + "config = CerebrosNotGPTConfig(\n", + " max_sequence_length=MAX_SEQ_LENGTH,\n", + " padding_token=tokenizer.pad_token_id\n", + ")\n", + "generator = CerebrosNotGPT(config, model=best_model_found)\n", + "\n", + "# Test if the model can be built successfully\n", + "text = \"This is a test ...\"\n", + "input_ids = tokenizer(text, add_special_tokens=False)['input_ids']\n", + "current_tokens = input_ids.copy()\n", + "PADDING_TOKEN = tokenizer.pad_token_id\n", + "\n", + "if len(current_tokens) > MAX_SEQ_LENGTH:\n", + " input_tokens = current_tokens[-MAX_SEQ_LENGTH:]\n", + "else:\n", + " padding_needed = MAX_SEQ_LENGTH - len(current_tokens)\n", + " input_tokens = current_tokens + [PADDING_TOKEN] * padding_needed\n", + "\n", + "# A dummy pass to force the model to build\n", + "\n", + "input_tensor =
tf.constant([input_tokens], dtype=tf.int32)\n", + "\n", + "try:\n", + " _ = generator(input_tensor)\n", + " print(\"โœ… Building LLM Model Successful!\")\n", + "except Exception as exc:\n", + " error_message = f\"โŒ Building model returned the error: {exc}\"\n", + " print(error_message)\n" + ], + "metadata": { + "id": "AEk-TtPCxleV", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d253eeeb-831e-48ce-f256-c8f10540064a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/keras/src/layers/layer.py:421: UserWarning: `build()` was called on layer 'interleaved_ro_pe', however the layer does not have a `build()` method implemented and it looks like it has unbuilt state. This will cause the layer to be marked as built, despite not being actually built, which may cause failures down the line. Make sure to implement a proper `build()` method.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "โœ… Building LLM Model Successful!\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Text Generation Utilities\n", + "\n", + "We define two helper functions for text generation:\n", + "\n", + "- One for greedy sampling\n", + "- One for beam sampling with various parameters." 
+ ], + "metadata": { + "id": "u6-wAM0XyUZC" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "# Required parameter\n", + "\n", + "trial_number = 1\n", + "\n", + "\n", + "# Utility function for greedy sampling\n", + "def complete_text_greedy(text: str, max_new_tokens: int = 10) -> str:\n", + "    \"\"\"Complete `text` with greedy decoding (do_sample=False).\n", + "\n", + "    Tokenizes the prompt, calls generator.generate(), decodes the full token\n", + "    sequence and returns it with every occurrence of the prompt text removed.\n", + "    \"\"\"\n", + "    input_ids = tokenizer(text, add_special_tokens=False)['input_ids']\n", + "    generated_tokens = generator.generate(\n", + "        token_ids=input_ids,\n", + "        do_sample=False,\n", + "        max_new_tokens=max_new_tokens\n", + "    )\n", + "    generated_text = tokenizer.decode(generated_tokens).replace(text, \"\")\n", + "    return generated_text\n", + "\n", + "# Utility function for beam sampling\n", + "def complete_text_beam(text: str,\n", + "                       max_new_tokens: int = 10,\n", + "                       temperature: float = 0.75,\n", + "                       top_k: int = 75,\n", + "                       top_p: float = 0.98,\n", + "                       repetition_penalty: float = None,\n", + "                       presence_penalty: float = 1.3,\n", + "                       frequency_penalty: float = 1.4) -> str:\n", + "    \"\"\"Complete `text` by sampling (do_sample=True) with the given parameters.\n", + "\n", + "    Every sampling parameter, including repetition_penalty, is forwarded to\n", + "    generator.generate(). The full token sequence is decoded and returned with\n", + "    every occurrence of the prompt text removed.\n", + "    \"\"\"\n", + "    input_ids = tokenizer(text, add_special_tokens=False)['input_ids']\n", + "    generated_tokens = generator.generate(\n", + "        token_ids=input_ids,\n", + "        do_sample=True,\n", + "        max_new_tokens=max_new_tokens,\n", + "        temperature=temperature,\n", + "        top_k=top_k,\n", + "        top_p=top_p,\n", + "        # Must be forwarded, otherwise the repetition_penalty argument has no effect.\n", + "        repetition_penalty=repetition_penalty,\n", + "        presence_penalty=presence_penalty,\n", + "        frequency_penalty=frequency_penalty\n", + "    )\n", + "    generated_text = tokenizer.decode(generated_tokens).replace(text, \"\")\n", + "    return generated_text\n" + ], + "metadata": { + "id": "f8XigcJcykLn" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Running Generation Tests\n", + "\n", + "We run a series of tests with different prompts and sampling parameters to evaluate the quality of the model from Stage I-a."
+ ], + "metadata": { + "id": "HG0IjcWEyrXn" + } + }, + { + "cell_type": "code", + "source": [ + "def test_text(test_prompt: str, max_new_tokens: int, result_cutoff: float, trial_id: int,\n", + "              test_sample_number: int, result_0: float) -> None:\n", + "    \"\"\"\n", + "    If result_0 < result_cutoff, run a matrix of different sampling values and print the resulting text for human subjective evaluation. Otherwise do nothing.\n", + "\n", + "    Parameters:\n", + "    - test_prompt: a string to prompt generation\n", + "    - max_new_tokens: int, number of tokens to generate unless we generate a stop token.\n", + "    - result_cutoff: Perplexity score below which a trial is considered worth running this on\n", + "    - trial_id: Trial number, printed as metadata with each result\n", + "    - test_sample_number: Index of the prompt sample, printed as metadata with each result\n", + "    - result_0: Perplexity score from this run\n", + "    \"\"\"\n", + "    if result_0 < result_cutoff:\n", + "        generation_param_permutations = [\n", + "            # #3\n", + "            {\n", + "                'max_new_tokens': max_new_tokens,\n", + "                'temperature': 0.6,\n", + "                'top_k': 75,\n", + "                'top_p': 0.98,\n", + "                'repetition_penalty': None,\n", + "                'presence_penalty': 1.3,\n", + "                'frequency_penalty': 1.4\n", + "            },\n", + "            # #4\n", + "            {\n", + "                'max_new_tokens': max_new_tokens,\n", + "                'temperature': 0.7,\n", + "                'top_k': 75,\n", + "                'top_p': 0.98,\n", + "                'repetition_penalty': None,\n", + "                'presence_penalty': 1.3,\n", + "                'frequency_penalty': 1.4\n", + "            },\n", + "            # #5\n", + "            {\n", + "                'max_new_tokens': max_new_tokens,\n", + "                'temperature': 0.7,\n", + "                'top_k': 75,\n", + "                'top_p': 0.97,\n", + "                'repetition_penalty': None,\n", + "                'presence_penalty': 1.3,\n", + "                'frequency_penalty': 1.4},\n", + "            # #6\n", + "            {\n", + "                'max_new_tokens': max_new_tokens,\n", + "                'temperature': 0.75,\n", + "                'top_k': 75,\n", + "                'top_p': 0.98,\n", + "                'repetition_penalty': None,\n", + "                'presence_penalty': 1.4,\n", + "                'frequency_penalty': 1.4},\n", + "            # #7\n", + "            {\n", + "                'max_new_tokens': max_new_tokens,\n", + "                'temperature': 0.7,\n", + "                'top_k': 75,\n", + "                'top_p': 0.98,\n", + "                'repetition_penalty': None,\n", + "                'presence_penalty': 1.4,\n", + "                'frequency_penalty': 1.4},\n", 
+ "            # #8\n", + "            {\n", + "                'max_new_tokens': max_new_tokens,\n", + "                'temperature': 0.6,\n", + "                'top_k': 75,\n", + "                'top_p': 0.98,\n", + "                'repetition_penalty': None,\n", + "                'presence_penalty': 1.4,\n", + "                'frequency_penalty': 1.4\n", + "            },\n", + "            {\n", + "                'max_new_tokens': max_new_tokens,\n", + "                'temperature': 0.6,\n", + "                'top_k': 40,\n", + "                'top_p': 0.96,\n", + "                'repetition_penalty': None,\n", + "                'presence_penalty': 1.4,\n", + "                'frequency_penalty': 1.4\n", + "            },\n", + "            {\n", + "                'max_new_tokens': max_new_tokens,\n", + "                'temperature': 0.7,\n", + "                'top_k': 45,\n", + "                'top_p': 0.97,\n", + "                'repetition_penalty': None,\n", + "                'presence_penalty': 1.4,\n", + "                'frequency_penalty': 1.3\n", + "            },\n", + "            {\n", + "                'max_new_tokens': max_new_tokens,\n", + "                'temperature': 0.6,\n", + "                'top_k': 75,\n", + "                'top_p': 0.99,\n", + "                'repetition_penalty': None,\n", + "                'presence_penalty': 1.4,\n", + "                'frequency_penalty': 1.4\n", + "            },\n", + "            {\n", + "                'max_new_tokens': max_new_tokens,\n", + "                'temperature': 0.65,\n", + "                'top_k': 75,\n", + "                'top_p': 0.985,\n", + "                'repetition_penalty': None,\n", + "                'presence_penalty': 1.4,\n", + "                'frequency_penalty': 1.4\n", + "            },\n", + "            {\n", + "                'max_new_tokens': max_new_tokens,\n", + "                'temperature': 0.8,\n", + "                'top_k': 75,\n", + "                'top_p': 0.99,\n", + "                'repetition_penalty': None,\n", + "                'presence_penalty': 0.7,\n", + "                'frequency_penalty': 0.7\n", + "            }\n", + "        ]\n", + "        # Default cases, no params\n", + "        response_1 = complete_text_greedy(text=test_prompt, max_new_tokens=max_new_tokens)\n", + "        # Report the max_new_tokens actually used rather than a hard-coded 10.\n", + "        print(\n", + "            f\"Trial #: {trial_id} Text Sample #: {test_sample_number} Perplexity: {result_0} GENERATE SAMPLING PARAMS: Greedy max_new_tokens={max_new_tokens} otherwise - N/A: PROMPT: '{test_prompt}' RESPONSE: '{response_1}'\")\n", 
+ "        response_2 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens)\n", + "        # The remaining values in this label are the defaults of complete_text_beam.\n", + "        print(\n", + "            f\"Trial #: {trial_id} Text Sample #: {test_sample_number} Perplexity: {result_0} GENERATE PARAMS: Beam Default - max_new_tokens = {max_new_tokens}, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: '{test_prompt}' RESPONSE: '{response_2}'.\")\n", + "\n", + "        for perm_0 in generation_param_permutations:\n", + "            response_0 = complete_text_beam(text=test_prompt,\n", + "                                            max_new_tokens=max_new_tokens,\n", + "                                            temperature=perm_0['temperature'],\n", + "                                            top_k=perm_0['top_k'],\n", + "                                            top_p=perm_0['top_p'],\n", + "                                            repetition_penalty=perm_0['repetition_penalty'],\n", + "                                            presence_penalty=perm_0['presence_penalty'],\n", + "                                            frequency_penalty=perm_0['frequency_penalty'])\n", + "            print(\n", + "                f\"Trial #: {trial_id} Text Sample #: {test_sample_number} Perplexity: {result_0} GENERATE PARAMS: max_new_tokens={perm_0['max_new_tokens']} temperature={perm_0['temperature']}, top_k={perm_0['top_k']}, top_p={perm_0['top_p']}, repetition_penalty={perm_0['repetition_penalty']} presence_penalty={perm_0['presence_penalty']} frequency_penalty={perm_0['frequency_penalty']} PROMPT: '{test_prompt}' RESPONSE: '{response_0}'\")\n", + "\n", + "\n", + "prompt_samples = [\n", + " \"I saw the sun and it was as shining on the\",\n", + " \"And God said, Let there be light: and there \",\n", + " \"In the beginning God created the heavens\"\n", + "]\n", + "\n", + "\n", + "counter = 0\n", + "for sample in prompt_samples:\n", + " test_text(\n", + " test_prompt=sample,\n", + " max_new_tokens=MAX_NEW_TOKENS,\n", + " 
result_cutoff=15,\n", + " trial_id=trial_number,\n", + " test_sample_number=counter,\n", + " result_0=phase_i_a_result)\n", + " counter += 1\n", + "\n", + "\n", + "collect()\n" + ], + "metadata": { + "id": "hut-HAJjyvn-", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "e05a9fb1-706e-4f26-e668-825f7df940c2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' earth the the the the the the the the the the the the the the'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 7 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 7 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' earth God beginning'.\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + 
">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' earth. beginning created God'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 13 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 31 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 9 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 14 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' created. 
beginning God earthless earth beginning'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' earth God.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 7 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 10 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 16 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None 
presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' God earth beginning created heavens. earth created'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' beginning created earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 9 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 7 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None 
presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' created beginning earth heavens. God earth'\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 7 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' created earth beginning God heavens. 
earth'\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 7 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 10 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 9 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'I saw the sun and it was as shining on the' 
RESPONSE: ' God earth beginning created beginning God. earth heavens created beginning heavens earth. earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 7 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 9 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 10 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 18 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' created earth beginning God heavens. 
created earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 7 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 9 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 12 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' beginning earth God created earth heavens'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 26 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 16 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 26 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 29 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 34 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero 
probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' earth created heavens earth\\Order.cpt. the beginning'\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the the.. the the the the the the the the the the.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' earth. 
the'.\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' earth. the.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: 'And God said, Let there be light: and there. 
earth the'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the. the earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, 
repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: 'And God said, Let there be light: and there. the earth'\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the. 
earth'\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the. earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: 'And God said, Let there be light: and there. 
the earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the earth.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ''\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'In the beginning God created the heavens' RESPONSE: ' heavens heavens heavens heavens and heavens heavens heavens heavens heavens heavens heavens heavens and and'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, 
frequency_penalty=1.4: PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth'.\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and. 
earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' was earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None 
presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and. earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth.'\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and. 
earth'\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and. earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 
7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and created earth.'\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "5885" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Stage I-b: Extended Training\n", + "\n", + "- Now, we take the best model from Stage I-a and continue training it on a larger dataset.\n", + "- This uses a streaming `tf.data.Dataset` generator to allow handling of much larger data sets without using more RAM.\n", + "- This would allow us to select far more samples, but for now, we select a small subset for this small scale environment.\n", + "\n", + "## Streaming Data Generator for Large Datasets\n", + "\n", + "\n", + "The **SampleExpansionGenerator** class, which we create below:\n", + "\n", + " - Applies and streams the same preprocessing logic to the raw text samples as we did in Stage I-a.\n", + " - However, it preprocesses one 
**sample expansion batch** at a time and stores the resulting expanded samples in memory.\n", + " - It then feeds the resulting expanded samples to the model in batches matching the **model's BATCH_SIZE** as requested by the training loop.\n", + " - **sample expansion batch** is not the same as **the model's BATCH_SIZE**.\n", + "\n", + "For example, we could train on a dataset of 10 \\** 6 samples, while setting the **sample expansion batch size** to 100 while the **model's batch size** is 10.\n", + " - 100 raw text samples will be expanded at a time.\n", + " - This results in thousands of expanded sub-samples being queued and ready for the model to take.\n", + " - The model will take 10 of these at a time until it does not have 10 left to provide.\n", + " - Then, the generator will preprocess another 100 text samples and garbage collect.\n", + "\n", + "This allows training on datasets that would be much larger than available memory after expansion, making the training scalable.\n", + "\n", + "\n", + "### The sample expansion batch size should be optimized to balance two opposing forces:\n", + "\n", + " - Memory pressure increases with the number of expanded samples held in memory.\n", + " - Delays are caused by switching back and forth between tensor operations and preprocessing when batches are too small.\n", + "\n" + ], + "metadata": { + "id": "tuhQx2kjy4nn" + } + }, + { + "cell_type": "code", + "source": [
+ "from collections import deque\n",
+ "\n",
+ "\n",
+ "class SampleExpansionGenerator:\n",
+ "    \"\"\"Iterator that lazily expands raw text samples into (inputs, label) pairs.\n",
+ "\n",
+ "    Raw samples are handed to `prepare_data` in slices of\n",
+ "    `sample_expansion_batch_size`. The expanded samples it returns are\n",
+ "    queued and yielded one at a time; the next slice is expanded only once\n",
+ "    the queue is empty.\n",
+ "\n",
+ "    Args:\n",
+ "        raw_text_samples: Sliceable sequence of raw text samples.\n",
+ "        tokenizer: Tokenizer forwarded to `prepare_data`.\n",
+ "        sample_expansion_batch_size: Number of raw samples expanded per slice.\n",
+ "        model_batch_size: Stored only; this class yields single samples and\n",
+ "            leaves batching to the `tf.data` pipeline in `create_dataset`.\n",
+ "        prompt_length_0: Forwarded to `prepare_data` as `prompt_length`.\n",
+ "        max_seq_length: Forwarded to `prepare_data`; also read by\n",
+ "            `create_dataset` for the input shape of the output signature.\n",
+ "        vocabulary_size: Read by `create_dataset` for the label shape of the\n",
+ "            output signature.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def __init__(self,\n",
+ "                 raw_text_samples,\n",
+ "                 tokenizer,\n",
+ "                 sample_expansion_batch_size=50,\n",
+ "                 model_batch_size=10,\n",
+ "                 prompt_length_0=PROMPT_LENGTH,\n",
+ "                 max_seq_length=MAX_SEQ_LENGTH,\n",
+ "                 vocabulary_size=VOCABULARY_SIZE):\n",
+ "\n",
+ "        self.raw_text_samples = raw_text_samples\n",
+ "        self.tokenizer = tokenizer\n",
+ "        self.sample_expansion_batch_size = sample_expansion_batch_size\n",
+ "        self.model_batch_size = model_batch_size\n",
+ "        self.prompt_length_0 = prompt_length_0\n",
+ "        self.max_seq_length = max_seq_length\n",
+ "        self.vocabulary_size = vocabulary_size\n",
+ "        # FIFO queues of expanded samples; deque gives O(1) pops from the left\n",
+ "        # (list.pop(0) shifts every remaining element on each call).\n",
+ "        self.data = deque()\n",
+ "        self.labels = deque()\n",
+ "        # Index of the next raw sample that has not been expanded yet.\n",
+ "        self.current_index = 0\n",
+ "\n",
+ "    def _expand_next_batch(self):\n",
+ "        \"\"\"Expand the next slice of raw samples and append the result to the queues.\"\"\"\n",
+ "        # If we've already processed all raw samples for this epoch, do nothing.\n",
+ "        if self.current_index >= len(self.raw_text_samples):\n",
+ "            return\n",
+ "\n",
+ "        # Determine the next meta-batch\n",
+ "        start_idx = self.current_index\n",
+ "        end_idx = min(start_idx + self.sample_expansion_batch_size, len(self.raw_text_samples))\n",
+ "\n",
+ "        batch_samples = self.raw_text_samples[start_idx:end_idx]\n",
+ "        self.current_index = end_idx\n",
+ "\n",
+ "        # Run prepare_data on this batch (its third return value is not used here)\n",
+ "        input_ids_list, labels_list, _ = prepare_data(\n",
+ "            data_0=batch_samples,\n",
+ "            tokenizer_0=self.tokenizer,\n",
+ "            max_seq_length=self.max_seq_length,\n",
+ "            prompt_length=self.prompt_length_0)\n",
+ "\n",
+ "        # Add the new data to our internal queues\n",
+ "        self.data.extend(input_ids_list)\n",
+ "        self.labels.extend(labels_list)\n",
+ "\n",
+ "    def __iter__(self):\n",
+ "        \"\"\"Rewind to the first raw sample and clear the queues for a new epoch.\"\"\"\n",
+ "        self.current_index = 0\n",
+ "        self.data = deque()\n",
+ "        self.labels = deque()\n",
+ "        return self\n",
+ "\n",
+ "    def __next__(self):\n",
+ "        \"\"\"Return the next `((input_ids,), label)` pair or raise StopIteration.\"\"\"\n",
+ "        # Keep expanding until something is queued or the raw samples run out.\n",
+ "        # A single slice that expands to nothing must not end the epoch while\n",
+ "        # later raw samples are still waiting.\n",
+ "        while not self.data and self.current_index < len(self.raw_text_samples):\n",
+ "            self._expand_next_batch()\n",
+ "\n",
+ "        # Still empty after exhausting the raw samples: the epoch is over.\n",
+ "        if not self.data:\n",
+ "            raise StopIteration\n",
+ "\n",
+ "        # Pop and return one sample\n",
+ "        input_sample = self.data.popleft()\n",
+ "        label_sample = self.labels.popleft()\n",
+ "\n",
+ "        return ((input_sample,), label_sample)\n",
+ "\n",
+ "\n",
+ "# Create the tf.data.Dataset\n",
+ "def create_dataset(raw_text_samples, tokenizer, sample_expansion_batch_size=50, model_batch_size=10) -> tf.data.Dataset:\n",
+ "    \"\"\"Wrap a `SampleExpansionGenerator` in a batched, prefetching `tf.data.Dataset`.\n",
+ "\n",
+ "    Before batching, each element is `((input_ids,), label)` where `input_ids`\n",
+ "    is int32 with shape `(max_seq_length,)` and `label` is float32 with shape\n",
+ "    `(vocabulary_size,)`. Elements are then grouped into batches of\n",
+ "    `model_batch_size`.\n",
+ "    \"\"\"\n",
+ "    generator_0 = SampleExpansionGenerator(\n",
+ "        raw_text_samples=raw_text_samples,\n",
+ "        tokenizer=tokenizer,\n",
+ "        sample_expansion_batch_size=sample_expansion_batch_size,\n",
+ "        model_batch_size=model_batch_size\n",
+ "    )\n",
+ "\n",
+ "    # The lambda always returns the same object; SampleExpansionGenerator.__iter__\n",
+ "    # rewinds it, so each pass over the dataset starts from the first raw sample.\n",
+ "    dataset = tf.data.Dataset.from_generator(\n",
+ "        lambda: generator_0,\n",
+ "        output_signature=(\n",
+ "            (tf.TensorSpec(shape=(generator_0.max_seq_length,), dtype=tf.int32),),  # A tuple containing ONE TensorSpec\n",
+ "            tf.TensorSpec(shape=(generator_0.vocabulary_size,), dtype=tf.float32)  # A single TensorSpec\n",
+ "        )\n",
+ "    )\n",
+ "\n",
+ "    # Batch it\n",
+ "    dataset = dataset.batch(model_batch_size)\n",
+ "    dataset = dataset.prefetch(tf.data.AUTOTUNE)  # Prefetch for performance\n",
+ "    return dataset\n",
+ "\n",
+ "# Create training and validation datasets\n",
+ "phase_i_b_train_dataset = create_dataset(\n",
+ "    raw_text_samples=phase_i_b_train_samples,\n",
+ "    tokenizer=tokenizer,\n",
+ "    sample_expansion_batch_size=PHASE_I_B_SAMPLE_EXPANSION_BATCH_SIZE,\n",
+ "    model_batch_size=batch_size\n",
+ ")\n",
+ "\n",
+ "phase_i_b_val_dataset = create_dataset(\n",
+ "    raw_text_samples=phase_i_b_val_samples,\n",
+ "    tokenizer=tokenizer,\n",
+ "    sample_expansion_batch_size=PHASE_I_B_SAMPLE_EXPANSION_BATCH_SIZE,\n",
+ "    model_batch_size=batch_size\n",
+ ")\n"
+ ], + "metadata": { + "id": "MHWWE0xIzLRD" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "type(phase_i_b_train_dataset)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 121 + }, + "id": "HxwyQzSppQwp", + "outputId":
"89a48aa5-c364-4057-98c4-fc4a291f448e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensorflow.python.data.ops.prefetch_op._PrefetchDataset" + ], + "text/html": [ + "
\n", + "
tensorflow.python.data.ops.prefetch_op._PrefetchDataset
def __init__(input_dataset, buffer_size, slack_period=None, name=None)
/usr/local/lib/python3.12/dist-packages/tensorflow/python/data/ops/prefetch_op.pyA `Dataset` that asynchronously prefetches its input.
\n", + " \n", + "
" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "## Model Compilation for Stage I-b\n", + "\n", + "- We recompile the model with the same base optimizer (AdamW), however this time with a custom learning rate scheduler (WarmupCosineDecayRestarts), and for disambiguation, relevant metrics for this training phase. We also add an EarlyStopping callback which is mainly being used to restore the weights from the best epoch, if that turns out to not be the last epoch.\n", + "\n", + "\n", + "## For those wanting to scale this up, a word to point out:\n", + "\n", + "The parameters for the learning rate scheduler may need to be optimized. They will be different for your data. Alternatively, you can remove the learning rate scheduler if this is too much trial and error.\n", + "\n", + "- We set the starting learning rate at: 0.0039295722955565125\n", + "- We set warmup steps to 1140, which for the data selected is 15 epochs.\n", + "- We set first decay steps to 1900, which for this data set is about 25 epochs.\n", + "\n", + "Also:\n", + "\n", + "Additionally, the early stopping callback will likely need to be adjusted. 
When training at scale, you may use a lower learning rate and a larger number of epochs, as well as a larger value for the start_from_epoch parameter (which specifies when to begin tracking the metric for early stopping).\n", + "\n", + "FYI, this is the custom scheduler we imported from cerebrosllmutils (CosineDecayRestarts augmented with warmup steps):\n", + "\n", + "\n", + "```python\n", + "# A custom schedule: Cosine decay with some warm - up steps\n", + "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='WarmupCosineDecayRestarts')\n", + "class WarmupCosineDecayRestarts(tf.keras.optimizers.schedules.LearningRateSchedule):\n", + " \"\"\"\n", + " A learning rate schedule that combines a linear warmup with cosine decay restarts.\n", + " \"\"\"\n", + "\n", + " def __init__(self, initial_learning_rate, warmup_steps, first_decay_steps, t_mul=2.0, m_mul=1.0, alpha=0.0):\n", + " super().__init__()\n", + "\n", + " # Store all parameters as public attributes for get_config serialization\n", + " self.initial_learning_rate = initial_learning_rate\n", + " self.warmup_steps = warmup_steps\n", + " self.first_decay_steps = first_decay_steps\n", + " self.t_mul = t_mul\n", + " self.m_mul = m_mul\n", + " self.alpha = alpha\n", + "\n", + " # Create the CosineDecayRestarts schedule for internal logic.\n", + " # The parameters passed here are the same ones we just stored.\n", + " self.cosine_restarts_schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(\n", + " initial_learning_rate=initial_learning_rate,\n", + " first_decay_steps=first_decay_steps,\n", + " t_mul=t_mul,\n", + " m_mul=m_mul,\n", + " alpha=alpha\n", + " )\n", + "\n", + "\n", + " def __call__(self, step):\n", + " step = tf.cast(step, dtype=tf.float32)\n", + "\n", + " # Calculate the learning rate for both phases unconditionally\n", + " warmup_lr = self.initial_learning_rate * step / self.warmup_steps\n", + "\n", + " # The cosine schedule is designed to start from step 0, so we give 
it\n", + " # the \"post-warmup\" step count.\n", + " decay_lr = self.cosine_restarts_schedule(step - self.warmup_steps)\n", + "\n", + " # Create a multiplier that is 1.0 during warmup and 0.0 after.\n", + " # tf.cast(condition, tf.float32) converts a boolean tensor to 1.0 or 0.0.\n", + " warmup_multiplier = tf.cast(step < self.warmup_steps, tf.float32)\n", + "\n", + " # The decay multiplier is the opposite.\n", + " decay_multiplier = 1.0 - warmup_multiplier\n", + "\n", + " # Combine the two learning rates. Only one will be active at a time.\n", + " return (warmup_multiplier * warmup_lr) + (decay_multiplier * decay_lr)\n", + "\n", + " def get_config(self):\n", + " # Use the stored public attributes for the config.\n", + " # This bypasses the issue of accessing private attributes (_t_mul) from\n", + " # the nested Keras object, which can be brittle.\n", + " config = {\n", + " \"initial_learning_rate\": self.initial_learning_rate,\n", + " \"warmup_steps\": self.warmup_steps,\n", + " \"first_decay_steps\": self.first_decay_steps,\n", + " \"t_mul\": self.t_mul,\n", + " \"m_mul\": self.m_mul,\n", + " \"alpha\": self.alpha,\n", + " }\n", + "\n", + " # Use from_config to properly allow deserialization\n", + " return config\n", + "```\n", + "\n" + ], + "metadata": { + "id": "DPaeJKEzzlPw" + } + }, + { + "cell_type": "code", + "source": [
+ "# Define loss and metrics for Phase I-b\n",
+ "phase_i_b_loss = tf.keras.losses.CategoricalCrossentropy()\n",
+ "phase_i_b_categorical_accuracy = tf.keras.metrics.CategoricalAccuracy()\n",
+ "phase_i_b_perplexity = Perplexity(name=\"perplexity_phase_i_b\")\n",
+ "\n",
+ "# Create the learning rate schedule instance\n",
+ "lr_scheduler = WarmupCosineDecayRestarts(\n",
+ "    initial_learning_rate=INITIAL_LR_STAGE_I_B,\n",
+ "    warmup_steps=WARMUP_STEPS,\n",
+ "    first_decay_steps=FIRST_DECAY_STEPS_STAGE_I_B,\n",
+ "    t_mul=1.0,\n",
+ "    m_mul=0.9,\n",
+ "    alpha=0.01\n",
+ ")\n",
+ "\n",
+ "# Recompile the existing model\n",
+ "generator.model.compile(\n",
+ "    loss=phase_i_b_loss,\n",
+ "    metrics=[phase_i_b_categorical_accuracy, phase_i_b_perplexity],\n",
+ "    optimizer=tf.keras.optimizers.AdamW(\n",
+ "        learning_rate=lr_scheduler,\n",
+ "        weight_decay=phase_i_b_weight_decay,\n",
+ "        gradient_accumulation_steps=phase_i_b_gradient_accumulation_steps\n",
+ "    ),\n",
+ "    jit_compile=True\n",
+ ")\n",
+ "\n",
+ "# Define the Early Stopping callback.\n",
+ "# Keras logs validation metrics under a 'val_' prefix, so the validation\n",
+ "# perplexity is 'val_perplexity_phase_i_b'; the un-prefixed name is the\n",
+ "# training metric and would make restore_best_weights pick the epoch that\n",
+ "# fits the training data best.\n",
+ "early_stopping = tf.keras.callbacks.EarlyStopping(\n",
+ "    monitor='val_perplexity_phase_i_b',  # Monitor validation perplexity\n",
+ "    patience=10,  # Number of epochs with no improvement after which training will be stopped.\n",
+ "    verbose=1,\n",
+ "    restore_best_weights=True,  # Restores model weights from the epoch with the best value of the monitored metric.\n",
+ "    mode='min',\n",
+ "    start_from_epoch=40\n",
+ ")\n",
+ "\n",
+ "\n",
+ "callbacks_list = [early_stopping]\n"
+ ], + "metadata": { + "id": "GGkEVa2dzOtf" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Run Stage I-b Training\n", + "\n", + "- We start the training process using the model.fit method with the new datasets and callbacks to continue training the same model on another dataset. In our at scale runs, both the previous stage and this stage are done on far more data." 
+ ], + "metadata": { + "id": "y_K5nLzVz_-b" + } + }, + { + "cell_type": "code", + "source": [
+ "# Train the model\n",
+ "phase_i_b_fit_result = generator.model.fit(\n",
+ "    x=phase_i_b_train_dataset,\n",
+ "    validation_data=phase_i_b_val_dataset,\n",
+ "    epochs=phase_i_b_epochs,\n",
+ "    callbacks=callbacks_list\n",
+ ")\n",
+ "\n",
+ "# Store history and get the best validation perplexity.\n",
+ "# Validation metrics are logged with a 'val_' prefix; the un-prefixed\n",
+ "# 'perplexity_phase_i_b' column is the training metric.\n",
+ "phase_i_b_history = pd.DataFrame(phase_i_b_fit_result.history)\n",
+ "result_phase_i_b = float(phase_i_b_history['val_perplexity_phase_i_b'].min())\n",
+ "f\"Result of Stage 1-b training {result_phase_i_b}\"\n"
+ ], + "metadata": { + "id": "3GGqvlIl0FvV", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "0daf05b2-7072-4818-8b47-a05558b33470" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Epoch 1/53\n", + " 76/Unknown \u001b[1m69s\u001b[0m 636ms/step - categorical_accuracy: 0.0389 - loss: 13.6508 - perplexity_phase_i_b: 966782.2500" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/keras/src/trainers/epoch_iterator.py:160: UserWarning: Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches. 
You may need to use the `.repeat()` function when building your dataset.\n", + " self._interrupted_warning()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m73s\u001b[0m 690ms/step - categorical_accuracy: 0.0388 - loss: 13.6471 - perplexity_phase_i_b: 962529.6250 - val_categorical_accuracy: 0.0492 - val_loss: 11.5516 - val_perplexity_phase_i_b: 103939.8906\n", + "Epoch 2/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 609ms/step - categorical_accuracy: 0.0164 - loss: 13.8992 - perplexity_phase_i_b: 2969392.7500 - val_categorical_accuracy: 0.0492 - val_loss: 12.0771 - val_perplexity_phase_i_b: 175791.3594\n", + "Epoch 3/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 609ms/step - categorical_accuracy: 0.0250 - loss: 12.8039 - perplexity_phase_i_b: 402124.0625 - val_categorical_accuracy: 0.0656 - val_loss: 12.3528 - val_perplexity_phase_i_b: 231597.3438\n", + "Epoch 4/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 591ms/step - categorical_accuracy: 0.0415 - loss: 11.6595 - perplexity_phase_i_b: 140648.8125 - val_categorical_accuracy: 0.0492 - val_loss: 12.4123 - val_perplexity_phase_i_b: 245801.6250\n", + "Epoch 5/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 
611ms/step - categorical_accuracy: 0.0439 - loss: 11.1954 - perplexity_phase_i_b: 73950.6797 - val_categorical_accuracy: 0.0492 - val_loss: 12.3395 - val_perplexity_phase_i_b: 228538.3750\n", + "Epoch 6/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 589ms/step - categorical_accuracy: 0.0816 - loss: 10.2579 - perplexity_phase_i_b: 29194.4102 - val_categorical_accuracy: 0.0656 - val_loss: 12.1179 - val_perplexity_phase_i_b: 183113.2031\n", + "Epoch 7/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 590ms/step - categorical_accuracy: 0.0590 - loss: 9.9608 - perplexity_phase_i_b: 22667.8711 - val_categorical_accuracy: 0.0492 - val_loss: 11.8740 - val_perplexity_phase_i_b: 143489.0312\n", + "Epoch 8/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m97s\u001b[0m 599ms/step - categorical_accuracy: 0.0593 - loss: 8.9806 - perplexity_phase_i_b: 8207.2861 - val_categorical_accuracy: 0.0328 - val_loss: 12.3863 - val_perplexity_phase_i_b: 239495.6562\n", + "Epoch 9/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 558ms/step - categorical_accuracy: 0.0661 - loss: 7.8740 - perplexity_phase_i_b: 2828.0859 - val_categorical_accuracy: 0.0164 - val_loss: 11.9790 - val_perplexity_phase_i_b: 159370.9219\n", + "Epoch 10/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m50s\u001b[0m 630ms/step - categorical_accuracy: 0.1062 - loss: 6.8127 - perplexity_phase_i_b: 987.0147 - val_categorical_accuracy: 0.0328 - val_loss: 11.2031 - val_perplexity_phase_i_b: 73360.1719\n", + "Epoch 11/53\n", + 
"\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 604ms/step - categorical_accuracy: 0.0687 - loss: 5.7574 - perplexity_phase_i_b: 324.8636 - val_categorical_accuracy: 0.0164 - val_loss: 9.6458 - val_perplexity_phase_i_b: 15456.3154\n", + "Epoch 12/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m87s\u001b[0m 686ms/step - categorical_accuracy: 0.0943 - loss: 4.8160 - perplexity_phase_i_b: 124.1660 - val_categorical_accuracy: 0.0492 - val_loss: 8.6260 - val_perplexity_phase_i_b: 5574.9229\n", + "Epoch 13/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 608ms/step - categorical_accuracy: 0.1206 - loss: 4.4321 - perplexity_phase_i_b: 84.3652 - val_categorical_accuracy: 0.0328 - val_loss: 8.1588 - val_perplexity_phase_i_b: 3493.8950\n", + "Epoch 14/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m54s\u001b[0m 597ms/step - categorical_accuracy: 0.1237 - loss: 4.4953 - perplexity_phase_i_b: 91.3969 - val_categorical_accuracy: 0.0328 - val_loss: 8.3403 - val_perplexity_phase_i_b: 4189.2686\n", + "Epoch 15/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 579ms/step - categorical_accuracy: 0.0997 - loss: 4.2491 - perplexity_phase_i_b: 70.9299 - val_categorical_accuracy: 0.0656 - val_loss: 8.6163 - val_perplexity_phase_i_b: 5520.8823\n", + "Epoch 16/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m88s\u001b[0m 585ms/step - categorical_accuracy: 0.1204 - loss: 4.2542 - perplexity_phase_i_b: 70.9240 - 
val_categorical_accuracy: 0.0656 - val_loss: 8.7940 - val_perplexity_phase_i_b: 6594.3228\n", + "Epoch 17/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 577ms/step - categorical_accuracy: 0.1386 - loss: 4.2547 - perplexity_phase_i_b: 70.8944 - val_categorical_accuracy: 0.0984 - val_loss: 8.7318 - val_perplexity_phase_i_b: 6196.8022\n", + "Epoch 18/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 597ms/step - categorical_accuracy: 0.1209 - loss: 4.2489 - perplexity_phase_i_b: 70.4136 - val_categorical_accuracy: 0.0984 - val_loss: 8.9164 - val_perplexity_phase_i_b: 7453.2446\n", + "Epoch 19/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 567ms/step - categorical_accuracy: 0.1236 - loss: 4.2367 - perplexity_phase_i_b: 69.5275 - val_categorical_accuracy: 0.0656 - val_loss: 8.8083 - val_perplexity_phase_i_b: 6689.4990\n", + "Epoch 20/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 596ms/step - categorical_accuracy: 0.1506 - loss: 4.1450 - perplexity_phase_i_b: 63.6329 - val_categorical_accuracy: 0.0656 - val_loss: 8.6605 - val_perplexity_phase_i_b: 5770.2129\n", + "Epoch 21/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 564ms/step - categorical_accuracy: 0.1424 - loss: 4.0012 - perplexity_phase_i_b: 55.2548 - val_categorical_accuracy: 0.0820 - val_loss: 8.6945 - val_perplexity_phase_i_b: 5970.1401\n", + "Epoch 22/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m 
\u001b[1m45s\u001b[0m 589ms/step - categorical_accuracy: 0.1520 - loss: 4.1843 - perplexity_phase_i_b: 66.0555 - val_categorical_accuracy: 0.0656 - val_loss: 8.3286 - val_perplexity_phase_i_b: 4140.4941\n", + "Epoch 23/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 557ms/step - categorical_accuracy: 0.1807 - loss: 3.8604 - perplexity_phase_i_b: 48.0663 - val_categorical_accuracy: 0.0656 - val_loss: 8.6137 - val_perplexity_phase_i_b: 5506.4224\n", + "Epoch 24/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 608ms/step - categorical_accuracy: 0.1533 - loss: 3.9858 - perplexity_phase_i_b: 54.5812 - val_categorical_accuracy: 0.1148 - val_loss: 8.5935 - val_perplexity_phase_i_b: 5396.4331\n", + "Epoch 25/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 573ms/step - categorical_accuracy: 0.1230 - loss: 4.0118 - perplexity_phase_i_b: 55.6288 - val_categorical_accuracy: 0.1475 - val_loss: 8.6210 - val_perplexity_phase_i_b: 5547.1172\n", + "Epoch 26/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 564ms/step - categorical_accuracy: 0.1588 - loss: 3.8591 - perplexity_phase_i_b: 47.8675 - val_categorical_accuracy: 0.1148 - val_loss: 8.4999 - val_perplexity_phase_i_b: 4914.4688\n", + "Epoch 27/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 582ms/step - categorical_accuracy: 0.1900 - loss: 3.8535 - perplexity_phase_i_b: 47.2824 - val_categorical_accuracy: 0.0820 - val_loss: 8.7680 - val_perplexity_phase_i_b: 6425.2207\n", + "Epoch 28/53\n", + 
"\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 593ms/step - categorical_accuracy: 0.1927 - loss: 3.6720 - perplexity_phase_i_b: 39.7386 - val_categorical_accuracy: 0.0656 - val_loss: 8.7999 - val_perplexity_phase_i_b: 6633.3721\n", + "Epoch 29/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 594ms/step - categorical_accuracy: 0.1848 - loss: 3.8259 - perplexity_phase_i_b: 46.2804 - val_categorical_accuracy: 0.0656 - val_loss: 8.6051 - val_perplexity_phase_i_b: 5459.4458\n", + "Epoch 30/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 597ms/step - categorical_accuracy: 0.1691 - loss: 3.6890 - perplexity_phase_i_b: 40.3801 - val_categorical_accuracy: 0.0984 - val_loss: 8.5689 - val_perplexity_phase_i_b: 5265.4810\n", + "Epoch 31/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 587ms/step - categorical_accuracy: 0.1774 - loss: 3.6971 - perplexity_phase_i_b: 40.6956 - val_categorical_accuracy: 0.0984 - val_loss: 8.7037 - val_perplexity_phase_i_b: 6025.3599\n", + "Epoch 32/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 573ms/step - categorical_accuracy: 0.1597 - loss: 3.6218 - perplexity_phase_i_b: 37.8592 - val_categorical_accuracy: 0.0984 - val_loss: 8.7827 - val_perplexity_phase_i_b: 6520.5991\n", + "Epoch 33/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 573ms/step - categorical_accuracy: 0.2066 - loss: 3.6265 - perplexity_phase_i_b: 38.0441 - 
val_categorical_accuracy: 0.0984 - val_loss: 8.7695 - val_perplexity_phase_i_b: 6434.8853\n", + "Epoch 34/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m42s\u001b[0m 550ms/step - categorical_accuracy: 0.1622 - loss: 3.7388 - perplexity_phase_i_b: 42.4272 - val_categorical_accuracy: 0.1148 - val_loss: 8.6601 - val_perplexity_phase_i_b: 5768.0454\n", + "Epoch 35/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m41s\u001b[0m 537ms/step - categorical_accuracy: 0.1974 - loss: 3.4737 - perplexity_phase_i_b: 32.6702 - val_categorical_accuracy: 0.1148 - val_loss: 8.6486 - val_perplexity_phase_i_b: 5702.0361\n", + "Epoch 36/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 603ms/step - categorical_accuracy: 0.1640 - loss: 3.5527 - perplexity_phase_i_b: 35.4395 - val_categorical_accuracy: 0.1148 - val_loss: 8.7015 - val_perplexity_phase_i_b: 6011.7910\n", + "Epoch 37/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 590ms/step - categorical_accuracy: 0.1779 - loss: 3.5903 - perplexity_phase_i_b: 36.4963 - val_categorical_accuracy: 0.1148 - val_loss: 8.7223 - val_perplexity_phase_i_b: 6138.1729\n", + "Epoch 38/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m96s\u001b[0m 598ms/step - categorical_accuracy: 0.1935 - loss: 3.5401 - perplexity_phase_i_b: 34.7298 - val_categorical_accuracy: 0.1148 - val_loss: 8.6995 - val_perplexity_phase_i_b: 5999.7402\n", + "Epoch 39/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m 
\u001b[1m48s\u001b[0m 622ms/step - categorical_accuracy: 0.2109 - loss: 3.5383 - perplexity_phase_i_b: 34.5639 - val_categorical_accuracy: 0.1148 - val_loss: 8.6650 - val_perplexity_phase_i_b: 5796.6436\n", + "Epoch 40/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m42s\u001b[0m 555ms/step - categorical_accuracy: 0.2047 - loss: 3.5124 - perplexity_phase_i_b: 33.9720 - val_categorical_accuracy: 0.1148 - val_loss: 8.7431 - val_perplexity_phase_i_b: 6267.4624\n", + "Epoch 41/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 576ms/step - categorical_accuracy: 0.1514 - loss: 3.5711 - perplexity_phase_i_b: 35.7887 - val_categorical_accuracy: 0.0656 - val_loss: 8.9814 - val_perplexity_phase_i_b: 7953.5283\n", + "Epoch 42/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 590ms/step - categorical_accuracy: 0.1761 - loss: 3.6074 - perplexity_phase_i_b: 37.1983 - val_categorical_accuracy: 0.0984 - val_loss: 9.0303 - val_perplexity_phase_i_b: 8352.2227\n", + "Epoch 43/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 571ms/step - categorical_accuracy: 0.1727 - loss: 3.6003 - perplexity_phase_i_b: 36.7872 - val_categorical_accuracy: 0.0328 - val_loss: 8.9927 - val_perplexity_phase_i_b: 8044.2207\n", + "Epoch 44/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m86s\u001b[0m 619ms/step - categorical_accuracy: 0.1786 - loss: 3.7416 - perplexity_phase_i_b: 42.6958 - val_categorical_accuracy: 0.1148 - val_loss: 9.1039 - val_perplexity_phase_i_b: 8990.1494\n", + "Epoch 45/53\n", + 
"\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 594ms/step - categorical_accuracy: 0.2062 - loss: 3.6020 - perplexity_phase_i_b: 37.0046 - val_categorical_accuracy: 0.0984 - val_loss: 9.3867 - val_perplexity_phase_i_b: 11928.1768\n", + "Epoch 46/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 583ms/step - categorical_accuracy: 0.2035 - loss: 3.6276 - perplexity_phase_i_b: 37.9026 - val_categorical_accuracy: 0.0820 - val_loss: 9.5581 - val_perplexity_phase_i_b: 14159.1719\n", + "Epoch 47/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 590ms/step - categorical_accuracy: 0.1784 - loss: 3.4276 - perplexity_phase_i_b: 31.0932 - val_categorical_accuracy: 0.1148 - val_loss: 9.1575 - val_perplexity_phase_i_b: 9485.0088\n", + "Epoch 48/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 583ms/step - categorical_accuracy: 0.1864 - loss: 3.4227 - perplexity_phase_i_b: 31.1301 - val_categorical_accuracy: 0.1148 - val_loss: 9.1156 - val_perplexity_phase_i_b: 9095.7666\n", + "Epoch 49/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 622ms/step - categorical_accuracy: 0.2266 - loss: 3.4226 - perplexity_phase_i_b: 30.7439 - val_categorical_accuracy: 0.0820 - val_loss: 9.4648 - val_perplexity_phase_i_b: 12897.0039\n", + "Epoch 50/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m95s\u001b[0m 589ms/step - categorical_accuracy: 0.2455 - loss: 3.4171 - perplexity_phase_i_b: 30.9408 - 
val_categorical_accuracy: 0.0820 - val_loss: 9.4194 - val_perplexity_phase_i_b: 12325.3525\n", + "Epoch 51/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 596ms/step - categorical_accuracy: 0.2168 - loss: 3.2941 - perplexity_phase_i_b: 27.1144 - val_categorical_accuracy: 0.0984 - val_loss: 9.3049 - val_perplexity_phase_i_b: 10991.2559\n", + "Epoch 52/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m49s\u001b[0m 642ms/step - categorical_accuracy: 0.1940 - loss: 3.3548 - perplexity_phase_i_b: 28.8572 - val_categorical_accuracy: 0.0984 - val_loss: 9.1126 - val_perplexity_phase_i_b: 9068.9150\n", + "Epoch 53/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 610ms/step - categorical_accuracy: 0.2291 - loss: 3.3674 - perplexity_phase_i_b: 29.2831 - val_categorical_accuracy: 0.1311 - val_loss: 9.1200 - val_perplexity_phase_i_b: 9136.3135\n", + "Restoring model weights from the end of the best epoch: 53.\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'Result of Stage 1-b training 29.637819290161133'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Stage I-b: Model Evaluation and Serialization\n", + "\n", + "After extended training, we evaluate the final model performance and save the model and tokenizer for future use.\n" + ], + "metadata": { + "id": "y8Ej2P7D0T8R" + } + }, + { + "cell_type": "markdown", + "source": [ + "# Final Generation Tests on the Stage I-b model checkpoint\n", + "\n", + "Confirm the model works after Stage I-b training." 
+ ], + "metadata": { + "id": "dWlYvYBq0dio" + } + }, + { + "cell_type": "code", + "source": [ + "print(\"########### Phase I-b Model Checkpoint Generation Samples: ###########\")\n", + "\n", + "counter = 0\n", + "for sample in prompt_samples:\n", + " test_text(\n", + " test_prompt=sample,\n", + " max_new_tokens=MAX_NEW_TOKENS,\n", + " result_cutoff=60, #\n", + " trial_id=trial_number,\n", + " test_sample_number=counter,\n", + " result_0=result_phase_i_b\n", + " )\n", + " counter += 1\n" + ], + "metadata": { + "id": "YhGaTbGF0X_d", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "8071bc5a-8520-4d13-82e1-cbd941297b4b" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "########### Phase I-b Model Checkpoint Generation Samples: ###########\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ',,, and fruit fruit fruit fruit fruit fruit fruit fruit fruit fruit fruit'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 52 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: 
[128260] shape, 38 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 41 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' for morning, over tree with, fruit lights bring fruit great livestock.''.\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 55 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] 
shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ', serve'to lights produce according each kind'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' 
RESPONSE: ', greater and that creeping waters for fifth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 56 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 55 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 35 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' for, lights bird produce fourth to its with'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 52 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero 
probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 64 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 36 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: 'Be and, image said birds creeping day. 
God'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 50 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 56 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 56 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 53 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 36 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' night, image to over fish creature earth.''\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero 
probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 55 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 38 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 34 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' for, to birds bird, according forth every.' 
man animals'\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 19 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 31 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 31 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 31 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 30 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 30 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 21 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ', for plant domin fruition day with'\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero 
probs\n", + ">>> After top_p: [128260] shape, 38 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 38 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 36 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 12 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 15 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 10 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' for'lights, great waters eachBe.' 
fruit also'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 53 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 52 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 15 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ', saying produce livestock for every lights-bearing day fifth give to.''\n", + ">>> After 
top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 64 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 34 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ', fifth give to livestock light fruitful its that day every so.'\n", + ">>> After top_k: [128260] shape, 75 non-zero 
probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 65 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 68 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 68 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 67 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 67 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 66 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 67 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 64 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 31 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 14 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 17 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' waters,, for to bring its, fruit.' 
kind.' and its wild'\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'And God said, Let there be light: and there ' RESPONSE: 'And God said, Let there be light: and there,, and fruit fruit fruit fruit fruit fruit fruit fruit fruit fruit fruit fruit'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 56 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 51 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 52 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 52 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 50 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, 
top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' to each the kind fruit that in great birds day. its'.\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 41 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 29 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 24 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' man was forth fruit great with lesser thing animals'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 51 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 55 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After 
top_p: [128260] shape, 55 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 50 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 52 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 53 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 34 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' man to each thing multiply the so fruit in that as saw'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + 
">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 38 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 17 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' fly created the so.' livestock fruit according'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 
29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' each fruitful-bearing in animals as man was'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 51 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 26 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 21 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 19 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 20 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And 
God said, Let there be light: and there ' RESPONSE: ' that themBeh fruit man in great according forth signs fruit.''\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 13 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 22 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' them. fruit'\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 26 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' each created so animals'\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 35 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 35 non-zero probs\n", + ">>> After 
top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 35 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 35 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 34 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 34 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 30 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 15 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 14 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' man-bearing lesser so animals each.' 
wild'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 51 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 19 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 21 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 17 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' to man each bring kind fruit forth. 
its animals'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 50 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 51 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 51 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 18 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' so man each. 
fruit in as'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 65 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 67 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 66 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 66 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 64 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 56 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 52 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' man-bearing said forth so in them according signs fruit'\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'In the beginning God created the heavens' RESPONSE: ',,,,, and day day day lesser'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 41 non-zero probs\n", + ">>> After top_k: [128260] 
shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 53 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 41 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 36 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 15 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: 'In the beginning God created the heavens' RESPONSE: ',Let and was living he lesser so multiply seed fruitful livestock.'.\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 28 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 
non-zero probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 38 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 29 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' set, and said them over was man each it'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 53 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 56 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 53 
non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 49 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and fruitful, to was forth them'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 50 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero 
probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', earth trees seed was and day good rule forth.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 41 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 49 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 49 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 50 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 49 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 49 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 21 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 21 non-zero 
probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', and trees them said he day good upon,' thing. fruit'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 51 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 36 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 41 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, 
repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', and trees was day to seed lesser he living earth each'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 28 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 38 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 30 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 28 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 34 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 29 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 
frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', and trees was he said day. each living he fruit so'\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 15 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 22 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 20 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 20 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 19 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 20 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 24 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 24 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', it earth creatures day living man lesser and he each'\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 28 non-zero probs\n", + ">>> After top_k: 
[128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 34 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 29 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 28 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 29 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 29 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 28 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 30 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 35 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 35 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and was living day he rule trees., according'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 35 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 51 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: 
[128260] shape, 38 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 36 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', and trees it said upon man was forth day fruit each tree wing multiply'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 36 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 41 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 52 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 55 non-zero 
probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' waters, and was so according each lesser multiply'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 55 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 64 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 56 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 
non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 56 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 53 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'In the beginning God created the heavens' RESPONSE: ',ed and to be it, multiply fruitful each'\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Save Model and Tokenizer\n", + "\n", + "- Finally, we save the tokenizer and the trained model weights to disk." + ], + "metadata": { + "id": "-oCAeR4n0mPW" + } + }, + { + "cell_type": "code", + "source": [ + "trial_number = 1 # Make sure to set this to a unique number:\n", + "# Serialize tokenizer\n", + "TOKENIZER_SAVE_PATH = f\"tokenizer-tr-{trial_number}-stage-i-b\"\n", + "tokenizer.save_pretrained(TOKENIZER_SAVE_PATH)\n", + "print(f\"Tokenizer saved to {TOKENIZER_SAVE_PATH}\")\n", + "\n", + "# Serialize model\n", + "MODEL_SAVE_PATH = f\"final_phase_ib_model_tr_{trial_number}-stage-i-b.keras\"\n", + "generator.save(MODEL_SAVE_PATH)\n", + "print(f\"Final model saved to {MODEL_SAVE_PATH}\")\n" + ], + "metadata": { + "id": "ziYdmmII0qfu", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "37a1153f-09a0-4274-9ca2-e280112e65e6" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Tokenizer saved to tokenizer-tr-1-stage-i-b\n", + "Final model saved to final_phase_ib_model_tr_1-stage-i-b.keras\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Serialization Test\n", + "\n", + "- We run an external script (test_llm_serialization.py) to validate that the saved model and tokenizer can be loaded and 
used correctly." + ], + "metadata": { + "id": "y9Pvhcvl0uGt" + } + }, + { + "cell_type": "code", + "source": [ + "print(f\"๐Ÿงช Running serialization test for Stage I-b trial {trial_number}...\")\n", + "result = subprocess.run(\n", + " f\"python3 test_llm_serialization.py {TOKENIZER_SAVE_PATH} {MODEL_SAVE_PATH}\",\n", + " capture_output=True,\n", + " shell=True,\n", + " text=True # Use text=True for string output\n", + ")\n", + "\n", + "if result.returncode == 0:\n", + " print(\"โœ… Serialization test passed.\")\n", + " print(\"STDOUT:\", result.stdout)\n", + "else:\n", + " print(\"โŒ Serialization test failed.\")\n", + " print(\"STDERR:\", result.stderr)\n", + " if result.stdout:\n", + " print(\"STDOUT:\", result.stdout)\n" + ], + "metadata": { + "id": "qA5Cord40yID", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "389fe0bf-c935-4f49-dd4f-8eea8672c634" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "๐Ÿงช Running serialization test for Stage I-b trial 1...\n", + "โœ… Serialization test passed.\n", + "STDOUT: โœ… Tokenizer loaded successfully.\n", + "โœ… CerebrosNotGPT model loaded successfully.\n", + ">>> After top_k: [128260] shape, 50 non-zero probs\n", + ">>> After top_p: [128260] shape, 19 non-zero probs\n", + ">>> After top_k: [128260] shape, 50 non-zero probs\n", + ">>> After top_p: [128260] shape, 31 non-zero probs\n", + ">>> After top_k: [128260] shape, 50 non-zero probs\n", + ">>> After top_p: [128260] shape, 28 non-zero probs\n", + ">>> After top_k: [128260] shape, 50 non-zero probs\n", + ">>> After top_p: [128260] shape, 29 non-zero probs\n", + ">>> After top_k: [128260] shape, 50 non-zero probs\n", + ">>> After top_p: [128260] shape, 26 non-zero probs\n", + ">>> After top_k: [128260] shape, 50 non-zero probs\n", + ">>> After top_p: [128260] shape, 31 non-zero probs\n", + ">>> After top_k: [128260] shape, 50 non-zero probs\n", + ">>> After top_p: [128260] shape, 30 
non-zero probs\n", + ">>> After top_k: [128260] shape, 50 non-zero probs\n", + ">>> After top_p: [128260] shape, 28 non-zero probs\n", + ">>> After top_k: [128260] shape, 50 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 50 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + "๐Ÿง  (serialized) Prompt: In the beginning God created the Generated Text from Serialized Model: 'In the beginning God created the, waters each trees and to living man according them'\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# And there you have it: What it takes to build an LLM from scratch using our novel architecture.\n" + ], + "metadata": { + "id": "z1lSMQ6i03XC" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "W6lcAxij-Z5r" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file From 0aca0307ebbfb4c4b0440e8aeaef5aa162c015fd Mon Sep 17 00:00:00 2001 From: David Thrower Date: Mon, 24 Nov 2025 19:18:45 -0500 Subject: [PATCH 3/4] Add copy of original Jupyter w/ execution metadata --- ...1_23_demo_train_an_llm_with_cerebros.ipynb | 6561 +++++++++++++++++ 1 file changed, 6561 insertions(+) create mode 100644 old/2025_11_23_demo_train_an_llm_with_cerebros.ipynb diff --git a/old/2025_11_23_demo_train_an_llm_with_cerebros.ipynb b/old/2025_11_23_demo_train_an_llm_with_cerebros.ipynb new file mode 100644 index 0000000..9004212 --- /dev/null +++ b/old/2025_11_23_demo_train_an_llm_with_cerebros.ipynb @@ -0,0 +1,6561 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Build our LLM From Scratch -\n", + "\n", + "## How Cerebros NotGPT works under the hood:\n", + "\n", + "\n", + "### This 
notebook demonstrates the end-to-end training pipeline that builds a small-scale generative LLM from scratch, a small-scale proof of concept for our own Cerebros NotGPT model, then fine-tunes it on additional data.\n", + "\n", + "The process is divided into two main phases:\n", + "\n", + "- Phase I-a: Neural Architecture Search (NAS) - We use SimpleCerebrosRandomSearch to automatically discover an effective neural network architecture from a small dataset.\n", + "- Phase I-b: Extended Training - The best architecture found in Phase I-a is then trained on a larger dataset to improve its performance.\n", + "\n", + "Finally, the trained model is evaluated and serialized for future use.\n", + "\n", + "\n", + "## Setup and Configuration\n", + "\n", + "Note: This script is configured for a vanilla-scale demo environment (4 CPU / 16 GB RAM Linux with Python 3.12). No GPU is needed, and this will run in the free version of Google Colab. \n", + "\n", + "## Vanilla Demo\n", + "\n", + "- For production use, you would significantly increase the sample sizes and adjust other parameters accordingly.\n", + "- The quality of the text generated by this minimal demo (trained on 30 text samples at a sequence length of 40) does not represent the quality of a full-scale model generated from the same code.\n", + "- A script that can be adapted for production-scale training is available at: https://github.com/david-thrower/cerebros-core-algorithm-alpha/blob/main/train_a_generative_llm.py" + ], + "metadata": { + "id": "nnsAHoJyWLed" + } + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NzJF6_JuWElV", + "outputId": "a0f3246f-0ccd-48ea-da55-86479bc0f93c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Python 3.12.12\n" + ] + } + ], + "source": [ + "!
python --version" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Getting started: Download the repo and go to the main directory of the repo" + ], + "metadata": { + "id": "f6TD2XsKPJIY" + } + }, + { + "cell_type": "code", + "source": [ + "# Download the repo\n", + "! git clone https://github.com/david-thrower/cerebros-core-algorithm-alpha.git" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AcECFSs7WVsi", + "outputId": "9fd59935-35d4-4a08-9c8a-fb01fd3e4f03" + }, + "execution_count": 25, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'cerebros-core-algorithm-alpha'...\n", + "remote: Enumerating objects: 8036, done.\u001b[K\n", + "remote: Counting objects: 100% (1737/1737), done.\u001b[K\n", + "remote: Compressing objects: 100% (321/321), done.\u001b[K\n", + "remote: Total 8036 (delta 1612), reused 1449 (delta 1411), pack-reused 6299 (from 2)\u001b[K\n", + "Receiving objects: 100% (8036/8036), 65.90 MiB | 21.67 MiB/s, done.\n", + "Resolving deltas: 100% (3116/3116), done.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# set the working directory\n", + "%cd cerebros-core-algorithm-alpha" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mCpJGfD2WfLj", + "outputId": "e0fe8c05-6154-41cd-f489-08cfd2ad0fa8" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/cerebros-core-algorithm-alpha/cerebros-core-algorithm-alpha\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Next install all dependencies.\n", + "\n", + "There are 2 requirement files:\n", + " - requirements.txt: The core requirements of the neural architecture search\n", + " - cicd-requirements.txt: Requirements for NLP and text generation" + ], + "metadata": { + "id": "yT4hPXOKPU_8" + } + }, + { + "cell_type": "code", + "source": [ + "# Install the requirements for the core 
algorithm\n", + "! pip install -r requirements.txt; pip install -r cicd-requirements.txt" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "nwElyEdpW90P", + "outputId": "170e2158-b7a9-49f0-ce63-22c4c7410f33" + }, + "execution_count": 27, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: jax==0.5.3 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 1)) (0.5.3)\n", + "Requirement already satisfied: jaxlib==0.5.3 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 2)) (0.5.3)\n", + "Requirement already satisfied: pendulum==3.0.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 3)) (3.0.0)\n", + "Collecting tensorflow==2.20.0 (from -r requirements.txt (line 4))\n", + " Using cached tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)\n", + "Collecting numpy==2.3.5 (from -r requirements.txt (line 5))\n", + " Using cached numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)\n", + "Requirement already satisfied: pandas==2.3.3 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 6)) (2.3.3)\n", + "Requirement already satisfied: pyvis==0.3.2 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 7)) (0.3.2)\n", + "Requirement already satisfied: plotly==5.20.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 8)) (5.20.0)\n", + "Requirement already satisfied: matplotlib==3.10.7 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 9)) (3.10.7)\n", + "Requirement already satisfied: imageio==2.37.2 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 10)) (2.37.2)\n", + "Requirement already satisfied: tqdm==4.67.1 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 11)) (4.67.1)\n", 
+ "Requirement already satisfied: ml_dtypes>=0.4.0 in /usr/local/lib/python3.12/dist-packages (from jax==0.5.3->-r requirements.txt (line 1)) (0.5.4)\n", + "Requirement already satisfied: opt_einsum in /usr/local/lib/python3.12/dist-packages (from jax==0.5.3->-r requirements.txt (line 1)) (3.4.0)\n", + "Requirement already satisfied: scipy>=1.11.1 in /usr/local/lib/python3.12/dist-packages (from jax==0.5.3->-r requirements.txt (line 1)) (1.16.3)\n", + "Requirement already satisfied: python-dateutil>=2.6 in /usr/local/lib/python3.12/dist-packages (from pendulum==3.0.0->-r requirements.txt (line 3)) (2.9.0.post0)\n", + "Requirement already satisfied: tzdata>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pendulum==3.0.0->-r requirements.txt (line 3)) (2025.2)\n", + "Requirement already satisfied: time-machine>=2.6.0 in /usr/local/lib/python3.12/dist-packages (from pendulum==3.0.0->-r requirements.txt (line 3)) (3.1.0)\n", + "Requirement already satisfied: absl-py>=1.0.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (1.4.0)\n", + "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (1.6.3)\n", + "Requirement already satisfied: flatbuffers>=24.3.25 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (25.9.23)\n", + "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (0.6.0)\n", + "Requirement already satisfied: google_pasta>=0.1.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (0.2.0)\n", + "Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (18.1.1)\n", + "Requirement already satisfied: packaging in 
/usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (25.0)\n", + "Requirement already satisfied: protobuf>=5.28.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (5.29.5)\n", + "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (2.32.4)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (75.2.0)\n", + "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (1.17.0)\n", + "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (3.2.0)\n", + "Requirement already satisfied: typing_extensions>=3.6.6 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (4.15.0)\n", + "Requirement already satisfied: wrapt>=1.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (2.0.1)\n", + "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (1.76.0)\n", + "Collecting tensorboard~=2.20.0 (from tensorflow==2.20.0->-r requirements.txt (line 4))\n", + " Using cached tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)\n", + "Requirement already satisfied: keras>=3.10.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (3.10.0)\n", + "Requirement already satisfied: h5py>=3.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (3.15.1)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from 
pandas==2.3.3->-r requirements.txt (line 6)) (2025.2)\n", + "Requirement already satisfied: ipython>=5.3.0 in /usr/local/lib/python3.12/dist-packages (from pyvis==0.3.2->-r requirements.txt (line 7)) (7.34.0)\n", + "Requirement already satisfied: jinja2>=2.9.6 in /usr/local/lib/python3.12/dist-packages (from pyvis==0.3.2->-r requirements.txt (line 7)) (3.1.6)\n", + "Requirement already satisfied: jsonpickle>=1.4.1 in /usr/local/lib/python3.12/dist-packages (from pyvis==0.3.2->-r requirements.txt (line 7)) (4.1.1)\n", + "Requirement already satisfied: networkx>=1.11 in /usr/local/lib/python3.12/dist-packages (from pyvis==0.3.2->-r requirements.txt (line 7)) (3.5)\n", + "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.12/dist-packages (from plotly==5.20.0->-r requirements.txt (line 8)) (8.5.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (1.3.3)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (4.60.1)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (1.4.9)\n", + "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (11.3.0)\n", + "Requirement already satisfied: pyparsing>=3 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (3.2.5)\n", + "Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.12/dist-packages (from astunparse>=1.6.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.45.1)\n", + "Requirement 
already satisfied: jedi>=0.16 in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.19.2)\n", + "Requirement already satisfied: decorator in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (4.4.2)\n", + "Requirement already satisfied: pickleshare in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.7.5)\n", + "Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (5.7.1)\n", + "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (3.0.52)\n", + "Requirement already satisfied: pygments in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (2.19.2)\n", + "Requirement already satisfied: backcall in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.2.0)\n", + "Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.2.1)\n", + "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (4.9.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2>=2.9.6->pyvis==0.3.2->-r requirements.txt (line 7)) (3.0.3)\n", + "Requirement already satisfied: rich in /usr/local/lib/python3.12/dist-packages (from keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (13.9.4)\n", + "Requirement already satisfied: namex in /usr/local/lib/python3.12/dist-packages (from keras>=3.10.0->tensorflow==2.20.0->-r 
requirements.txt (line 4)) (0.1.0)\n", + "Requirement already satisfied: optree in /usr/local/lib/python3.12/dist-packages (from keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.18.0)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (3.4.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (3.11)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (2025.11.12)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.20.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (3.10)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.20.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.7.2)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.20.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (3.1.3)\n", + "Requirement already satisfied: parso<0.9.0,>=0.8.4 in /usr/local/lib/python3.12/dist-packages (from jedi>=0.16->ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.8.5)\n", + "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.12/dist-packages (from pexpect>4.3->ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.7.0)\n", + "Requirement already satisfied: wcwidth in /usr/local/lib/python3.12/dist-packages (from 
prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.2.14)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.12/dist-packages (from rich->keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (4.0.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.12/dist-packages (from markdown-it-py>=2.2.0->rich->keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.1.2)\n", + "Using cached tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (620.7 MB)\n", + "Using cached numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.6 MB)\n", + "Using cached tensorboard-2.20.0-py3-none-any.whl (5.5 MB)\n", + "Installing collected packages: numpy, tensorboard, tensorflow\n", + " Attempting uninstall: numpy\n", + " Found existing installation: numpy 1.26.4\n", + " Uninstalling numpy-1.26.4:\n", + " Successfully uninstalled numpy-1.26.4\n", + " Attempting uninstall: tensorboard\n", + " Found existing installation: tensorboard 2.19.0\n", + " Uninstalling tensorboard-2.19.0:\n", + " Successfully uninstalled tensorboard-2.19.0\n", + " Attempting uninstall: tensorflow\n", + " Found existing installation: tensorflow 2.19.1\n", + " Uninstalling tensorflow-2.19.1:\n", + " Successfully uninstalled tensorflow-2.19.1\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "scikit-learn 1.4.1.post1 requires numpy<2.0,>=1.19.5, but you have numpy 2.3.5 which is incompatible.\n", + "google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.3 which is incompatible.\n", + "tensorflow-text 2.19.0 requires tensorflow<2.20,>=2.19.0, but you have tensorflow 2.20.0 which is incompatible.\n", + "opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 2.3.5 which is incompatible.\n", + "numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.3.5 which is incompatible.\n", + "opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 2.3.5 which is incompatible.\n", + "umap-learn 0.5.9.post2 requires scikit-learn>=1.6, but you have scikit-learn 1.4.1.post1 which is incompatible.\n", + "opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 2.3.5 which is incompatible.\n", + "orbax-checkpoint 0.11.28 requires jax>=0.6.0, but you have jax 0.5.3 which is incompatible.\n", + "tensorflow-decision-forests 1.12.0 requires tensorflow==2.19.0, but you have tensorflow 2.20.0 which is incompatible.\n", + "flax 0.10.7 requires jax>=0.6.0, but you have jax 0.5.3 which is incompatible.\n", + "tf-keras 2.19.0 requires tensorflow<2.20,>=2.19, but you have tensorflow 2.20.0 which is incompatible.\n", + "imbalanced-learn 0.14.0 requires scikit-learn<2,>=1.4.2, but you have scikit-learn 1.4.1.post1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed numpy-2.3.5 tensorboard-2.20.0 tensorflow-2.20.0\n", + "Requirement already satisfied: tensorflow-text==2.19.0 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 1)) (2.19.0)\n", + "Requirement already satisfied: keras-nlp==0.19.0 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 2)) (0.19.0)\n", + "Requirement 
already satisfied: scikit-learn==1.4.1.post1 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 3)) (1.4.1.post1)\n", + "Requirement already satisfied: tensorflow-hub==0.16.1 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 4)) (0.16.1)\n", + "Requirement already satisfied: transformers==4.54.0 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 5)) (4.54.0)\n", + "Collecting tensorflow<2.20,>=2.19.0 (from tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1))\n", + " Using cached tensorflow-2.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)\n", + "Requirement already satisfied: keras-hub==0.19.0 in /usr/local/lib/python3.12/dist-packages (from keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.19.0)\n", + "Collecting numpy<2.0,>=1.19.5 (from scikit-learn==1.4.1.post1->-r cicd-requirements.txt (line 3))\n", + " Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)\n", + "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn==1.4.1.post1->-r cicd-requirements.txt (line 3)) (1.16.3)\n", + "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn==1.4.1.post1->-r cicd-requirements.txt (line 3)) (1.5.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn==1.4.1.post1->-r cicd-requirements.txt (line 3)) (3.6.0)\n", + "Requirement already satisfied: protobuf>=3.19.6 in /usr/local/lib/python3.12/dist-packages (from tensorflow-hub==0.16.1->-r cicd-requirements.txt (line 4)) (5.29.5)\n", + "Requirement already satisfied: tf-keras>=2.14.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow-hub==0.16.1->-r cicd-requirements.txt (line 4)) (2.19.0)\n", + "Requirement already satisfied: filelock in 
/usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (3.20.0)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (0.36.0)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (25.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (6.0.3)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2024.11.6)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2.32.4)\n", + "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (0.21.4)\n", + "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (0.7.0)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (4.67.1)\n", + "Requirement already satisfied: keras>=3.5 in /usr/local/lib/python3.12/dist-packages (from keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (3.10.0)\n", + "Requirement already satisfied: absl-py in /usr/local/lib/python3.12/dist-packages (from keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (1.4.0)\n", + "Requirement already satisfied: rich in /usr/local/lib/python3.12/dist-packages (from keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) 
(13.9.4)\n", + "Requirement already satisfied: kagglehub in /usr/local/lib/python3.12/dist-packages (from keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.3.13)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2025.3.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (4.15.0)\n", + "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (1.2.0)\n", + "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (1.6.3)\n", + "Requirement already satisfied: flatbuffers>=24.3.25 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (25.9.23)\n", + "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.6.0)\n", + "Requirement already satisfied: google-pasta>=0.1.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.2.0)\n", + "Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (18.1.1)\n", + "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r 
cicd-requirements.txt (line 1)) (3.4.0)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (75.2.0)\n", + "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (1.17.0)\n", + "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.2.0)\n", + "Requirement already satisfied: wrapt>=1.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (2.0.1)\n", + "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (1.76.0)\n", + "Collecting tensorboard~=2.19.0 (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1))\n", + " Using cached tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)\n", + "Requirement already satisfied: h5py>=3.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.15.1)\n", + "Requirement already satisfied: ml-dtypes<1.0.0,>=0.5.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.5.4)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (3.4.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (3.11)\n", + 
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2025.11.12)\n", + "Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.12/dist-packages (from astunparse>=1.6.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.45.1)\n", + "Requirement already satisfied: namex in /usr/local/lib/python3.12/dist-packages (from keras>=3.5->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.1.0)\n", + "Requirement already satisfied: optree in /usr/local/lib/python3.12/dist-packages (from keras>=3.5->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.18.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.19.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.10)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.19.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.7.2)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.19.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.1.3)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.12/dist-packages (from rich->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (4.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.12/dist-packages (from rich->keras-hub==0.19.0->keras-nlp==0.19.0->-r 
cicd-requirements.txt (line 2)) (2.19.2)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.12/dist-packages (from markdown-it-py>=2.2.0->rich->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.1.2)\n", + "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.12/dist-packages (from werkzeug>=1.0.1->tensorboard~=2.19.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.0.3)\n", + "Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.0 MB)\n", + "Using cached tensorflow-2.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (645.0 MB)\n", + "Using cached tensorboard-2.19.0-py3-none-any.whl (5.5 MB)\n", + "Installing collected packages: numpy, tensorboard, tensorflow\n", + " Attempting uninstall: numpy\n", + " Found existing installation: numpy 2.3.5\n", + " Uninstalling numpy-2.3.5:\n", + " Successfully uninstalled numpy-2.3.5\n", + " Attempting uninstall: tensorboard\n", + " Found existing installation: tensorboard 2.20.0\n", + " Uninstalling tensorboard-2.20.0:\n", + " Successfully uninstalled tensorboard-2.20.0\n", + " Attempting uninstall: tensorflow\n", + " Found existing installation: tensorflow 2.20.0\n", + " Uninstalling tensorflow-2.20.0:\n", + " Successfully uninstalled tensorflow-2.20.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.3 which is incompatible.\n", + "opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 1.26.4 which is incompatible.\n", + "opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 1.26.4 which is incompatible.\n", + "pytensor 2.35.1 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.\n", + "umap-learn 0.5.9.post2 requires scikit-learn>=1.6, but you have scikit-learn 1.4.1.post1 which is incompatible.\n", + "opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 1.26.4 which is incompatible.\n", + "orbax-checkpoint 0.11.28 requires jax>=0.6.0, but you have jax 0.5.3 which is incompatible.\n", + "tensorflow-decision-forests 1.12.0 requires tensorflow==2.19.0, but you have tensorflow 2.19.1 which is incompatible.\n", + "flax 0.10.7 requires jax>=0.6.0, but you have jax 0.5.3 which is incompatible.\n", + "shap 0.50.0 requires numpy>=2, but you have numpy 1.26.4 which is incompatible.\n", + "imbalanced-learn 0.14.0 requires scikit-learn<2,>=1.4.2, but you have scikit-learn 1.4.1.post1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed numpy-1.26.4 tensorboard-2.19.0 tensorflow-2.19.1\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "numpy", + "tensorflow" + ] + }, + "id": "d3a167bbbde043ef9a994c35060fda79" + } + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **RESTART THE SESSION**\n", + "\n", + "Then proceed to the next cell which imports all necessary libraries and defines global constants and hyperparameters for the entire pipeline.\n" + ], + "metadata": { + "id": "v69rLBcmXyGD" + } + }, + { + "cell_type": "code", 
+ "source": [ + "! ls" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ubtKyfBQzFEW", + "outputId": "6cbe44e6-3ce7-4227-982a-88d0d36d2205" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "cerebros-core-algorithm-alpha sample_data\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# 1. # **ONLY IF** the directory cerebros-core-algorithm-alpha is not still\n", + "# there, clone the directory again.\n", + "# ! git clone https://github.com/david-thrower/cerebros-core-algorithm-alpha.git\n", + "\n", + "# 2. Set the working directory (in the new session) - DO run this.\n", + "%cd cerebros-core-algorithm-alpha" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NemXTsYgfE0s", + "outputId": "ca92342f-1f82-42ee-8562-980b1c8dd849" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/cerebros-core-algorithm-alpha\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Verify we are in the right place:\n", + "! 
pwd" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "D3K4dSVQhrIc", + "outputId": "5a45fa94-1bb3-46ce-c362-27f456221fd6" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/cerebros-core-algorithm-alpha\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Standard library imports\n", + "import subprocess\n", + "import time\n", + "from gc import collect\n", + "\n", + "# Third-party library imports\n", + "import tensorflow as tf\n", + "import pandas as pd\n", + "import pendulum\n", + "from transformers import AutoTokenizer\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# Cerebros specific imports\n", + "from cerebros.units.units import DenseUnit\n", + "from cerebros.simplecerebrosrandomsearch.simple_cerebros_random_search import SimpleCerebrosRandomSearch\n", + "from cerebros.denseautomlstructuralcomponent.dense_automl_structural_component import (\n", + " zero_7_exp_decay,\n", + " zero_95_exp_decay,\n", + " simple_sigmoid\n", + ")\n", + "from cerebrosllmutils.llm_utils import (\n", + " prepare_data,\n", + " InterleavedRoPE,\n", + " Perplexity,\n", + " CerebrosNotGPTConfig,\n", + " CerebrosNotGPT,\n", + " WarmupCosineDecayRestarts\n", + ")\n", + "\n", + "# Import the data source: Format List[str]\n", + "from vanilladatasets.web_english_bible import samples as bible\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "WKCdCv96X4YX", + "outputId": "875f6626-4f4b-426c-c697-da9f186e440a" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/jaxlib/plugin_support.py:71: RuntimeWarning: JAX plugin jax_cuda12_plugin version 0.7.2 is installed, but it is not compatible with the installed jaxlib version 0.5.3, so it will not be used.\n", + " warnings.warn(\n" + ] + } + ] + }, + { + 
"cell_type": "markdown", + "source": [ + "# Data and Training Constants\n", + "\n", + "These parameters control the amount of data used and the behavior of the training stages.\n", + "\n", + "- **PHASE_I_A_SAMPLES_TO_CREATE**: Size of the subset of the dataset used for the NAS (Neural Architecture Search) stage (number of text samples).\n", + "- **PHASE_I_B_SAMPLES_TO_CREATE**: Number of samples to use for the main training task stage after Neural Architecture Search is completed.\n", + "- **PHASE_I_B_VAL_SPLIT**: Fraction of data for validation in Phase I-b.\n", + "- **PHASE_I_B_SAMPLE_EXPANSION_BATCH_SIZE**: Batch size for preprocessing in Phase I-b to manage RAM.\n", + "- **PROMPT_LENGTH**: Number of tokens provided to the model to predict the next token. It is recommended to keep this as 1.\n" + ], + "metadata": { + "id": "rK0LZP7KbQqm" + } + }, + { + "cell_type": "code", + "source": [ + "# Samples to use for the neural architecture search stage\n", + "PHASE_I_A_SAMPLES_TO_CREATE = 10\n", + "\n", + "# Samples to use for the main training stage\n", + "PHASE_I_B_SAMPLES_TO_CREATE = 20\n", + "PHASE_I_B_VAL_SPLIT = 0.15\n", + "\n", + "# For Stage I-b, we preprocess in batches to avoid high RAM usage.\n", + "PHASE_I_B_SAMPLE_EXPANSION_BATCH_SIZE = 10\n", + "\n", + "# How many tokens to provide before expecting the next token to be predicted.\n", + "PROMPT_LENGTH = 1\n" + ], + "metadata": { + "id": "vywbZQxAZC9R" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Model and Embedding Constants\n", + "\n", + "These constants define the size and shape of the model's text processing components.\n", + "\n", + "- **MAX_SEQ_LENGTH**: The maximum sequence length the model will handle. This has a linear relationship with RAM/CPU usage.\n", + "- **tokenizer_checkpoint**: The Hugging Face model to use for tokenization.\n", + "- **EMBEDDING_N**: A factor to determine the embedding dimensionality (EMBEDDING_DIM = EMBEDDING_N * 2). 
The resulting embedding dimensionality (EMBEDDING_DIM) for InterleavedRoPE must be an even number. Using this parameter as a proxy, rather than setting EMBEDDING_DIM directly, acts as a guard rail to ensure this constraint is met.\n", + "- **PROJECTION_N**: Controls the size of a projection layer after embedding. Increasing this value can significantly increase RAM usage.\n" + ], + "metadata": { + "id": "5jK5wbA5b8se" + } + }, + { + "cell_type": "code", + "source": [ + "# Text encoding / embedding related constants\n", + "MAX_SEQ_LENGTH = 40\n", + "\n", + "# Tokenization\n", + "tokenizer_checkpoint = \"HuggingFaceTB/SmolLM3-3B\"\n", + "tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)\n", + "\n", + "# Add special tokens for potential instruction-following formats\n", + "special_tokens = {\n", + " \"additional_special_tokens\": [\"<prompt>\", \"</prompt>\", \"<response>\", \"</response>\"]\n", + "}\n", + "tokenizer.add_special_tokens(special_tokens)\n", + "\n", + "VOCABULARY_SIZE = len(tokenizer)\n", + "\n", + "# For InterleavedRoPE, the embedding output dim must be an even number.\n", + "EMBEDDING_N = 6\n", + "EMBEDDING_DIM = int(EMBEDDING_N * 2)\n", + "\n", + "# Size of the projection layer. 
Keep low to manage RAM.\n", + "PROJECTION_N = 1\n" + ], + "metadata": { + "id": "4Kka_A4tb3aJ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "6c85d1ae-52f4-4ddf-d768-ea5781b1b7da" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Stage I-a (NAS) Hyperparameters\n", + "\n", + "These parameters control the Neural Architecture Search process.\n", + "\n", + "- **moities_to_try**: Number of different layer permutations to try.\n", + "- **tries_per_moity**: Number of topologies to try for each permutation.\n", + "- **epochs, batch_size, learning_rate**: Standard training parameters for the NAS stage.\n", + "- **predecessor_level_connection_affinity_factor_first**: Controls connectivity density between the Input layer and the first level of Dense layers.\n", + "- **predecessor_level_connection_affinity_factor_main**: Controls connectivity density between the first level of Dense layers and the subsequent level of Dense layers, as well as all subsequent vertical connectivity.\n", + "- **p_lateral_connection, num_lateral_connection_tries_per_unit**: Control the density of lateral connectivity between Dense layers on the same row.\n", + "- **minimum_levels, maximum_levels**: Number of **rows of** Dense layers in the architecture grid.\n", + "- 
**minimum_units_per_level, maximum_units_per_level**: Number of Dense layers per row.\n", + "- **minimum_neurons_per_unit, maximum_neurons_per_unit**: The number of neurons for each Dense layer unit.\n" + ], + "metadata": { + "id": "MeoWtePacWz_" + } + }, + { + "cell_type": "code", + "source": [ + "# Cerebros [non-HP-tunable] configurables for NAS\n", + "moities_to_try = 3\n", + "tries_per_moity = 1\n", + "\n", + "### Main tunable hyperparameters for NAS ##\n", + "\n", + "POSITIONAL_EMBEDDING_DROPOUT = 0.7651951380000674\n", + "activation = 'softplus'\n", + "\n", + "# Vertical connectivity hyperparameters\n", + "predecessor_level_connection_affinity_factor_first = 17.851026458010523\n", + "predecessor_level_connection_affinity_factor_main = 21.487301631581428\n", + "\n", + "# Lateral connectivity hyperparameters\n", + "max_consecutive_lateral_connections = 7\n", + "p_lateral_connection = 0.24927354102044022\n", + "num_lateral_connection_tries_per_unit = 32\n", + "learning_rate = 0.003025583248301791\n", + "epochs = 41\n", + "batch_size = 5\n", + "gradient_accumulation_steps = 4\n", + "\n", + "# Architecture grid constraints\n", + "minimum_levels = 2\n", + "maximum_levels = 2\n", + "minimum_units_per_level = 2\n", + "maximum_units_per_level = 2\n", + "minimum_neurons_per_unit = 2\n", + "maximum_neurons_per_unit = 2\n" + ], + "metadata": { + "id": "Wbowkxnbc4Zd" + }, + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Phase I-b (Extended Training) Hyperparameters\n", + "\n", + "These parameters control the extended training of the best model found in Phase I-a.\n", + "\n", + "- **INITIAL_LR_STAGE_I_B**: Initial learning rate for this phase.\n", + "- **WARMUP_EPOCHS_STAGE_I_B, WARMUP_STEPS, FIRST_DECAY_STEPS_STAGE_I_B**: Parameters for the learning rate scheduler.\n", + "- **phase_i_b_epochs, phase_i_b_gradient_accumulation_steps**: Number of epochs and number of gradient accumulation steps for extended training.\n", + "- **phase_i_b_weight_decay**: Weight decay for the optimizer.\n" + ], + "metadata": { + "id": "fcGTs9ASdXps" + } + }, + { + "cell_type": "code", + 
"source": [ + "\n", + "## Training Stage I-b parameters:\n", + "INITIAL_LR_STAGE_I_B = 0.0039295722955565125\n", + "WARMUP_EPOCHS_STAGE_I_B = 7\n", + "WARMUP_STEPS = 1140\n", + "FIRST_DECAY_STEPS_STAGE_I_B = 1900\n", + "phase_i_b_epochs = 53\n", + "phase_i_b_gradient_accumulation_steps = 7\n", + "phase_i_b_weight_decay = 0.01647018768215773 # For AdamW\n" + ], + "metadata": { + "id": "-znwaddIdiKU" + }, + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "# Generation Constants\n", + "\n", + "Parameters used during the text generation evaluation phase." + ], + "metadata": { + "id": "vy5y6OXhdvzV" + } + }, + { + "cell_type": "code", + "source": [ + "## Generation time configurables:\n", + "GENERATION_PROMPT_LEN = 25\n", + "MAX_NEW_TOKENS = MAX_SEQ_LENGTH - GENERATION_PROMPT_LEN" + ], + "metadata": { + "id": "JHjCz9qXd5Gq" + }, + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# **Data Preparation**\n", + "\n", + "Here, we load and subset the dataset for both training Stages.\n", + "\n", + "\n", + "We first split the Bible text samples into two sets: one for Phase I-a (NAS) and a larger one for Phase I-b (extended training).\n" + ], + "metadata": { + "id": "N7fJIZ1md-0Y" + } + }, + { + "cell_type": "code", + "source": [ + "# Get training data from the bible text samples\n", + "non_instruct_samples = bible[:PHASE_I_A_SAMPLES_TO_CREATE]\n", + "phase_i_b_samples = bible[PHASE_I_A_SAMPLES_TO_CREATE:PHASE_I_B_SAMPLES_TO_CREATE + PHASE_I_A_SAMPLES_TO_CREATE]\n", + "\n", + "print(f\"Samples from KJV bible consisting of {len(non_instruct_samples)} look like this (sub-sample of 3): {non_instruct_samples[:3]}\")\n" + ], + "metadata": { + "id": "jIFxWcBzeLjN", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d46f8e34-3d7d-4fb4-dddc-bf1c45bae7ee" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Samples 
from KJV bible consisting of 10 look like this (sub-sample of 3): ['In the beginning God created the heavens and the earth.', \"The earth was formless and empty, with darkness over the deep and God's Spirit hovering over the waters.\", \"God said, 'Let there be light,' and there was light.\"]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Preprocess Data for Phase I-a (NAS)\n", + "\n", + "The Cerebros LLM is a single-head model. This means that each time the model is called, it returns only the next token. It does not regurgitate the cumulative sequence, nor does it have a separate head for each position in the sequence.\n", + "\n", + "For both training stages, each text sample is expanded into multiple input/label pairs, which we call \"sub-samples.\" There is one \"sub-sample\" for each token in the range between the first token and the first occurrence of a padding token or the end of the sequence, whichever comes first.\n", + "\n", + "For example, the sequence [t1, t2, t3] becomes:\n", + "\n", + " Input: [t1, 2, 2, 2], Label: [t2] # One hot encoded to VOCABULARY_SIZE\n", + " Input: [t1, t2, 2, 2], Label: [t3]\n", + " Input: [t1, t2, t3, 2], Label: [2]\n", + "\n", + "For training Stage I-a, we perform the entire expansion for its training data in memory. This is because the NAS does not yet support a tf.data.Dataset object. In the future, we may retrofit the NAS algorithm to support streaming preprocessing as well, allowing us to use a larger dataset for the NAS.\n", + "\n", + "For Stage I-b, the extended training stage, the same operation is done in batches. This is because this operation significantly increases the amount of memory required. The main reason for this is the one-hot encoded label, where the vocabulary size is 128,260. 
Since we do this in batches, this allows for a virtually unlimited number of samples to be processed.\n", + "\n", + "For reference, this is the preprocessing being applied:\n", + "\n", + "```python\n", + "def prepare_data(\n", + " data_0: List[str],\n", + " tokenizer_0: Any,\n", + " max_seq_length: int = 1024,\n", + " prompt_length: int = 1) -> Tuple[List[List[int]], List[List[int]], int]:\n", + "\n", + "\n", + " all_input_ids = []\n", + " all_labels = []\n", + "\n", + " pad_token_id = tokenizer_0.pad_token_id\n", + "\n", + " # Tokenize all data at once for efficiency\n", + " tokenized_data = tokenizer_0(\n", + " data_0,\n", + " max_length=max_seq_length,\n", + " padding='max_length',\n", + " truncation=True,\n", + " add_special_tokens=False # We'll handle special tokens manually\n", + " )\n", + " vocab_size = len(tokenizer_0)\n", + "\n", + " # Get the token ID for \n", + " end_prompt_token_id = tokenizer_0.encode(\"\", add_special_tokens=False)[0]\n", + "\n", + " # Process each sample\n", + " for sample_tokens in tokenized_data['input_ids']:\n", + " # Find the index of token\n", + " try:\n", + " end_prompt_index = sample_tokens.index(end_prompt_token_id)\n", + " except ValueError:\n", + " # If not found, treat sample as a non-instruct sample\n", + " end_prompt_index = (\n", + " prompt_length - 1) # int(np.ceil(len(sample_tokens) * (1/3))) # 0 ## 1. Give it a fair starting place to predict the next word 2. 
reduce the number of expanded samples\n", + "\n", + " # Find first pad token after \n", + " first_pad_index = None\n", + " for i in range(end_prompt_index + 1, len(sample_tokens)):\n", + " if sample_tokens[i] == pad_token_id:\n", + " first_pad_index = i\n", + " break\n", + "\n", + " # If no pad token found, use the end of sequence\n", + " if first_pad_index is None:\n", + " first_pad_index = len(sample_tokens)\n", + "\n", + " # Apply sliding window from after to first pad token\n", + " # Start from end_prompt_index + 1 (first token to predict)\n", + " # End at first_pad_index - 1 (last token to predict)\n", + " for i in range(end_prompt_index + 1, first_pad_index):\n", + " # Input: from start up to (but not including) token i\n", + " input_ids = sample_tokens[:i]\n", + "\n", + " # Pad or truncate to max_seq_length\n", + " if len(input_ids) > max_seq_length:\n", + " input_ids = input_ids[:max_seq_length]\n", + " else:\n", + " input_ids = input_ids + [pad_token_id] * (max_seq_length - len(input_ids))\n", + "\n", + " # Label: one-hot encoding of token at position i\n", + " next_token = sample_tokens[i]\n", + " label = [0] * vocab_size\n", + " label[next_token] = 1\n", + "\n", + " all_input_ids.append(input_ids)\n", + " all_labels.append(label)\n", + "\n", + " # Add final sample with pad token as label to indicate termination\n", + " if first_pad_index < len(sample_tokens): # Only if there's actually a pad token\n", + " input_ids = sample_tokens[:first_pad_index]\n", + "\n", + " # Pad or truncate to max_seq_length\n", + " if len(input_ids) > max_seq_length:\n", + " input_ids = input_ids[:max_seq_length]\n", + " else:\n", + " input_ids = input_ids + [pad_token_id] * (max_seq_length - len(input_ids))\n", + "\n", + " # Label: one-hot encoding of pad token\n", + " label = [0] * vocab_size\n", + " label[pad_token_id] = 1\n", + "\n", + " all_input_ids.append(input_ids)\n", + " all_labels.append(label)\n", + "\n", + " return all_input_ids, all_labels, vocab_size\n", + "```\n" 
+ ], + "metadata": { + "id": "8Tu8X9cVeQVD" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "# Preprocess data for Stage I-a training\n", + "x, y, vocab_size = prepare_data(data_0=non_instruct_samples,\n", + " tokenizer_0=tokenizer,\n", + " max_seq_length=MAX_SEQ_LENGTH,\n", + " prompt_length=PROMPT_LENGTH)\n", + "\n", + "# Split the preprocessed data for NAS training and validation\n", + "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.85, shuffle=False)\n", + "\n", + "# Package data into lists for the Cerebros AutoML component\n", + "x_train_tf = tf.constant(X_train, tf.int32)\n", + "y_train_tf = tf.constant(y_train, tf.float32)\n", + "x_train_packaged = [x_train_tf]\n", + "y_train_packaged = [y_train_tf]\n", + "\n", + "# Do the same for the validation data\n", + "x_test_tf = tf.constant(X_test, tf.int32)\n", + "y_test_tf = tf.constant(y_test, tf.float32)\n", + "x_test_packaged = [x_test_tf]\n", + "y_test_packaged = [y_test_tf]\n", + "\n", + "# Define input and output shapes for the AutoML model\n", + "INPUT_SHAPES = [(MAX_SEQ_LENGTH,)]\n", + "OUTPUT_SHAPES = [(VOCABULARY_SIZE)]\n" + ], + "metadata": { + "id": "EDyuTMLufYvs" + }, + "execution_count": 11, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Train, Test Split of the Data for Stage I-b training\n", + "\n", + "We split the larger Phase I-b dataset into training and validation sets. Again, this dataset will be processed by a streaming generator in batches to avoid memory saturation and make the training more scalable. We will revisit that later." 
+ ], + "metadata": { + "id": "zX60zcpykasl" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "# Split the phase I-b data set for training and validation\n", + "phase_i_b_train_samples, phase_i_b_val_samples = train_test_split(\n", + " phase_i_b_samples,\n", + " test_size=PHASE_I_B_VAL_SPLIT,\n", + " shuffle=False\n", + ")\n" + ], + "metadata": { + "id": "SMSdkFRPkg7D" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "phase_i_b_train_samples[:3]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Oqw-T7bOo1GD", + "outputId": "2e8f24fc-24c2-4a06-babb-550b676b7751" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[\"God said, 'Let the earth produce vegetation, seed-bearing plants, and fruit trees, each according to its kind,' and it was so.\",\n", + " 'The earth brought forth grass, seed-bearing herbs, and fruit trees, each with its seed, and God saw that it was good.',\n", + " 'There was evening and morning, the third day.']" + ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "X_train[:2]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Hv_52izIjOQ7", + "outputId": "e2972924-0190-4f16-9317-c00100486203" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[[644,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", 
+ " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012],\n", + " [644,\n", + " 279,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012,\n", + " 128012]]" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Base Text Embedding Model Definition\n", + "\n", + "- Before we run the NAS, we define a base model that handles token embeddings and positional embeddings.\n", + "- The SimpleCerebrosRandomSearch will then attach its auto-generated lattice of dense layers on top of this base model.\n", + "- The Cerebros NAS takes an init parameter base_models: List[tf.keras.Model]\n" + ], + "metadata": { + "id": "11Ri4PtKktih" + } + }, + { + "cell_type": "code", + "source": [ + "####### Text embedding base model #####################\n", + "\n", + "inp = tf.keras.layers.Input(shape=(MAX_SEQ_LENGTH,), dtype=tf.int32)\n", + "\n", + "# Token embedding layer\n", + "embedded = tf.keras.layers.Embedding(\n", + " input_dim=VOCABULARY_SIZE,\n", + " output_dim=EMBEDDING_DIM,\n", + " input_length=MAX_SEQ_LENGTH,\n", + " mask_zero=False\n", + ")(inp)\n", + "\n", + "# Interleaved Rotary Positional Embedding (iRoPE)\n", + "position_embedding = InterleavedRoPE(\n", + " dim=EMBEDDING_DIM,\n", + " max_seq_len=MAX_SEQ_LENGTH,\n", + ")(embedded)\n", + "\n", + "# Concatenate token and positional embeddings\n", + "x = 
tf.keras.layers.Concatenate()([embedded, position_embedding])\n", + "x = tf.keras.layers.Dropout(POSITIONAL_EMBEDDING_DROPOUT)(x)\n", + "\n", + "# Flatten and project to the desired dimension\n", + "flattened = tf.keras.layers.Flatten()(x)\n", + "projected = tf.keras.layers.Dense(EMBEDDING_DIM * PROJECTION_N)(flattened)\n", + "\n", + "# Create the base Keras model\n", + "cerebros_base_model = tf.keras.Model(\n", + " inputs=inp,\n", + " outputs=projected\n", + ")\n" + ], + "metadata": { + "id": "tn1qrGISn_Pe", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "e76e091c-6e7f-4820-ef79-15143f1e6b64" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/keras/src/layers/core/embedding.py:97: UserWarning: Argument `input_length` is deprecated. Just remove it.\n", + " warnings.warn(\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## FYI: The iRoPE Embedding:\n", + "\n", + "The RoPE embedding, and helper functions it depends on (previously imported from the local package cerebrosllmutils):\n", + "\n", + "- iRoPE: Interleaved Rotary Positional Embedding\n", + "- RoPE: Rotary Positional Embedding\n", + "- The Rotary Positional Embedding expresses positional relationships as angles, extends feasible context window.\n", + "- iRoPE: iRoPE applies the rotation in an interleaved manner and enables capturing more nuance and extending context windows feasible to around 2 million tokens.\n", + "\n", + "```python\n", + "# --- Base Rotary Positional Embedding\n", + "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='RotaryEmbedding')\n", + "class RotaryEmbedding(tf.keras.layers.Layer):\n", + " def __init__(self, dim, max_seq_len=1024, temperature=10000.0, **kwargs):\n", + " super().__init__(**kwargs)\n", + " self.dim = dim\n", + " # Ensure dim is even right at initialization\n", + " if self.dim % 2 != 0:\n", + " raise 
ValueError(f\"Embedding dimension `dim` ({self.dim}) must be even for RotaryEmbedding.\")\n", + " self.max_seq_len = max_seq_len\n", + " self.temperature = temperature\n", + " # *** No calculation or storage of inv_freq here or in build ***\n", + "\n", + " def build(self, input_shape):\n", + " # Build should primarily be for creating trainable weights, which we don't have.\n", + " # Call super().build() for Keras compatibility.\n", + " super().build(input_shape)\n", + "\n", + " def call(self, x): # Removed seq_len argument, calculate from x\n", + " shape = tf.shape(x)\n", + " batch_size = shape[0]\n", + " actual_seq_len = shape[1]\n", + "\n", + " # *** Calculate inv_freq inside call ***\n", + " inv_freq_base = tf.range(0, self.dim, 2, dtype=tf.float32)\n", + " inv_freq = 1.0 / (self.temperature ** (inv_freq_base / self.dim))\n", + " # Ensure inv_freq has the correct shape [dim/2]\n", + " inv_freq = tf.cast(inv_freq, dtype=x.dtype) # Match dtype early\n", + "\n", + " # Use actual_seq_len for calculations\n", + " position = tf.range(actual_seq_len, dtype=x.dtype) # Match dtype\n", + "\n", + " # Calculate sinusoid input using einsum or broadcasting\n", + " # Einsum approach: Ensure correct dimensions [seq_len, dim/2]\n", + " sinusoid_inp = tf.einsum(\"i,j->ij\", position, inv_freq)\n", + "\n", + " # Calculate sin and cos based on the actual sequence length\n", + " sin = tf.sin(sinusoid_inp)\n", + " cos = tf.cos(sinusoid_inp)\n", + "\n", + " # Repeat sin/cos for interleaving: [a, b] -> [a, a, b, b]\n", + " # Result needs shape [actual_seq_len, dim]\n", + " sin = tf.repeat(sin, 2, axis=-1)\n", + " cos = tf.repeat(cos, 2, axis=-1)\n", + "\n", + " # Expand dims for batch and tile\n", + " # Output shape needs to be [batch_size, actual_seq_len, dim]\n", + " # Add batch dimension: [1, actual_seq_len, dim]\n", + " sin = tf.expand_dims(sin, axis=0)\n", + " cos = tf.expand_dims(cos, axis=0)\n", + "\n", + " # Tile to match the batch size: [batch_size, actual_seq_len, dim]\n", + 
" sin = tf.tile(sin, [batch_size, 1, 1])\n", + " cos = tf.tile(cos, [batch_size, 1, 1])\n", + "\n", + " # Casting to x.dtype was already done for inv_freq, sin/cos will inherit\n", + " # sin = tf.cast(sin, x.dtype) # Already done via calculation chain\n", + " # cos = tf.cast(cos, x.dtype) # Already done via calculation chain\n", + "\n", + " # Return sin and cos needed by InterleavedRoPE\n", + " return sin, cos\n", + "\n", + " def get_config(self):\n", + " config = super().get_config()\n", + " config.update({\n", + " \"dim\": self.dim,\n", + " \"max_seq_len\": self.max_seq_len,\n", + " \"temperature\": self.temperature,\n", + " })\n", + " return config\n", + "\n", + " @classmethod\n", + " def from_config(cls, config):\n", + " return cls(**config)\n", + "\n", + "\n", + "# iRoPE helper functions\n", + "\n", + "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='split_alternate')\n", + "def split_alternate(x):\n", + " shape = tf.shape(x)\n", + " x = tf.reshape(x, [shape[0], shape[1], shape[2] // 2, 2])\n", + " x = tf.transpose(x, [0, 1, 3, 2])\n", + " x = tf.reshape(x, [shape[0], shape[1], -1])\n", + " return x\n", + "\n", + "\n", + "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='rotate_half')\n", + "def rotate_half(x):\n", + " x = split_alternate(x)\n", + " d = tf.shape(x)[-1]\n", + " rotated_x = tf.concat([-x[..., d // 2:], x[..., :d // 2]], axis=-1)\n", + " return tf.reshape(rotated_x, tf.shape(x))\n", + "\n", + "\n", + "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='apply_rotary_pos_emb')\n", + "def apply_rotary_pos_emb(x, sin, cos):\n", + " cos = tf.reshape(cos, [tf.shape(cos)[0], tf.shape(cos)[1], -1])\n", + " sin = tf.reshape(sin, [tf.shape(sin)[0], tf.shape(sin)[1], -1])\n", + " x_rotated = x * cos + rotate_half(x) * sin\n", + " return x_rotated\n", + "\n", + "\n", + "# interleaved Rotary Postional Embedding (iRoPE)\n", + 
"@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='InterleavedRoPE')\n", + "class InterleavedRoPE(tf.keras.layers.Layer):\n", + " def __init__(self, dim, max_seq_len=1024, **kwargs):\n", + " super().__init__(**kwargs)\n", + " if dim % 2 != 0:\n", + " raise ValueError(f\"Embedding dimension `dim` ({dim}) must be even for InterleavedRoPE.\")\n", + " self.dim = dim\n", + " self.max_seq_len = max_seq_len\n", + " # Instantiate the RotaryEmbedding layer\n", + " # Ensure the name is consistent if needed for saving/loading\n", + " self.rotary_emb = RotaryEmbedding(dim, max_seq_len, name=\"rotary_embedding\")\n", + "\n", + " def call(self, x):\n", + " # Get sin and cos from the RotaryEmbedding layer's call method\n", + " # *** Pass only 'x'. RotaryEmbedding calculates seq_len internally. ***\n", + " sin, cos = self.rotary_emb(x)\n", + "\n", + " # Apply the positional embeddings\n", + " x_embedded = apply_rotary_pos_emb(x, sin, cos)\n", + " return x_embedded\n", + "\n", + " def get_config(self):\n", + " config = super().get_config()\n", + " config.update({\n", + " \"dim\": self.dim,\n", + " \"max_seq_len\": self.max_seq_len,\n", + " })\n", + " # Keras handles nested layer serialization automatically\n", + " return config\n", + "\n", + " @classmethod\n", + " def from_config(cls, config):\n", + " # Keras handles nested layer restoration automatically\n", + " return cls(**config)\n", + "```" + ], + "metadata": { + "id": "CXtYv20vpkMY" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Custom metric Perplexity (previously imported from the local package cerebrosllmutils):\n", + "\n", + "Since there is not a Perplexity metric in tensorflow.keras.metrics, we created our own, and one designed for this single - head model.\n", + "\n", + "## This is what it looks like:\n", + "\n", + "```python\n", + "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='Perplexity')\n", + "class Perplexity(tf.keras.metrics.Metric):\n", + " 
\"\"\"\n", + " Computes perplexity, defined as e^(categorical crossentropy).\n", + " \"\"\"\n", + "\n", + " def __init__(self, name='perplexity', **kwargs):\n", + " super().__init__(name=name, **kwargs)\n", + " self.total_crossentropy = self.add_weight(name='total_crossentropy', initializer='zeros')\n", + " self.count = self.add_weight(name='count', initializer='zeros')\n", + "\n", + " def update_state(self, y_true, y_pred, sample_weight=None):\n", + " # Calculate categorical crossentropy\n", + " crossentropy = tf.keras.losses.categorical_crossentropy(y_true, y_pred)\n", + "\n", + " # Update the running sum of crossentropy and the count of samples\n", + " self.total_crossentropy.assign_add(tf.reduce_sum(crossentropy))\n", + " self.count.assign_add(tf.cast(tf.shape(y_true)[0], dtype=tf.float32))\n", + "\n", + " def result(self):\n", + " # Compute the average crossentropy\n", + " average_crossentropy = self.total_crossentropy / self.count\n", + " # Compute perplexity as e^(average crossentropy)\n", + " return tf.exp(average_crossentropy)\n", + "\n", + " def reset_state(self):\n", + " # Reset the state variables\n", + " self.total_crossentropy.assign(0.0)\n", + " self.count.assign(0.0)\n", + "```\n" + ], + "metadata": { + "id": "uN3adqRLo61X" + } + }, + { + "cell_type": "code", + "source": [ + "# Custom metric: Perplexity\n", + "perplexity_metric = Perplexity()" + ], + "metadata": { + "id": "_8uTBW_to7iQ" + }, + "execution_count": 16, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "# Stage I-a training: Neural Architecture Search (NAS)\n", + "\n", + "We now run the SimpleCerebrosRandomSearch to find the best performing architecture based on the training data and the base model. The search aims to minimize the perplexity in the training set. 
Obviously, in a full-scale run, we would use the validation set's value.\n", + "\n", + "- The Cerebros NAS will parse a block composed of rows (Levels) of multiple Dense layers (Units) with an overlapping, interleaved, interwoven topology both laterally between Dense layers on the same row and vertically between layers on different levels.\n", + "- This topology emulates the neuroscience principle of modularity.\n", + "- This topology allows local clusters of densely connected neurons to learn specialized fragments of a problem, while allowing efficient communication between these clusters to coordinate among themselves to compose a solution to a complex problem.\n", + "\n", + "For the deep technical details of how Cerebros NAS works: [How Cerebros NAS Works](https://github.com/david-thrower/cerebros-core-algorithm-alpha/blob/277-attempt-to-imporve-parameters-on--dev-branch-275/documentation/cerebros-technical-details.md)\n", + "\n", + "## This is what a neural network parsed by Cerebros looks like:\n", + "\n", + "- Green triangles: Input layers\n", + "- Blue squares: Concatenate layer -> [BatchNormalization | Dropout]\n", + "- Pink ovals: Hidden Dense layers\n", + "- Red oval: Output Dense layer\n" + ], + "metadata": { + "id": "tWjbHiHRMhR4" + } + }, + { + "cell_type": "markdown", + "source": [ + "![Brain-lookalike1.png]()" + ], + "metadata": { + "id": "1wR8EVItNNh_" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "## For a more readable view of what this looks like\n", + "\n", + "![image.png]()\n" + ], + "metadata": { + "id": "_bXR1QxaLPiq" + } + }, + { + "cell_type": "code", + "source": [ + "######## Instantiate Cerebros Neural Architecture Search #######\n", + "\n", + "# Project metadata\n", + "TIME = pendulum.now(tz='America/New_York').__str__()[:16].replace('T', '_').replace(':', '_').replace('-', '_')\n", + "PROJECT_NAME = f'{TIME}_cerebros_not-gpt'\n", + "meta_trial_number = 42\n", + "\n", + "# Initialize the AutoML search\n", + 
"cerebros_automl = SimpleCerebrosRandomSearch(\n", + " unit_type=DenseUnit,\n", + " input_shapes=INPUT_SHAPES,\n", + " output_shapes=OUTPUT_SHAPES,\n", + " training_data=x_train_packaged,\n", + " labels=y_train_packaged,\n", + " validation_split=0.2,\n", + " direction='minimize',\n", + " metric_to_rank_by=\"perplexity\",\n", + " minimum_levels=minimum_levels,\n", + " maximum_levels=maximum_levels,\n", + " minimum_units_per_level=minimum_units_per_level,\n", + " maximum_units_per_level=maximum_units_per_level,\n", + " minimum_neurons_per_unit=minimum_neurons_per_unit,\n", + " maximum_neurons_per_unit=maximum_neurons_per_unit,\n", + " activation=activation,\n", + " final_activation='softmax',\n", + " number_of_architecture_moities_to_try=moities_to_try,\n", + " number_of_tries_per_architecture_moity=tries_per_moity,\n", + " predecessor_level_connection_affinity_factor_first=predecessor_level_connection_affinity_factor_first,\n", + " predecessor_level_connection_affinity_factor_main=predecessor_level_connection_affinity_factor_main,\n", + " predecessor_level_connection_affinity_factor_decay_main=zero_7_exp_decay,\n", + " max_consecutive_lateral_connections=max_consecutive_lateral_connections,\n", + " p_lateral_connection=p_lateral_connection,\n", + " p_lateral_connection_decay=zero_95_exp_decay,\n", + " num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit,\n", + " learning_rate=learning_rate,\n", + " loss=tf.keras.losses.CategoricalCrossentropy(),\n", + " metrics=[tf.keras.metrics.CategoricalAccuracy(), perplexity_metric],\n", + " epochs=epochs,\n", + " project_name=f\"{PROJECT_NAME}_meta_{meta_trial_number}\",\n", + " model_graphs='model_graphs',\n", + " batch_size=batch_size,\n", + " gradient_accumulation_steps=gradient_accumulation_steps,\n", + " meta_trial_number=meta_trial_number,\n", + " base_models=[cerebros_base_model],\n", + " train_data_dtype=tf.int32\n", + ")" + ], + "metadata": { + "id": "XV2q_5WEwBJ0" + }, + "execution_count": 17, + 
"outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Run the Cerebros Neural Architecture Search\n" + ], + "metadata": { + "id": "TJVLfmJ2virA" + } + }, + { + "cell_type": "code", + "source": [ + "cerebros_t0 = time.time()\n", + "phase_i_a_result_0 = cerebros_automl.run_random_search()\n", + "cerebros_t1 = time.time()\n", + "\n", + "# Report results\n", + "cerebros_time_all_models_min = (cerebros_t1 - cerebros_t0) / 60\n", + "models_tried = moities_to_try * tries_per_moity\n", + "cerebros_time_per_model = cerebros_time_all_models_min / models_tried\n", + "phase_i_a_result = float(phase_i_a_result_0)\n", + "\n", + "print(f\"Cerebros trained {models_tried} models in {cerebros_time_all_models_min:.2f} min. Average time per model: {cerebros_time_per_model:.2f} min.\")\n", + "print(f'Cerebros best perplexity achieved in Phase I-a is {phase_i_a_result}')" + ], + "metadata": { + "id": "ulL0EGnow5L7", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "d56dd1ec-2f7b-4a3c-ecc6-75e595910367" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\rGlobal task progress: 0%|\u001b[38;2;22;206;235m \u001b[0m| 0/3 [00:00nnf>ceil\n", + "k is: 0 value is: [{'1': }]\n", + "0\n", + "k is: 1 value is: [{'2': }, {'2': }]\n", + "1\n", + "Trying to create level 1\n", + "We think level 1's predecessors are: [0]\n", + "k is: 2 value is: [{'128260': }]\n", + "2\n", + "Trying to create Final level 2\n", + "Trying to create level 2\n", + "We think level final level 2's predecessors are: [0, 1]\n", + "levels:\n", + "[0, 1, 2]\n", + "{'0': 'InputUnitModule'}\n", + "InputLevel.input_shapes [(40,)]\n", + "{'2': }\n", + "{'2': }\n", + "Debug: I am 2 selecting 1\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "Setting levels_unmaterialized[0] level_number 0 to have first successor: levels_unmaterialized[:1], having level_numbers of [1, 
2]\n", + "Setting levels_unmaterialized[1] level_number 1 to have first successor: levels_unmaterialized[:2], having level_numbers of [2]\n", + "Debug: successor_connectivity_errors_2d []\n", + "$$$$$$>>>>> Base model: \n", + "InputUnit.input_shape: (40,)\n", + "{'2': }\n", + "{'2': }\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "Debug: successor_connectivity_errors_2d []\n", + "Debug: successor_connectivity_errors_2d []\n", + "materialize:_NeuralNetworkFuture_0000000000000nan_tr_0_DenseLevel_0000000000000001_tr_0_DenseUnit_0000000000000001_tr_0_0 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialize:_NeuralNetworkFuture_0000000000000nan_tr_0_DenseLevel_0000000000000001_tr_0_DenseUnit_0000000000000001_tr_0_1 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "{'128260': }\n", + "debug: meta_level_number\n", + "Debug: successor_connectivity_errors_2d []\n", + "materialize:_NeuralNetworkFuture_0000000000000nan_tr_0_FinalDenseLevel_0000000000000002_tr_0_FinalDenseUnit_0000000000000002_tr_0_0 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "inputs\n", + "\n", + "\n", + "outputs\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + 
"\u001b[1mModel: \"NeuralNetworkFuture_0000000000000nan_tr_0_nn_materialized\"\u001b[0m\n" + ], + "text/html": [ + "
Model: \"NeuralNetworkFuture_0000000000000nan_tr_0_nn_materialized\"\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n", + "โ”ƒ\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0mโ”ƒ\n", + "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m40\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ - โ”‚\n", + "โ”‚ (\u001b[38;5;33mInputLayer\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ functional โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m12\u001b[0m) โ”‚ \u001b[38;5;34m1,550,652\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mFunctional\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ 
(\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ 
โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ 
(\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], 
โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m0\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ 
โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m1,584\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128260\u001b[0m) โ”‚ \u001b[38;5;34m50,919,220\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n" + ], + "text/html": [ + "
โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n",
+              "โ”ƒ Layer (type)        โ”ƒ Output Shape      โ”ƒ    Param # โ”ƒ Connected to      โ”ƒ\n",
+              "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 40)        โ”‚          0 โ”‚ -                 โ”‚\n",
+              "โ”‚ (InputLayer)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ functional          โ”‚ (None, 12)        โ”‚  1,550,652 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Functional)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0]  โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0]  โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚          0 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[0][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚      1,584 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 128260)    โ”‚ 50,919,220 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m52,476,644\u001b[0m (200.18 MB)\n" + ], + "text/html": [ + "
 Total params: 52,476,644 (200.18 MB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m52,474,124\u001b[0m (200.17 MB)\n" + ], + "text/html": [ + "
 Trainable params: 52,474,124 (200.17 MB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m2,520\u001b[0m (9.84 KB)\n" + ], + "text/html": [ + "
 Non-trainable params: 2,520 (9.84 KB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "None\n", + "Epoch 1/41\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/keras/src/models/functional.py:241: UserWarning: The structure of `inputs` doesn't match the expected structure.\n", + "Expected: NeuralNetworkFuture_0000000000000nan_tr_0_InputLevel_0000000000000000_tr_0_InputUnit_0000000000000000_tr_0_0_inp\n", + "Received: inputs=('Tensor(shape=(None, 40))',)\n", + " warnings.warn(msg)\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m10s\u001b[0m 752ms/step - categorical_accuracy: 0.0000e+00 - loss: 11.7672 - perplexity: 128956.3438 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7557 - val_perplexity: 127482.9922\n", + "Epoch 2/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 547ms/step - categorical_accuracy: 0.0000e+00 - loss: 11.6423 - perplexity: 113970.0938 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7555 - val_perplexity: 127447.9844\n", + "Epoch 3/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 785ms/step - categorical_accuracy: 0.1574 - loss: 11.5549 - perplexity: 104629.7031 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7555 - val_perplexity: 127457.9531\n", + "Epoch 4/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 510ms/step - categorical_accuracy: 0.1518 - loss: 11.2911 - perplexity: 80904.3125 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7396 - val_perplexity: 125437.5078\n", + "Epoch 
5/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 594ms/step - categorical_accuracy: 0.0000e+00 - loss: 11.7358 - perplexity: 125450.3906 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7296 - val_perplexity: 124199.8984\n", + "Epoch 6/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 869ms/step - categorical_accuracy: 0.1185 - loss: 11.0556 - perplexity: 65319.5391 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7240 - val_perplexity: 123501.3828\n", + "Epoch 7/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 589ms/step - categorical_accuracy: 0.0506 - loss: 11.3671 - perplexity: 90319.2578 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7149 - val_perplexity: 122378.4219\n", + "Epoch 8/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 539ms/step - categorical_accuracy: 0.0000e+00 - loss: 11.1610 - perplexity: 70926.6328 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7186 - val_perplexity: 122839.8203\n", + "Epoch 9/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 629ms/step - categorical_accuracy: 0.1496 - loss: 10.9728 - perplexity: 66133.8672 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7250 - val_perplexity: 123618.0391\n", + "Epoch 10/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 561ms/step - categorical_accuracy: 0.1475 - loss: 10.0717 - perplexity: 24002.7051 - val_categorical_accuracy: 0.1667 - val_loss: 11.7307 - 
val_perplexity: 124332.1562\n", + "Epoch 11/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 558ms/step - categorical_accuracy: 0.2024 - loss: 10.4731 - perplexity: 36918.5938 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7395 - val_perplexity: 125429.9766\n", + "Epoch 12/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 530ms/step - categorical_accuracy: 0.0000e+00 - loss: 10.1507 - perplexity: 27366.1113 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7502 - val_perplexity: 126783.1797\n", + "Epoch 13/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 750ms/step - categorical_accuracy: 0.0734 - loss: 10.4913 - perplexity: 43854.1094 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7526 - val_perplexity: 127089.4531\n", + "Epoch 14/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 583ms/step - categorical_accuracy: 0.3086 - loss: 9.0654 - perplexity: 9803.9824 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7553 - val_perplexity: 127423.6797\n", + "Epoch 15/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 528ms/step - categorical_accuracy: 0.2697 - loss: 9.0867 - perplexity: 10961.3613 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7623 - val_perplexity: 128316.8125\n", + "Epoch 16/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 582ms/step - categorical_accuracy: 0.0685 - loss: 9.1616 - perplexity: 10116.4492 - 
val_categorical_accuracy: 0.0000e+00 - val_loss: 11.8354 - val_perplexity: 138047.7344\n", + "Epoch 17/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 797ms/step - categorical_accuracy: 0.1518 - loss: 7.9130 - perplexity: 2808.9939 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.8843 - val_perplexity: 144976.3594\n", + "Epoch 18/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 1s/step - categorical_accuracy: 0.2169 - loss: 7.4165 - perplexity: 1843.1222 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.9479 - val_perplexity: 154489.3906\n", + "Epoch 19/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 510ms/step - categorical_accuracy: 0.1996 - loss: 8.1748 - perplexity: 4106.3154 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.0354 - val_perplexity: 168615.2344\n", + "Epoch 20/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 742ms/step - categorical_accuracy: 0.0839 - loss: 7.6041 - perplexity: 2107.0347 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.1744 - val_perplexity: 193765.5312\n", + "Epoch 21/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 601ms/step - categorical_accuracy: 0.2080 - loss: 7.4821 - perplexity: 1883.4858 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.2587 - val_perplexity: 210814.2656\n", + "Epoch 22/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 526ms/step - categorical_accuracy: 0.2036 - loss: 7.1881 
- perplexity: 1867.2930 - val_categorical_accuracy: 0.1667 - val_loss: 12.3405 - val_perplexity: 228771.9219\n", + "Epoch 23/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 792ms/step - categorical_accuracy: 0.1919 - loss: 7.0572 - perplexity: 1222.1584 - val_categorical_accuracy: 0.1667 - val_loss: 12.4140 - val_perplexity: 246219.7031\n", + "Epoch 24/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 688ms/step - categorical_accuracy: 0.1685 - loss: 5.6640 - perplexity: 308.2304 - val_categorical_accuracy: 0.1667 - val_loss: 12.5863 - val_perplexity: 292515.5625\n", + "Epoch 25/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 609ms/step - categorical_accuracy: 0.1407 - loss: 6.4666 - perplexity: 751.4036 - val_categorical_accuracy: 0.1667 - val_loss: 12.6761 - val_perplexity: 320013.0000\n", + "Epoch 26/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 895ms/step - categorical_accuracy: 0.0839 - loss: 5.3843 - perplexity: 352.6757 - val_categorical_accuracy: 0.1667 - val_loss: 12.7511 - val_perplexity: 344943.8125\n", + "Epoch 27/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 516ms/step - categorical_accuracy: 0.2120 - loss: 5.6307 - perplexity: 300.5551 - val_categorical_accuracy: 0.1667 - val_loss: 12.8756 - val_perplexity: 390664.9688\n", + "Epoch 28/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 614ms/step - categorical_accuracy: 0.1685 - loss: 4.4140 
- perplexity: 99.5634 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.1954 - val_perplexity: 537862.5000\n", + "Epoch 29/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 517ms/step - categorical_accuracy: 0.0568 - loss: 5.8209 - perplexity: 412.2969 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.3590 - val_perplexity: 633498.9375\n", + "Epoch 30/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 748ms/step - categorical_accuracy: 0.1864 - loss: 4.9144 - perplexity: 157.2443 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.5253 - val_perplexity: 748103.1875\n", + "Epoch 31/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 507ms/step - categorical_accuracy: 0.1052 - loss: 8.2503 - perplexity: 22384.6094 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.6228 - val_perplexity: 824754.3750\n", + "Epoch 32/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 612ms/step - categorical_accuracy: 0.4431 - loss: 4.0581 - perplexity: 75.1973 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.0377 - val_perplexity: 1248790.8750\n", + "Epoch 33/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 766ms/step - categorical_accuracy: 0.2086 - loss: 5.6123 - perplexity: 301.7467 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.2131 - val_perplexity: 1488169.6250\n", + "Epoch 34/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 569ms/step - 
categorical_accuracy: 0.2919 - loss: 4.4319 - perplexity: 154.5172 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.2928 - val_perplexity: 1611684.3750\n", + "Epoch 35/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 585ms/step - categorical_accuracy: 0.1802 - loss: 5.1381 - perplexity: 190.7273 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.4868 - val_perplexity: 1956789.0000\n", + "Epoch 36/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 639ms/step - categorical_accuracy: 0.1719 - loss: 4.6314 - perplexity: 111.0518 - val_categorical_accuracy: 0.1667 - val_loss: 14.5656 - val_perplexity: 2117109.5000\n", + "Epoch 37/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 693ms/step - categorical_accuracy: 0.0839 - loss: 6.8925 - perplexity: 1205.9113 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.6420 - val_perplexity: 2285232.2500\n", + "Epoch 38/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 594ms/step - categorical_accuracy: 0.2530 - loss: 5.8083 - perplexity: 927.8478 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.6140 - val_perplexity: 2222210.0000\n", + "Epoch 39/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 557ms/step - categorical_accuracy: 0.1913 - loss: 4.0802 - perplexity: 62.5591 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.5886 - val_perplexity: 2166540.2500\n", + "Epoch 40/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m 
\u001b[1m4s\u001b[0m 757ms/step - categorical_accuracy: 0.2987 - loss: 4.2323 - perplexity: 90.9604 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.6538 - val_perplexity: 2312421.2500\n", + "Epoch 41/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 529ms/step - categorical_accuracy: 0.2453 - loss: 3.7488 - perplexity: 50.1163 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.6478 - val_perplexity: 2298534.5000\n", + "this is neural_network_spec_file 2025_11_23 16_55_cerebros_not-gpt_meta_42/model_architectures/tr_0000000000000000_subtrial_0000000000000000.txt\n", + "returning trial 0 oracles\n", + " categorical_accuracy loss perplexity val_categorical_accuracy \\\n", + "0 0.000000 11.769061 129192.796875 0.000000 \n", + "1 0.000000 11.635833 113077.960938 0.000000 \n", + "2 0.130435 11.652204 114944.367188 0.000000 \n", + "3 0.130435 11.464634 95285.593750 0.000000 \n", + "4 0.000000 11.768666 129141.796875 0.000000 \n", + "5 0.130435 10.994949 59572.500000 0.000000 \n", + "6 0.043478 11.276978 78982.257812 0.000000 \n", + "7 0.000000 11.120511 67542.414062 0.000000 \n", + "8 0.173913 10.726726 45557.273438 0.000000 \n", + "9 0.217391 10.059676 23380.933594 0.166667 \n", + "10 0.173913 10.355123 31417.570312 0.000000 \n", + "11 0.000000 10.472779 35340.300781 0.000000 \n", + "12 0.086957 10.171259 26140.964844 0.000000 \n", + "13 0.217391 9.254299 10449.392578 0.000000 \n", + "14 0.217391 8.896774 7308.360840 0.000000 \n", + "15 0.130435 9.018457 8254.035156 0.000000 \n", + "16 0.130435 8.039083 3099.770996 0.000000 \n", + "17 0.217391 7.848331 2561.456787 0.000000 \n", + "18 0.173913 7.948806 2832.192139 0.000000 \n", + "19 0.043478 7.698378 2204.769043 0.000000 \n", + "20 0.173913 7.669386 2141.766846 0.000000 \n", + "21 0.260870 6.773150 874.061218 0.166667 \n", + "22 0.217391 7.382279 1607.248413 0.166667 \n", + "23 0.130435 6.034015 417.387543 
0.166667 \n", + "24 0.130435 6.000526 403.641022 0.166667 \n", + "25 0.043478 6.586512 725.246826 0.166667 \n", + "26 0.260870 5.741646 311.576935 0.166667 \n", + "27 0.130435 5.138083 170.388733 0.000000 \n", + "28 0.086957 5.670679 290.231415 0.000000 \n", + "29 0.217391 5.602477 271.096985 0.000000 \n", + "30 0.173913 6.986033 1081.422852 0.000000 \n", + "31 0.304348 4.127844 62.044033 0.000000 \n", + "32 0.217391 5.934126 377.709869 0.000000 \n", + "33 0.217391 5.564253 260.930054 0.000000 \n", + "34 0.173913 5.642823 282.258331 0.000000 \n", + "35 0.173913 4.475579 87.845474 0.166667 \n", + "36 0.043478 6.194771 490.179321 0.000000 \n", + "37 0.217391 5.472395 238.029572 0.000000 \n", + "38 0.173913 4.001881 54.700928 0.000000 \n", + "39 0.304348 3.707729 40.761116 0.000000 \n", + "40 0.260870 4.130568 62.213223 0.000000 \n", + "\n", + " val_loss val_perplexity trial_number subtrial_number \\\n", + "0 11.755738 1.274830e+05 0 0 \n", + "1 11.755464 1.274480e+05 0 0 \n", + "2 11.755542 1.274580e+05 0 0 \n", + "3 11.739563 1.254375e+05 0 0 \n", + "4 11.729648 1.241999e+05 0 0 \n", + "5 11.724008 1.235014e+05 0 0 \n", + "6 11.714873 1.223784e+05 0 0 \n", + "7 11.718637 1.228398e+05 0 0 \n", + "8 11.724952 1.236180e+05 0 0 \n", + "9 11.730713 1.243322e+05 0 0 \n", + "10 11.739503 1.254300e+05 0 0 \n", + "11 11.750234 1.267832e+05 0 0 \n", + "12 11.752646 1.270895e+05 0 0 \n", + "13 11.755273 1.274237e+05 0 0 \n", + "14 11.762258 1.283168e+05 0 0 \n", + "15 11.835355 1.380477e+05 0 0 \n", + "16 11.884326 1.449764e+05 0 0 \n", + "17 11.947881 1.544894e+05 0 0 \n", + "18 12.035375 1.686152e+05 0 0 \n", + "19 12.174404 1.937655e+05 0 0 \n", + "20 12.258733 2.108143e+05 0 0 \n", + "21 12.340481 2.287719e+05 0 0 \n", + "22 12.413980 2.462197e+05 0 0 \n", + "23 12.586273 2.925156e+05 0 0 \n", + "24 12.676117 3.200130e+05 0 0 \n", + "25 12.751137 3.449438e+05 0 0 \n", + "26 12.875606 3.906650e+05 0 0 \n", + "27 13.195358 5.378625e+05 0 0 \n", + "28 13.359014 6.334989e+05 0 
0 \n", + "29 13.525296 7.481032e+05 0 0 \n", + "30 13.622841 8.247544e+05 0 0 \n", + "31 14.037686 1.248791e+06 0 0 \n", + "32 14.213058 1.488170e+06 0 0 \n", + "33 14.292789 1.611684e+06 0 0 \n", + "34 14.486815 1.956789e+06 0 0 \n", + "35 14.565562 2.117110e+06 0 0 \n", + "36 14.641978 2.285232e+06 0 0 \n", + "37 14.614013 2.222210e+06 0 0 \n", + "38 14.588642 2.166540e+06 0 0 \n", + "39 14.653806 2.312421e+06 0 0 \n", + "40 14.647781 2.298534e+06 0 0 \n", + "\n", + " model_name \n", + "0 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "1 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "2 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "3 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "4 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "5 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "6 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "7 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "8 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "9 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "10 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "11 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "12 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "13 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "14 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "15 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "16 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "17 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "18 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "19 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "20 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "21 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "22 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "23 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... 
\n", + "24 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "25 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "26 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "27 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "28 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "29 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "30 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "31 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "32 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "33 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "34 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "35 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "36 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "37 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "38 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "39 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "40 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", + " self.pid = os.fork()\n", + "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. 
os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", + " self.pid = os.fork()\n", + "Global task progress: 33%|\u001b[38;2;22;206;235mโ–ˆโ–ˆโ–ˆโ–Ž \u001b[0m| 1/3 [03:54<07:49, 234.85s/it]" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "SimpleCerebrosRandomSearch.input_shapes: [(40,)]\n", + "nan\n", + ">nnf>ceil\n", + "k is: 0 value is: [{'1': }]\n", + "0\n", + "k is: 1 value is: [{'2': }, {'2': }]\n", + "1\n", + "Trying to create level 1\n", + "We think level 1's predecessors are: [0]\n", + "k is: 2 value is: [{'128260': }]\n", + "2\n", + "Trying to create Final level 2\n", + "Trying to create level 2\n", + "We think level final level 2's predecessors are: [0, 1]\n", + "levels:\n", + "[0, 1, 2]\n", + "{'0': 'InputUnitModule'}\n", + "InputLevel.input_shapes [(40,)]\n", + "{'2': }\n", + "{'2': }\n", + "Debug: I am 2 selecting 1\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "Setting levels_unmaterialized[0] level_number 0 to have first successor: levels_unmaterialized[:1], having level_numbers of [1, 2]\n", + "Setting levels_unmaterialized[1] level_number 1 to have first successor: levels_unmaterialized[:2], having level_numbers of [2]\n", + "Debug: successor_connectivity_errors_2d []\n", + "$$$$$$>>>>> Base model: \n", + "InputUnit.input_shape: (40,)\n", + "{'2': }\n", + "{'2': }\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "Debug: successor_connectivity_errors_2d []\n", + "Debug: successor_connectivity_errors_2d []\n", + "materialize:_NeuralNetworkFuture_0000000000000nan_tr_1_DenseLevel_0000000000000001_tr_1_DenseUnit_0000000000000001_tr_1_0 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + 
"materialize:_NeuralNetworkFuture_0000000000000nan_tr_1_DenseLevel_0000000000000001_tr_1_DenseUnit_0000000000000001_tr_1_1 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "{'128260': }\n", + "debug: meta_level_number\n", + "Debug: successor_connectivity_errors_2d []\n", + "materialize:_NeuralNetworkFuture_0000000000000nan_tr_1_FinalDenseLevel_0000000000000002_tr_1_FinalDenseUnit_0000000000000002_tr_1_0 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "inputs\n", + "\n", + "\n", + "outputs\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1mModel: \"NeuralNetworkFuture_0000000000000nan_tr_1_nn_materialized\"\u001b[0m\n" + ], + "text/html": [ + "
Model: \"NeuralNetworkFuture_0000000000000nan_tr_1_nn_materialized\"\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n", + "โ”ƒ\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0mโ”ƒ\n", + "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m40\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ - โ”‚\n", + "โ”‚ (\u001b[38;5;33mInputLayer\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ functional โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m12\u001b[0m) โ”‚ \u001b[38;5;34m1,550,652\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mFunctional\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ 
(\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ 
โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ 
(\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], 
โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m1\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ 
โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m1,584\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128260\u001b[0m) โ”‚ \u001b[38;5;34m50,919,220\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n" + ], + "text/html": [ + "
โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n",
+              "โ”ƒ Layer (type)        โ”ƒ Output Shape      โ”ƒ    Param # โ”ƒ Connected to      โ”ƒ\n",
+              "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 40)        โ”‚          0 โ”‚ -                 โ”‚\n",
+              "โ”‚ (InputLayer)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ functional          โ”‚ (None, 12)        โ”‚  1,550,652 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Functional)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0]  โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0]  โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚          0 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[1][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚      1,584 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 128260)    โ”‚ 50,919,220 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m52,476,644\u001b[0m (200.18 MB)\n" + ], + "text/html": [ + "
 Total params: 52,476,644 (200.18 MB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m52,474,124\u001b[0m (200.17 MB)\n" + ], + "text/html": [ + "
 Trainable params: 52,474,124 (200.17 MB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m2,520\u001b[0m (9.84 KB)\n" + ], + "text/html": [ + "
 Non-trainable params: 2,520 (9.84 KB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "None\n", + "Epoch 1/41\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/keras/src/models/functional.py:241: UserWarning: The structure of `inputs` doesn't match the expected structure.\n", + "Expected: NeuralNetworkFuture_0000000000000nan_tr_1_InputLevel_0000000000000000_tr_1_InputUnit_0000000000000000_tr_1_0_inp\n", + "Received: inputs=('Tensor(shape=(None, 40))',)\n", + " warnings.warn(msg)\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 1s/step - categorical_accuracy: 0.0000e+00 - loss: 11.7384 - perplexity: 329529.3125 - val_categorical_accuracy: 0.1667 - val_loss: 11.7688 - val_perplexity: 129164.8281\n", + "Epoch 2/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 595ms/step - categorical_accuracy: 0.1913 - loss: 11.2528 - perplexity: 77375.8594 - val_categorical_accuracy: 0.1667 - val_loss: 11.7502 - val_perplexity: 126778.7031\n", + "Epoch 3/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 518ms/step - categorical_accuracy: 0.2191 - loss: 10.8135 - perplexity: 50491.5156 - val_categorical_accuracy: 0.1667 - val_loss: 11.7425 - val_perplexity: 125805.1797\n", + "Epoch 4/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 522ms/step - categorical_accuracy: 0.1864 - loss: 10.2940 - perplexity: 30868.9629 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7451 - val_perplexity: 126128.5781\n", + "Epoch 5/41\n", + 
"\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 820ms/step - categorical_accuracy: 0.1913 - loss: 9.7216 - perplexity: 16997.6719 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7362 - val_perplexity: 125020.5859\n", + "Epoch 6/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 645ms/step - categorical_accuracy: 0.1407 - loss: 8.9741 - perplexity: 8181.5312 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7171 - val_perplexity: 122652.5234\n", + "Epoch 7/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 593ms/step - categorical_accuracy: 0.1830 - loss: 8.4567 - perplexity: 4759.8066 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.6908 - val_perplexity: 119465.5703\n", + "Epoch 8/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 685ms/step - categorical_accuracy: 0.0506 - loss: 8.2385 - perplexity: 4355.3149 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.6441 - val_perplexity: 114018.0000\n", + "Epoch 9/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 651ms/step - categorical_accuracy: 0.2141 - loss: 7.1757 - perplexity: 1335.3220 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.6267 - val_perplexity: 112051.2734\n", + "Epoch 10/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 545ms/step - categorical_accuracy: 0.1830 - loss: 7.3339 - perplexity: 1963.8916 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.6375 - val_perplexity: 
113263.3828\n", + "Epoch 11/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 565ms/step - categorical_accuracy: 0.2669 - loss: 6.6371 - perplexity: 870.7467 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.6597 - val_perplexity: 115809.4375\n", + "Epoch 12/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 653ms/step - categorical_accuracy: 0.1719 - loss: 5.9232 - perplexity: 380.9991 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7503 - val_perplexity: 126796.4766\n", + "Epoch 13/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 626ms/step - categorical_accuracy: 0.0839 - loss: 7.4954 - perplexity: 2688.5974 - val_categorical_accuracy: 0.1667 - val_loss: 11.8025 - val_perplexity: 133587.0156\n", + "Epoch 14/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 555ms/step - categorical_accuracy: 0.0963 - loss: 6.5658 - perplexity: 758.4783 - val_categorical_accuracy: 0.1667 - val_loss: 11.8975 - val_perplexity: 146902.5625\n", + "Epoch 15/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 599ms/step - categorical_accuracy: 0.2419 - loss: 4.4233 - perplexity: 101.5967 - val_categorical_accuracy: 0.1667 - val_loss: 11.9977 - val_perplexity: 162383.4688\n", + "Epoch 16/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 597ms/step - categorical_accuracy: 0.2018 - loss: 4.8811 - perplexity: 147.2505 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.2702 - 
val_perplexity: 213244.7812\n", + "Epoch 17/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 514ms/step - categorical_accuracy: 0.2419 - loss: 4.8847 - perplexity: 212.5692 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.4334 - val_perplexity: 251053.4531\n", + "Epoch 18/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 516ms/step - categorical_accuracy: 0.1725 - loss: 5.0510 - perplexity: 216.2864 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.5886 - val_perplexity: 293192.5625\n", + "Epoch 19/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 1s/step - categorical_accuracy: 0.3348 - loss: 4.1482 - perplexity: 66.5400 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.7669 - val_perplexity: 350434.6875\n", + "Epoch 20/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 518ms/step - categorical_accuracy: 0.2364 - loss: 6.1440 - perplexity: 556.4460 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.1211 - val_perplexity: 499357.5000\n", + "Epoch 21/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 566ms/step - categorical_accuracy: 0.2752 - loss: 4.0937 - perplexity: 103.8000 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.2722 - val_perplexity: 580840.3125\n", + "Epoch 22/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 584ms/step - categorical_accuracy: 0.3582 - loss: 3.5086 - perplexity: 42.0227 - val_categorical_accuracy: 0.0000e+00 - 
val_loss: 13.3929 - val_perplexity: 655350.3750\n", + "Epoch 23/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 574ms/step - categorical_accuracy: 0.2357 - loss: 3.6651 - perplexity: 42.0124 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.5131 - val_perplexity: 739037.1875\n", + "Epoch 24/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 555ms/step - categorical_accuracy: 0.3743 - loss: 4.2759 - perplexity: 78.8337 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.6073 - val_perplexity: 812073.0625\n", + "Epoch 25/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 619ms/step - categorical_accuracy: 0.2814 - loss: 6.1106 - perplexity: 702.3881 - val_categorical_accuracy: 0.1667 - val_loss: 13.6209 - val_perplexity: 823172.5625\n", + "Epoch 26/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 689ms/step - categorical_accuracy: 0.2647 - loss: 6.2123 - perplexity: 835.9423 - val_categorical_accuracy: 0.1667 - val_loss: 13.5922 - val_perplexity: 799900.9375\n", + "Epoch 27/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 824ms/step - categorical_accuracy: 0.3014 - loss: 3.9091 - perplexity: 57.1766 - val_categorical_accuracy: 0.1667 - val_loss: 13.5968 - val_perplexity: 803528.0625\n", + "Epoch 28/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 594ms/step - categorical_accuracy: 0.2864 - loss: 5.1544 - perplexity: 186.2288 - val_categorical_accuracy: 
0.1667 - val_loss: 13.5879 - val_perplexity: 796426.8750\n", + "Epoch 29/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 973ms/step - categorical_accuracy: 0.2314 - loss: 5.0346 - perplexity: 261.9535 - val_categorical_accuracy: 0.1667 - val_loss: 13.5785 - val_perplexity: 788957.9375\n", + "Epoch 30/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m8s\u001b[0m 1s/step - categorical_accuracy: 0.3508 - loss: 3.9460 - perplexity: 55.9352 - val_categorical_accuracy: 0.1667 - val_loss: 13.5878 - val_perplexity: 796315.2500\n", + "Epoch 31/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 684ms/step - categorical_accuracy: 0.2141 - loss: 3.3061 - perplexity: 29.5618 - val_categorical_accuracy: 0.1667 - val_loss: 13.5959 - val_perplexity: 802850.9375\n", + "Epoch 32/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 659ms/step - categorical_accuracy: 0.1719 - loss: 4.1759 - perplexity: 72.8835 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.7057 - val_perplexity: 896031.1250\n", + "Epoch 33/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 725ms/step - categorical_accuracy: 0.1302 - loss: 5.0193 - perplexity: 177.1105 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.7885 - val_perplexity: 973393.5000\n", + "Epoch 34/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 632ms/step - categorical_accuracy: 0.2919 - loss: 2.9201 - perplexity: 24.4465 - val_categorical_accuracy: 
0.0000e+00 - val_loss: 13.9237 - val_perplexity: 1114295.3750\n", + "Epoch 35/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 639ms/step - categorical_accuracy: 0.3197 - loss: 3.6359 - perplexity: 60.7448 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.0890 - val_perplexity: 1314598.2500\n", + "Epoch 36/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 527ms/step - categorical_accuracy: 0.2364 - loss: 3.6853 - perplexity: 93.4177 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.1742 - val_perplexity: 1431418.1250\n", + "Epoch 37/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 605ms/step - categorical_accuracy: 0.2731 - loss: 3.3295 - perplexity: 31.0892 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.2398 - val_perplexity: 1528469.6250\n", + "Epoch 38/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 755ms/step - categorical_accuracy: 0.5054 - loss: 4.2462 - perplexity: 96.5757 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.3218 - val_perplexity: 1659098.5000\n", + "Epoch 39/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 606ms/step - categorical_accuracy: 0.3638 - loss: 3.2328 - perplexity: 26.5526 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.3728 - val_perplexity: 1745870.1250\n", + "Epoch 40/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 745ms/step - categorical_accuracy: 0.5727 - loss: 1.9158 - perplexity: 9.5471 - 
val_categorical_accuracy: 0.0000e+00 - val_loss: 14.5209 - val_perplexity: 2024707.8750\n", + "Epoch 41/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 671ms/step - categorical_accuracy: 0.1068 - loss: 5.0107 - perplexity: 172.7614 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.5204 - val_perplexity: 2023570.8750\n", + "this is neural_network_spec_file 2025_11_23 16_55_cerebros_not-gpt_meta_42/model_architectures/tr_0000000000000001_subtrial_0000000000000000.txt\n", + "returning trial 1 oracles\n", + " categorical_accuracy loss perplexity val_categorical_accuracy \\\n", + "0 0.000000 11.719700 225372.531250 0.166667 \n", + "1 0.173913 11.155995 69982.140625 0.166667 \n", + "2 0.173913 10.995764 59621.039062 0.166667 \n", + "3 0.217391 10.042144 22974.583984 0.000000 \n", + "4 0.173913 9.805058 18125.181641 0.000000 \n", + "5 0.130435 9.198784 9885.100586 0.000000 \n", + "6 0.173913 8.641828 5663.671387 0.000000 \n", + "7 0.043478 8.808529 6691.075195 0.000000 \n", + "8 0.217391 7.256882 1417.828491 0.000000 \n", + "9 0.173913 6.904544 996.794250 0.000000 \n", + "10 0.217391 6.873430 966.256958 0.000000 \n", + "11 0.173913 5.982946 396.607025 0.000000 \n", + "12 0.043478 6.824471 920.089539 0.166667 \n", + "13 0.130435 6.259269 522.836731 0.166667 \n", + "14 0.217391 5.205779 182.322769 0.166667 \n", + "15 0.130435 5.462027 235.574463 0.000000 \n", + "16 0.217391 6.074162 434.485474 0.000000 \n", + "17 0.217391 5.354462 211.550262 0.000000 \n", + "18 0.304348 4.318021 75.040001 0.000000 \n", + "19 0.217391 5.875260 356.117035 0.000000 \n", + "20 0.217391 5.246053 189.815536 0.000000 \n", + "21 0.391304 4.035575 56.575462 0.000000 \n", + "22 0.173913 3.672752 39.360092 0.000000 \n", + "23 0.347826 4.800797 121.607239 0.000000 \n", + "24 0.260870 6.058529 427.745911 0.166667 \n", + "25 0.260870 6.874752 967.535400 0.166667 \n", + "26 0.304348 3.871903 
48.033691 0.166667 \n", + "27 0.217391 5.597022 269.622284 0.166667 \n", + "28 0.260870 4.006342 54.945507 0.166667 \n", + "29 0.260870 4.286894 72.740173 0.166667 \n", + "30 0.217391 3.180355 24.055300 0.166667 \n", + "31 0.173913 4.073040 58.735218 0.000000 \n", + "32 0.173913 5.302594 200.857193 0.000000 \n", + "33 0.217391 3.763384 43.094006 0.000000 \n", + "34 0.217391 4.363249 78.511826 0.000000 \n", + "35 0.217391 5.450110 232.783875 0.000000 \n", + "36 0.260870 3.634080 37.866989 0.000000 \n", + "37 0.391304 5.082735 161.214310 0.000000 \n", + "38 0.391304 3.312840 27.463017 0.000000 \n", + "39 0.434783 2.846823 17.232950 0.000000 \n", + "40 0.086957 5.169964 175.908478 0.000000 \n", + "\n", + " val_loss val_perplexity trial_number subtrial_number \\\n", + "0 11.768844 1.291648e+05 1 0 \n", + "1 11.750198 1.267787e+05 1 0 \n", + "2 11.742490 1.258052e+05 1 0 \n", + "3 11.745057 1.261286e+05 1 0 \n", + "4 11.736234 1.250206e+05 1 0 \n", + "5 11.717111 1.226525e+05 1 0 \n", + "6 11.690784 1.194656e+05 1 0 \n", + "7 11.644112 1.140180e+05 1 0 \n", + "8 11.626712 1.120513e+05 1 0 \n", + "9 11.637473 1.132634e+05 1 0 \n", + "10 11.659701 1.158094e+05 1 0 \n", + "11 11.750339 1.267965e+05 1 0 \n", + "12 11.802508 1.335870e+05 1 0 \n", + "13 11.897525 1.469026e+05 1 0 \n", + "14 11.997716 1.623835e+05 1 0 \n", + "15 12.270196 2.132448e+05 1 0 \n", + "16 12.433421 2.510535e+05 1 0 \n", + "17 12.588585 2.931926e+05 1 0 \n", + "18 12.766930 3.504347e+05 1 0 \n", + "19 13.121078 4.993575e+05 1 0 \n", + "20 13.272231 5.808403e+05 1 0 \n", + "21 13.392925 6.553504e+05 1 0 \n", + "22 13.513103 7.390372e+05 1 0 \n", + "23 13.607346 8.120731e+05 1 0 \n", + "24 13.620921 8.231726e+05 1 0 \n", + "25 13.592243 7.999009e+05 1 0 \n", + "26 13.596767 8.035281e+05 1 0 \n", + "27 13.587891 7.964269e+05 1 0 \n", + "28 13.578468 7.889579e+05 1 0 \n", + "29 13.587750 7.963152e+05 1 0 \n", + "30 13.595924 8.028509e+05 1 0 \n", + "31 13.705730 8.960311e+05 1 0 \n", + "32 13.788544 
9.733935e+05 1 0 \n", + "33 13.923733 1.114295e+06 1 0 \n", + "34 14.089040 1.314598e+06 1 0 \n", + "35 14.174176 1.431418e+06 1 0 \n", + "36 14.239779 1.528470e+06 1 0 \n", + "37 14.321785 1.659098e+06 1 0 \n", + "38 14.372764 1.745870e+06 1 0 \n", + "39 14.520935 2.024708e+06 1 0 \n", + "40 14.520374 2.023571e+06 1 0 \n", + "\n", + " model_name \n", + "0 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "1 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "2 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "3 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "4 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "5 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "6 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "7 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "8 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "9 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "10 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "11 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "12 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "13 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "14 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "15 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "16 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "17 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "18 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "19 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "20 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "21 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "22 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "23 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "24 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "25 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "26 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... 
\n", + "27 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "28 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "29 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "30 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "31 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "32 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "33 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "34 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "35 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "36 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "37 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "38 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "39 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "40 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", + " self.pid = os.fork()\n", + "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. 
os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", + " self.pid = os.fork()\n", + "Global task progress: 67%|\u001b[38;2;22;206;235mโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‹ \u001b[0m| 2/3 [07:42<03:50, 230.58s/it]" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "SimpleCerebrosRandomSearch.input_shapes: [(40,)]\n", + "nan\n", + ">nnf>ceil\n", + "k is: 0 value is: [{'1': }]\n", + "0\n", + "k is: 1 value is: [{'2': }, {'2': }]\n", + "1\n", + "Trying to create level 1\n", + "We think level 1's predecessors are: [0]\n", + "k is: 2 value is: [{'128260': }]\n", + "2\n", + "Trying to create Final level 2\n", + "Trying to create level 2\n", + "We think level final level 2's predecessors are: [0, 1]\n", + "levels:\n", + "[0, 1, 2]\n", + "{'0': 'InputUnitModule'}\n", + "InputLevel.input_shapes [(40,)]\n", + "{'2': }\n", + "{'2': }\n", + "Debug: I am 2 selecting 1\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "Setting levels_unmaterialized[0] level_number 0 to have first successor: levels_unmaterialized[:1], having level_numbers of [1, 2]\n", + "Setting levels_unmaterialized[1] level_number 1 to have first successor: levels_unmaterialized[:2], having level_numbers of [2]\n", + "Debug: successor_connectivity_errors_2d []\n", + "$$$$$$>>>>> Base model: \n", + "InputUnit.input_shape: (40,)\n", + "{'2': }\n", + "{'2': }\n", + "debug: meta_level_number\n", + "debug: meta_level_number\n", + "Debug: successor_connectivity_errors_2d []\n", + "Debug: successor_connectivity_errors_2d []\n", + "materialize:_NeuralNetworkFuture_0000000000000nan_tr_2_DenseLevel_0000000000000001_tr_2_DenseUnit_0000000000000001_tr_2_0 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + 
"materialize:_NeuralNetworkFuture_0000000000000nan_tr_2_DenseLevel_0000000000000001_tr_2_DenseUnit_0000000000000001_tr_2_1 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "{'128260': }\n", + "debug: meta_level_number\n", + "Debug: successor_connectivity_errors_2d []\n", + "materialize:_NeuralNetworkFuture_0000000000000nan_tr_2_FinalDenseLevel_0000000000000002_tr_2_FinalDenseUnit_0000000000000002_tr_2_0 called\n", + "materialized network layers\n", + "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n", + "inputs\n", + "\n", + "\n", + "outputs\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1mModel: \"NeuralNetworkFuture_0000000000000nan_tr_2_nn_materialized\"\u001b[0m\n" + ], + "text/html": [ + "
Model: \"NeuralNetworkFuture_0000000000000nan_tr_2_nn_materialized\"\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n", + "โ”ƒ\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0mโ”ƒ\u001b[1m \u001b[0m\u001b[1mConnected to \u001b[0m\u001b[1m \u001b[0mโ”ƒ\n", + "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m40\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ - โ”‚\n", + "โ”‚ (\u001b[38;5;33mInputLayer\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ functional โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m12\u001b[0m) โ”‚ \u001b[38;5;34m1,550,652\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mFunctional\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ 
(\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ 
โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m] โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m432\u001b[0m) โ”‚ \u001b[38;5;34m1,728\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ 
(\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) โ”‚ \u001b[38;5;34m866\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m0\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mConcatenate\u001b[0m) โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], 
โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ 
NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ functional[\u001b[38;5;34m2\u001b[0m][\u001b[38;5;34m0\u001b[0m], โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ 
โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ โ”‚ โ”‚ โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m396\u001b[0m) โ”‚ \u001b[38;5;34m1,584\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mBatchNormalizatioโ€ฆ\u001b[0m โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n", + "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128260\u001b[0m) โ”‚ \u001b[38;5;34m50,919,220\u001b[0m โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n", + "โ”‚ (\u001b[38;5;33mDense\u001b[0m) โ”‚ โ”‚ โ”‚ โ”‚\n", + "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n" + ], + "text/html": [ + "
โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ณโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”“\n",
+              "โ”ƒ Layer (type)        โ”ƒ Output Shape      โ”ƒ    Param # โ”ƒ Connected to      โ”ƒ\n",
+              "โ”กโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ•‡โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”ฉ\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 40)        โ”‚          0 โ”‚ -                 โ”‚\n",
+              "โ”‚ (InputLayer)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ functional          โ”‚ (None, 12)        โ”‚  1,550,652 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Functional)        โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0]  โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚          0 โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0]  โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 432)       โ”‚      1,728 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 2)         โ”‚        866 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚          0 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Concatenate)       โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ functional[2][0], โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚                     โ”‚                   โ”‚            โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 396)       โ”‚      1,584 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (BatchNormalizatioโ€ฆ โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค\n",
+              "โ”‚ NeuralNetworkFuturโ€ฆ โ”‚ (None, 128260)    โ”‚ 50,919,220 โ”‚ NeuralNetworkFutโ€ฆ โ”‚\n",
+              "โ”‚ (Dense)             โ”‚                   โ”‚            โ”‚                   โ”‚\n",
+              "โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m52,476,644\u001b[0m (200.18 MB)\n" + ], + "text/html": [ + "
 Total params: 52,476,644 (200.18 MB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m52,474,124\u001b[0m (200.17 MB)\n" + ], + "text/html": [ + "
 Trainable params: 52,474,124 (200.17 MB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m2,520\u001b[0m (9.84 KB)\n" + ], + "text/html": [ + "
 Non-trainable params: 2,520 (9.84 KB)\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "None\n", + "Epoch 1/41\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/keras/src/models/functional.py:241: UserWarning: The structure of `inputs` doesn't match the expected structure.\n", + "Expected: NeuralNetworkFuture_0000000000000nan_tr_2_InputLevel_0000000000000000_tr_2_InputUnit_0000000000000000_tr_2_0_inp\n", + "Received: inputs=('Tensor(shape=(None, 40))',)\n", + " warnings.warn(msg)\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 1s/step - categorical_accuracy: 0.0000e+00 - loss: 11.7705 - perplexity: 321629.5625 - val_categorical_accuracy: 0.0000e+00 - val_loss: 11.7229 - val_perplexity: 123359.4219\n", + "Epoch 2/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m8s\u001b[0m 553ms/step - categorical_accuracy: 0.2203 - loss: 11.1997 - perplexity: 73499.6797 - val_categorical_accuracy: 0.1667 - val_loss: 11.6443 - val_perplexity: 114043.3438\n", + "Epoch 3/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 810ms/step - categorical_accuracy: 0.0568 - loss: 10.8859 - perplexity: 55947.3047 - val_categorical_accuracy: 0.1667 - val_loss: 11.6099 - val_perplexity: 110179.2891\n", + "Epoch 4/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 764ms/step - categorical_accuracy: 0.0857 - loss: 10.3024 - perplexity: 32385.9180 - val_categorical_accuracy: 0.1667 - val_loss: 11.5822 - val_perplexity: 107167.9375\n", + "Epoch 5/41\n", + 
"\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 804ms/step - categorical_accuracy: 0.4326 - loss: 9.0836 - perplexity: 8933.4072 - val_categorical_accuracy: 0.1667 - val_loss: 11.5889 - val_perplexity: 107891.9219\n", + "Epoch 6/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 748ms/step - categorical_accuracy: 0.2370 - loss: 8.2757 - perplexity: 3973.7712 - val_categorical_accuracy: 0.1667 - val_loss: 11.6114 - val_perplexity: 110344.8047\n", + "Epoch 7/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 697ms/step - categorical_accuracy: 0.3215 - loss: 7.9711 - perplexity: 3110.2710 - val_categorical_accuracy: 0.1667 - val_loss: 11.6354 - val_perplexity: 113026.3203\n", + "Epoch 8/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 847ms/step - categorical_accuracy: 0.3126 - loss: 7.4265 - perplexity: 1791.7644 - val_categorical_accuracy: 0.1667 - val_loss: 11.7362 - val_perplexity: 125022.5000\n", + "Epoch 9/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 671ms/step - categorical_accuracy: 0.3715 - loss: 6.4991 - perplexity: 682.2664 - val_categorical_accuracy: 0.1667 - val_loss: 11.8366 - val_perplexity: 138215.8281\n", + "Epoch 10/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 661ms/step - categorical_accuracy: 0.0734 - loss: 6.6783 - perplexity: 959.8943 - val_categorical_accuracy: 0.1667 - val_loss: 11.9409 - val_perplexity: 153410.4375\n", + "Epoch 11/41\n", + 
"\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 672ms/step - categorical_accuracy: 0.2731 - loss: 5.4216 - perplexity: 253.3555 - val_categorical_accuracy: 0.1667 - val_loss: 12.0696 - val_perplexity: 174486.5469\n", + "Epoch 12/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 771ms/step - categorical_accuracy: 0.2197 - loss: 5.1631 - perplexity: 218.4438 - val_categorical_accuracy: 0.1667 - val_loss: 12.3783 - val_perplexity: 237591.3281\n", + "Epoch 13/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 590ms/step - categorical_accuracy: 0.0990 - loss: 6.2295 - perplexity: 540.8942 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.5687 - val_perplexity: 287426.0312\n", + "Epoch 14/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 779ms/step - categorical_accuracy: 0.1786 - loss: 6.4879 - perplexity: 5995.6064 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.7386 - val_perplexity: 340639.4062\n", + "Epoch 15/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 748ms/step - categorical_accuracy: 0.1752 - loss: 5.1388 - perplexity: 226.8788 - val_categorical_accuracy: 0.0000e+00 - val_loss: 12.8619 - val_perplexity: 385366.4375\n", + "Epoch 16/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 621ms/step - categorical_accuracy: 0.1641 - loss: 5.7236 - perplexity: 863.9962 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.1398 - val_perplexity: 508785.5938\n", + 
"Epoch 17/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 689ms/step - categorical_accuracy: 0.4921 - loss: 2.9571 - perplexity: 20.0698 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.3148 - val_perplexity: 606077.4375\n", + "Epoch 18/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 844ms/step - categorical_accuracy: 0.3659 - loss: 4.9595 - perplexity: 819.0781 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.5068 - val_perplexity: 734419.0000\n", + "Epoch 19/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 593ms/step - categorical_accuracy: 0.3014 - loss: 4.9173 - perplexity: 152.2117 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.6640 - val_perplexity: 859409.2500\n", + "Epoch 20/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 847ms/step - categorical_accuracy: 0.2308 - loss: 3.7793 - perplexity: 60.0206 - val_categorical_accuracy: 0.0000e+00 - val_loss: 13.9386 - val_perplexity: 1131028.2500\n", + "Epoch 21/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 918ms/step - categorical_accuracy: 0.3832 - loss: 3.4479 - perplexity: 80.4731 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.0840 - val_perplexity: 1307933.0000\n", + "Epoch 22/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 560ms/step - categorical_accuracy: 0.3860 - loss: 4.3510 - perplexity: 90.9878 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.1620 - 
val_perplexity: 1414104.8750\n", + "Epoch 23/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 635ms/step - categorical_accuracy: 0.4443 - loss: 2.9553 - perplexity: 23.0736 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.2588 - val_perplexity: 1557883.3750\n", + "Epoch 24/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 636ms/step - categorical_accuracy: 0.4983 - loss: 2.2404 - perplexity: 9.9262 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.3779 - val_perplexity: 1754904.3750\n", + "Epoch 25/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 623ms/step - categorical_accuracy: 0.2909 - loss: 5.2172 - perplexity: 247.5778 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.4472 - val_perplexity: 1880756.3750\n", + "Epoch 26/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 806ms/step - categorical_accuracy: 0.3048 - loss: 3.1858 - perplexity: 25.7062 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.5364 - val_perplexity: 2056196.2500\n", + "Epoch 27/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 843ms/step - categorical_accuracy: 0.1185 - loss: 3.0064 - perplexity: 25.4380 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.5902 - val_perplexity: 2169912.7500\n", + "Epoch 28/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 866ms/step - categorical_accuracy: 0.5033 - loss: 2.9283 - perplexity: 35.1612 - val_categorical_accuracy: 0.1667 - 
val_loss: 14.6578 - val_perplexity: 2321627.0000\n", + "Epoch 29/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 794ms/step - categorical_accuracy: 0.3320 - loss: 3.2803 - perplexity: 27.8907 - val_categorical_accuracy: 0.1667 - val_loss: 14.7196 - val_perplexity: 2469625.0000\n", + "Epoch 30/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 660ms/step - categorical_accuracy: 0.2752 - loss: 5.4753 - perplexity: 249.7908 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.8572 - val_perplexity: 2834024.2500\n", + "Epoch 31/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 748ms/step - categorical_accuracy: 0.2925 - loss: 5.2035 - perplexity: 302.8727 - val_categorical_accuracy: 0.0000e+00 - val_loss: 14.9761 - val_perplexity: 3191841.5000\n", + "Epoch 32/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 585ms/step - categorical_accuracy: 0.2715 - loss: 3.0830 - perplexity: 22.1130 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.0934 - val_perplexity: 3589043.2500\n", + "Epoch 33/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 837ms/step - categorical_accuracy: 0.3638 - loss: 2.0138 - perplexity: 7.6831 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.1927 - val_perplexity: 3963894.5000\n", + "Epoch 34/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 622ms/step - categorical_accuracy: 0.4165 - loss: 2.3430 - perplexity: 12.4422 - 
val_categorical_accuracy: 0.0000e+00 - val_loss: 15.2933 - val_perplexity: 4383348.5000\n", + "Epoch 35/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 730ms/step - categorical_accuracy: 0.4832 - loss: 3.8156 - perplexity: 57.3130 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.4055 - val_perplexity: 4903895.5000\n", + "Epoch 36/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 831ms/step - categorical_accuracy: 0.1641 - loss: 4.5182 - perplexity: 317.0210 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.4245 - val_perplexity: 4998003.5000\n", + "Epoch 37/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 621ms/step - categorical_accuracy: 0.2752 - loss: 2.9753 - perplexity: 25.5228 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.4590 - val_perplexity: 5173094.0000\n", + "Epoch 38/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 666ms/step - categorical_accuracy: 0.2280 - loss: 2.4058 - perplexity: 11.6680 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.4232 - val_perplexity: 4991435.0000\n", + "Epoch 39/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 565ms/step - categorical_accuracy: 0.3592 - loss: 3.6356 - perplexity: 40.6227 - val_categorical_accuracy: 0.0000e+00 - val_loss: 15.4089 - val_perplexity: 4920268.0000\n", + "Epoch 40/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 665ms/step - categorical_accuracy: 0.2691 - loss: 3.4659 
- perplexity: 47.4784 - val_categorical_accuracy: 0.1667 - val_loss: 15.3797 - val_perplexity: 4778703.0000\n", + "Epoch 41/41\n", + "\u001b[1m5/5\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 693ms/step - categorical_accuracy: 0.4310 - loss: 2.7924 - perplexity: 20.8595 - val_categorical_accuracy: 0.1667 - val_loss: 15.4324 - val_perplexity: 5037467.5000\n", + "this is neural_network_spec_file 2025_11_23 16_55_cerebros_not-gpt_meta_42/model_architectures/tr_0000000000000002_subtrial_0000000000000000.txt\n", + "returning trial 2 oracles\n", + " categorical_accuracy loss perplexity val_categorical_accuracy \\\n", + "0 0.000000 11.761698 226943.984375 0.000000 \n", + "1 0.260870 11.090140 65521.941406 0.166667 \n", + "2 0.086957 10.702163 44451.890625 0.166667 \n", + "3 0.173913 9.990288 21813.576172 0.166667 \n", + "4 0.347826 9.246581 10369.053711 0.166667 \n", + "5 0.260870 8.266317 3890.594971 0.166667 \n", + "6 0.347826 7.704062 2217.337646 0.166667 \n", + "7 0.304348 7.122604 1239.675415 0.166667 \n", + "8 0.347826 6.225540 505.495789 0.166667 \n", + "9 0.086957 6.562615 708.120972 0.166667 \n", + "10 0.260870 5.511858 247.610764 0.166667 \n", + "11 0.217391 5.295715 199.480270 0.166667 \n", + "12 0.130435 6.463620 641.378784 0.000000 \n", + "13 0.260870 5.026217 152.355484 0.000000 \n", + "14 0.217391 5.910099 368.742676 0.000000 \n", + "15 0.217391 4.887461 132.616455 0.000000 \n", + "16 0.434783 3.347833 28.441027 0.000000 \n", + "17 0.347826 4.313054 74.668182 0.000000 \n", + "18 0.304348 4.665129 106.179253 0.000000 \n", + "19 0.217391 4.334057 76.253006 0.000000 \n", + "20 0.391304 2.807739 16.572411 0.000000 \n", + "21 0.391304 4.215992 67.761353 0.000000 \n", + "22 0.391304 3.522572 33.871445 0.000000 \n", + "23 0.478261 2.265072 9.631822 0.000000 \n", + "24 0.347826 5.091538 162.639801 0.000000 \n", + "25 0.347826 2.982907 19.745134 0.000000 \n", + "26 0.130435 3.861120 
47.518566 0.000000 \n", + "27 0.434783 4.315707 74.866554 0.166667 \n", + "28 0.304348 3.004366 20.173416 0.166667 \n", + "29 0.217391 5.262289 192.922501 0.000000 \n", + "30 0.260870 5.697386 298.087250 0.000000 \n", + "31 0.347826 3.149921 23.334219 0.000000 \n", + "32 0.391304 2.063896 7.876601 0.000000 \n", + "33 0.391304 3.141111 23.129541 0.000000 \n", + "34 0.391304 3.663168 38.984657 0.000000 \n", + "35 0.217391 3.455597 31.677193 0.000000 \n", + "36 0.217391 3.796592 44.549091 0.000000 \n", + "37 0.217391 2.545129 12.744876 0.000000 \n", + "38 0.260870 4.018140 55.597588 0.000000 \n", + "39 0.173913 3.072442 21.594568 0.166667 \n", + "40 0.434783 2.709372 15.019834 0.166667 \n", + "\n", + " val_loss val_perplexity trial_number subtrial_number \\\n", + "0 11.722857 1.233594e+05 2 0 \n", + "1 11.644334 1.140433e+05 2 0 \n", + "2 11.609864 1.101793e+05 2 0 \n", + "3 11.582150 1.071679e+05 2 0 \n", + "4 11.588885 1.078919e+05 2 0 \n", + "5 11.611365 1.103448e+05 2 0 \n", + "6 11.635376 1.130263e+05 2 0 \n", + "7 11.736249 1.250225e+05 2 0 \n", + "8 11.836572 1.382158e+05 2 0 \n", + "9 11.940872 1.534104e+05 2 0 \n", + "10 12.069603 1.744865e+05 2 0 \n", + "11 12.378307 2.375913e+05 2 0 \n", + "12 12.568721 2.874260e+05 2 0 \n", + "13 12.738580 3.406394e+05 2 0 \n", + "14 12.861950 3.853664e+05 2 0 \n", + "15 13.139782 5.087856e+05 2 0 \n", + "16 13.314763 6.060774e+05 2 0 \n", + "17 13.506835 7.344190e+05 2 0 \n", + "18 13.664001 8.594092e+05 2 0 \n", + "19 13.938638 1.131028e+06 2 0 \n", + "20 14.083958 1.307933e+06 2 0 \n", + "21 14.162007 1.414105e+06 2 0 \n", + "22 14.258838 1.557883e+06 2 0 \n", + "23 14.377925 1.754904e+06 2 0 \n", + "24 14.447185 1.880756e+06 2 0 \n", + "25 14.536368 2.056196e+06 2 0 \n", + "26 14.590198 2.169913e+06 2 0 \n", + "27 14.657779 2.321627e+06 2 0 \n", + "28 14.719577 2.469625e+06 2 0 \n", + "29 14.857208 2.834024e+06 2 0 \n", + "30 14.976109 3.191842e+06 2 0 \n", + "31 15.093396 3.589043e+06 2 0 \n", + "32 15.192738 
3.963894e+06 2 0 \n", + "33 15.293323 4.383348e+06 2 0 \n", + "34 15.405540 4.903896e+06 2 0 \n", + "35 15.424549 4.998004e+06 2 0 \n", + "36 15.458982 5.173094e+06 2 0 \n", + "37 15.423234 4.991435e+06 2 0 \n", + "38 15.408874 4.920268e+06 2 0 \n", + "39 15.379680 4.778703e+06 2 0 \n", + "40 15.432412 5.037468e+06 2 0 \n", + "\n", + " model_name \n", + "0 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "1 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "2 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "3 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "4 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "5 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "6 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "7 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "8 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "9 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "10 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "11 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "12 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "13 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "14 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "15 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "16 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "17 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "18 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "19 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "20 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "21 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "22 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "23 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "24 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "25 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "26 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... 
\n", + "27 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "28 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "29 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "30 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "31 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "32 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "33 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "34 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "35 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "36 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "37 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "38 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "39 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n", + "40 2025_11_23 16_55_cerebros_not-gpt_meta_42/mode... \n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", + " self.pid = os.fork()\n", + "/usr/lib/python3.12/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. 
os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n", + " self.pid = os.fork()\n", + "Global task progress: 100%|\u001b[38;2;22;206;235mโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ\u001b[0m| 3/3 [12:11<00:00, 243.86s/it]" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Index(['categorical_accuracy', 'loss', 'perplexity',\n", + " 'val_categorical_accuracy', 'val_loss', 'val_perplexity',\n", + " 'trial_number', 'subtrial_number', 'model_name'],\n", + " dtype='object')\n", + "metric_to_rank_by is: 'perplexity'\n", + "Type of metric_to_rank_by is: \n", + "metric_to_rank_by is: 'perplexity'\n", + "Type of metric_to_rank_by is: \n", + "Best result this trial was: 7.876600742340088\n", + "Type of best result: \n", + "Best model name: 2025_11_23 16_55_cerebros_not-gpt_meta_42/models/tr_0000000000000002_subtrial_0000000000000000.keras\n", + "Cerebros trained 3 models in 12.19 min. Average time per model: 4.06 min.\n", + "Cerebros best perplexity achieved in Phase I-a is 7.876600742340088\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Training Stage I-a - Model Evaluation (Subjective):\n", + "\n", + "- We retrieve the best model found during the NAS phase and test its text generation capabilities from a subjective standpoint.\n", + "- Keep in mind, this is trained on 10 text samples. 
It is impressive that it can generate anything, especially subjects and verbs that are on-topic and agree, and is otherwise sensible, despite being grammatically gibberish.\n", + "\n", + "FYI: The generative components we imported from cerebrosllmutils:\n", + "\n", + "## Model config\n", + "```python\n", + "\n", + "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='CerebrosNotGPTConfig')\n", + "class CerebrosNotGPTConfig:\n", + " def __init__(self, max_sequence_length=1536, padding_token=None):\n", + " self.max_sequence_length = max_sequence_length\n", + " self.padding_token = padding_token\n", + "\n", + " def get_config(self):\n", + " return {\n", + " 'max_sequence_length': self.max_sequence_length,\n", + " 'padding_token': self.padding_token\n", + " }\n", + "\n", + " @classmethod\n", + " def from_config(cls, config):\n", + " return cls(**config)\n", + "```\n", + "\n", + "## Model class we imported from cerebrosllmutils, having:\n", + "\n", + "- Greedy sampling\n", + "- Temperature scaling\n", + "- Top p sampling\n", + "- Top k sampling\n", + "- Presence penalty\n", + "- Frequency penalty\n", + "- Repetition penalty\n", + "\n", + "```python\n", + "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='CerebrosNotGPT')\n", + "class CerebrosNotGPT(tf.keras.Model):\n", + " def __init__(self, config: Any, model: Any = None, **kwargs):\n", + " # 1. Store the nested model argument.\n", + " self.config = config\n", + " self.model = model\n", + " \n", + " # 2. Extract and remove custom kwargs (like 'model') before calling super.\n", + " # This is important to prevent 'unrecognized keyword argument' errors.\n", + " # The nested model is already extracted and stored, so it can be safely removed.\n", + " kwargs.pop('model', None)\n", + " \n", + " # 3. 
Call the parent constructor with the cleaned kwargs.\n", + " super().__init__(**kwargs)\n", + "\n", + " self.max_sequence_length = config.max_sequence_length\n", + " self.padding_token = config.padding_token\n", + "\n", + " def get_config(self):\n", + " base_config = super().get_config()\n", + " config_dict = {\n", + " 'config': self.config.get_config(),\n", + " }\n", + " \n", + " # Explicitly handle nested model serialization.\n", + " # This is required if Keras's automatic tracking fails.\n", + " if self.model is not None:\n", + " # Note: This approach might still suffer from weight loss.\n", + " # The recommended way is to let Keras handle it automatically.\n", + " config_dict['model'] = tf.keras.utils.serialize_keras_object(self.model)\n", + "\n", + " base_config.update(config_dict)\n", + " return base_config\n", + "\n", + " @classmethod\n", + " def from_config(cls, config):\n", + " # Separate the custom config.\n", + " config_obj_dict = config.pop('config')\n", + " config_obj = CerebrosNotGPTConfig.from_config(config_obj_dict)\n", + " \n", + " # Manually extract and load the nested model.\n", + " nested_model_config = config.pop('model', None)\n", + " if nested_model_config:\n", + " nested_model = tf.keras.utils.deserialize_keras_object(nested_model_config)\n", + " else:\n", + " nested_model = None\n", + " \n", + " # Reconstruct the outer model by passing the restored parts.\n", + " return cls(config=config_obj, model=nested_model, **config)\n", + "\n", + " def call(self, inputs, training=False):\n", + " if self.model is None:\n", + " raise ValueError(\"Inner model not initialized properly\")\n", + " return self.model(inputs, training=training)\n", + "\n", + " @staticmethod\n", + " def apply_top_k_probs(probs, k):\n", + " if k is None or k <= 0:\n", + " return probs\n", + " # Flatten and argsort for indices\n", + " sorted_indices = tf.argsort(probs, direction='DESCENDING')\n", + " keep_indices = sorted_indices[:k]\n", + " mask = tf.zeros_like(probs, 
dtype=tf.bool)\n", + " mask = tf.tensor_scatter_nd_update(mask, tf.reshape(keep_indices, (-1, 1)),\n", + " tf.ones((k,), dtype=tf.bool))\n", + " filtered_probs = tf.where(mask, probs, tf.zeros_like(probs))\n", + " # Renormalize\n", + " filtered_probs = filtered_probs / tf.reduce_sum(filtered_probs)\n", + " return filtered_probs\n", + "\n", + " @staticmethod\n", + " def apply_top_p_probs(probs, p):\n", + " if p is None or p >= 1.0:\n", + " return probs\n", + " sorted_indices = tf.argsort(probs, direction='DESCENDING')\n", + " sorted_probs = tf.gather(probs, sorted_indices)\n", + " cumulative_probs = tf.cumsum(sorted_probs)\n", + " mask = cumulative_probs <= p\n", + " # Always keep at least 1 token\n", + " mask = tf.concat([tf.constant([True]), mask[1:]], axis=0)\n", + " keep_indices = tf.boolean_mask(sorted_indices, mask)\n", + " filtered_probs = tf.where(\n", + " tf.reduce_any(tf.equal(tf.range(tf.shape(probs)[0])[:, None], keep_indices), axis=1), probs,\n", + " tf.zeros_like(probs))\n", + " # Renormalize\n", + " filtered_probs = filtered_probs / tf.reduce_sum(filtered_probs)\n", + " return filtered_probs\n", + "\n", + " def generate(self,\n", + " token_ids,\n", + " do_sample=False,\n", + " max_new_tokens=None,\n", + " temperature=1.0,\n", + " top_k=None,\n", + " top_p=None,\n", + " frequency_penalty=None,\n", + " presence_penalty=None,\n", + " repetition_penalty=None):\n", + " \"\"\"\n", + " Generate text autoregressively from token IDs.\n", + " Applies filtering in sequence: penalties -> temperature -> top-k -> top-p\n", + " \"\"\"\n", + " # Convert token_ids to list if it's not already\n", + " if not isinstance(token_ids, list):\n", + " token_ids = list(token_ids)\n", + "\n", + " # Determine the actual maximum number of new tokens\n", + " if max_new_tokens is None:\n", + " max_new_tokens = self.max_sequence_length - len(token_ids)\n", + " else:\n", + " max_new_tokens = min(max_new_tokens, self.max_sequence_length - len(token_ids))\n", + "\n", + " # Initialize 
the generated tokens list\n", + " generated_tokens = []\n", + " current_tokens = token_ids.copy()\n", + "\n", + " # Autoregressive generation loop\n", + " for _ in range(max_new_tokens):\n", + " # Pad or truncate to max_sequence_length\n", + " if len(current_tokens) > self.max_sequence_length:\n", + " input_tokens = current_tokens[-self.max_sequence_length:]\n", + " else:\n", + " padding_needed = self.max_sequence_length - len(current_tokens)\n", + " input_tokens = current_tokens + [self.padding_token] * padding_needed\n", + "\n", + " # Convert to tensor and get model prediction\n", + " input_tensor = tf.constant([input_tokens], dtype=tf.int32)\n", + " probs_nested = self.model(input_tensor)\n", + " probs = probs_nested[0] # Already softmax probabilities (NOT logits as comment says)\n", + " logits = tf.math.log(probs + 10 ** -20) # Convert to logits for penalty application\n", + "\n", + " if do_sample:\n", + " # Apply repetition/frequency/presence penalties to logits\n", + " if frequency_penalty is not None or presence_penalty is not None:\n", + " # Collect token counts from current_tokens\n", + " token_counts = {}\n", + " for t in current_tokens:\n", + " token_counts[t] = token_counts.get(t, 0) + 1\n", + "\n", + " # Prepare penalty tensor\n", + " vocab_size = tf.shape(logits)[0]\n", + " penalties = tf.zeros_like(logits)\n", + "\n", + " for token_id, count in token_counts.items():\n", + " if token_id >= vocab_size:\n", + " continue\n", + " penalty = 0.0\n", + " if presence_penalty is not None:\n", + " penalty += presence_penalty\n", + " if frequency_penalty is not None:\n", + " penalty += frequency_penalty * count\n", + "\n", + " penalties = tf.tensor_scatter_nd_add(\n", + " penalties,\n", + " [[token_id]],\n", + " [penalty]\n", + " )\n", + "\n", + " # Subtract penalties from logits\n", + " logits = logits - penalties\n", + "\n", + " # Apply repetition penalty (standard approach)\n", + " if repetition_penalty is not None and repetition_penalty != 1.0:\n", + " # 
Collect unique tokens that have appeared\n", + " unique_tokens = list(set(current_tokens))\n", + " vocab_size = tf.shape(logits)[0]\n", + "\n", + " for token_id in unique_tokens:\n", + " if token_id < vocab_size:\n", + " # Divide logits of repeated tokens by penalty\n", + " logits = tf.tensor_scatter_nd_update(\n", + " logits,\n", + " [[token_id]],\n", + " [logits[token_id] / repetition_penalty]\n", + " )\n", + "\n", + " # Apply temperature\n", + " if temperature != 1.0:\n", + " logits = logits / temperature\n", + "\n", + " # Convert to probabilities\n", + " probs = tf.nn.softmax(logits)\n", + "\n", + " # Apply top-k filtering (if specified)\n", + " if top_k is not None and top_k > 0:\n", + " k = min(top_k, tf.shape(probs)[0])\n", + " # Get top-k values and indices\n", + " top_k_values, top_k_indices = tf.nn.top_k(probs, k=k, sorted=False)\n", + " # Create mask for top-k positions\n", + " top_k_mask = tf.scatter_nd(\n", + " tf.expand_dims(top_k_indices, 1),\n", + " tf.ones_like(top_k_values, dtype=tf.bool),\n", + " tf.shape(probs)\n", + " )\n", + " # Zero out non-top-k probabilities\n", + " probs = tf.where(top_k_mask, probs, tf.zeros_like(probs))\n", + " # Renormalize\n", + " probs = probs / tf.reduce_sum(probs)\n", + " print(\n", + " f\">>> After top_k: {tf.shape(probs)} shape, {tf.reduce_sum(tf.cast(probs > 1e-8, tf.int32))} non-zero probs\")\n", + "\n", + " # Apply top-p filtering (if specified)\n", + " if top_p is not None and top_p < 1.0:\n", + " # Sort probabilities in descending order\n", + " sorted_indices = tf.argsort(probs, direction='DESCENDING')\n", + " sorted_probs = tf.gather(probs, sorted_indices)\n", + " cumulative_probs = tf.cumsum(sorted_probs)\n", + " # Create mask for top-p\n", + " mask = cumulative_probs <= top_p\n", + " # Always keep at least one token\n", + " mask = tf.concat([tf.constant([True]), mask[1:]], axis=0)\n", + " # Get indices to keep\n", + " keep_indices = tf.boolean_mask(sorted_indices, mask)\n", + " # Create mask for original 
indices\n", + " filter_mask = tf.scatter_nd(\n", + " tf.expand_dims(keep_indices, 1),\n", + " tf.ones_like(keep_indices, dtype=tf.bool),\n", + " tf.shape(probs)\n", + " )\n", + " # Apply mask and renormalize\n", + " probs = tf.where(filter_mask, probs, tf.zeros_like(probs))\n", + " probs = probs / tf.reduce_sum(probs)\n", + " print(\n", + " f\">>> After top_p: {tf.shape(probs)} shape, {tf.reduce_sum(tf.cast(probs > 1e-8, tf.int32))} non-zero probs\")\n", + "\n", + " # Sample from the final filtered distribution\n", + " # Get non-zero indices and their probabilities\n", + " non_zero_mask = probs > 1e-8\n", + " if tf.reduce_any(non_zero_mask):\n", + " filtered_indices = tf.where(non_zero_mask)[:, 0] # Get indices\n", + " filtered_probs = tf.boolean_mask(probs, non_zero_mask) # Get probabilities\n", + " # Sample\n", + " sampled_local_index = tf.random.categorical(tf.math.log(filtered_probs)[None, :], 1)[0, 0]\n", + " # Map back to vocabulary index\n", + " next_token_id = int(filtered_indices[sampled_local_index].numpy())\n", + " else:\n", + " # Fallback if all probabilities are zero\n", + " warn(\n", + " \"Token sampling had to revert to greedy sampling, because no probs had a value > 0, unexpected\")\n", + " next_token_id = int(tf.argmax(probs, axis=-1).numpy())\n", + "\n", + " else:\n", + " # Greedy sampling (argmax) - apply repetition penalty if needed\n", + " if repetition_penalty is not None and repetition_penalty != 1.0:\n", + " unique_tokens = list(set(current_tokens))\n", + " vocab_size = tf.shape(logits)[0]\n", + " for token_id in unique_tokens:\n", + " if token_id < vocab_size:\n", + " logits = tf.tensor_scatter_nd_update(\n", + " logits,\n", + " [[token_id]],\n", + " [logits[token_id] / repetition_penalty]\n", + " )\n", + "\n", + " next_token_id = int(tf.argmax(logits, axis=-1).numpy())\n", + "\n", + " # Check for termination condition\n", + " if next_token_id == self.padding_token:\n", + " break\n", + "\n", + " # Add to generated tokens and update current 
tokens\n", + " generated_tokens.append(int(next_token_id))\n", + " current_tokens.append(int(next_token_id))\n", + "\n", + " # Check if we've reached max sequence length\n", + " if len(current_tokens) >= self.max_sequence_length:\n", + " break\n", + "\n", + " return token_ids + generated_tokens\n", + "\n", + "```" + ], + "metadata": { + "id": "96KSf1hKoe0H" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "## How this LLM wrapper works under the hood: A Simple Overview\n", + "\n", + "- Think of a Large Language Model like the \"autocomplete\" on your cell phone's keyboard that suggests the next word.\n", + "- Now, imagine you continuously click the suggested next word.\n", + "- The model picks the mathematically most likely next word, and you just go with it, and pick the next, then the next ...\n", + "\n", + "### Here is the step-by-step flow of how it generates text.\n", + "\n", + "1. INPUT: The Prompt\n", + "\n", + "The process always starts with a piece of text from you, the user.\n", + "\n", + "\"Write a story\"\n", + "\n", + "2. STEP 1: Tokenization — From Words to Numbers\n", + "\n", + "A computer doesn't understand letters or words; it understands numbers. The first step is to convert the prompt into a sequence of numbers the model can process. The tokenizer is a specialized dictionary for this job.\n", + "\n", + " What comes in: A string of text (\"Write a story\").\n", + " What goes out: A list of numerical IDs ([92, 21, 54, 21, 63, ...]).\n", + "\n", + "To make processing consistent, the input is always padded to a fixed length (e.g., 40 tokens). Any empty slots are filled with a special ID that is assigned by the tokenizer.\n", + "\n", + "\"Write a story\" -> tokenizer -> [92, 21, 54, 21, 63, 1234, 1234, ... 
(length 40)]\n", + "\n", + "For example, it may look like:\n", + "```\n", + "92 = \"Write\"\n", + "21 = \" \"\n", + "54 = \"a\"\n", + "63 = \"story\"\n", + "1234 = \"\" (Repeated until there are 40 numbers)\n", + "```\n", + "\n", + "3. The Model's Core: Going From Token IDs to the Predicted Next Token:\n", + "\n", + "This is the \"black box\" part. Inside the model, 4 basic things happen:\n", + "\n", + " 1. Embedding (Converts the discrete, high-dimensional sequence of tokens into a continuous distribution of a smaller dimensionality).\n", + " 2. Positional embedding: Takes the output of the embedding layer and represents their relative sequential order as a continuous distribution with a clear mathematical relationship.\n", + " 3. Prediction: A lattice of Dense layers, arranged as columns and rows, each having randomized lateral connectivity with other Dense layers on the same row, and randomized vertical connectivity with Dense layers on other rows. This takes the positional embedding's output and returns a numerical answer from its head layer. This element, produced by the Cerebros NAS, serves as a more computationally efficient alternative to the attention block used in other LLMs. The output is of shape (BATCH_SIZE, VOCABULARY_SIZE) as logits.\n", + " 4. Output activation (Scales the output to a valid range). In this case, the raw output is a tensor of shape (BATCH_SIZE, VOCABULARY_SIZE). The numbers need to be cast as probabilities, so the valid range is:\n", + " - Each element in the list must be in the range between 0 and 1 (inclusive).\n", + " - The entire list of numbers must add up to 1.\n", + " - Softmax is used to accomplish this.\n", + "\n", + "As mentioned before, this is a \"Single Head\" model, unlike most LLMs (like GPT-3/4). Each call returns **only** the next token expected in the sequence, expressed as a list of probabilities (probs) of shape (BATCH_SIZE, VOCABULARY_SIZE).\n", + "\n", + "\n", + "4. 
Predicting the Next Word From the Output of The Final Layer:\n", + "\n", + "After the model returns a list of probabilities, we must **pick the next word** from this. There are VOCABULARY_SIZE words in the vocabulary, each assigned an index position on this list.\n", + "\n", + "5. Sampling\n", + "\n", + "- **Greedy Sampling:** The naive strategy is to just pick the highest probability in this distribution (we call this greedy sampling) and assume it is the correct next token. You then decode that token ID and use it as the next word. Naively assuming the highest probability is correct makes for a few problems, including:\n", + " - The output will be identical every time you write the same prompt.\n", + " - Common words like \"the\", \"and\", ... will be used too often and used out of place.\n", + " - The text will seem \"dry\" and lack creative appeal.\n", + "- **Beam Sampling**: The better approach is scaling, then sampling from a few of the top choices. We apply scaling to the logits and recalculate the probabilities. Then, we eliminate unlikely possibilities. This leaves a smaller set of plausible tokens, from which we randomly select the next word. The methods we use are:\n", + " - **Presence penalty:** Steeply penalizes the logit for a token that has already been used recently or as the last word in the sequence, making it very unlikely to survive sampling and be selected. **Its purpose:** Mainly prevents the same word from being used twice **in immediate succession**: \"This is **the the the** problem which **this this** scaling technique should fix.\"\n", + " - **Frequency penalty:** Mildly penalizes the logit for a token that has been **overused** in the text, but **not necessarily** the last or recent word, making it less likely to be chosen repeatedly but still possible. **For example:** \"This technique **like** fixes **like** this from **like** happening. 
It's **like** really really annoying.\"\n", + " - **Repetition penalty scaling**: A penalty that balances the effects of both presence and frequency penalties, attempting to fix both problems at the same time.\n", + " - **Temperature scaling:** Temperature scaling divides logits by a number you set for 'temperature' to control output \"creativity\" vs \"precision\". Low temperatures less than 1 make the model's top choices more likely, creating predictable text. High temperatures greater than 1 give less likely words a better chance, leading to more diverse and random text. Basically, the higher you set it, the more creative and less factual the LLM's writing will be; the lower you set it, the more precise and factual.\n", + " - After applying all scaling, we convert the logits back to probabilities using softmax. We then proceed to sampling:\n", + " - **Top k sampling**: Set a number 'k'. Eliminate all but the highest k numbers on this list of scaled probabilities.\n", + " - **Top p sampling:** Set a number 'p'. Starting from the most likely token, add up the probabilities until the sum reaches or exceeds 'p'. Keep only this cumulative set of tokens.\n", + " \n", + "Now that we have scaled and filtered the list of tokens, we randomly pick one from the remaining options.\n", + "\n", + "\n", + "6. The Generation Loop: We just do this on repeat.\n", + "\n", + "The model only predicts one word at a time. To complete the text, we repeat this, each time feeding the model the original prompt plus the tokens predicted so far. 
We call this an **autoregressive** loop.\n", + "\n", + "\n", + " Start with a prompt: \"Write a story\"\n", + " \n", + " Input: [Write, a, story]\n", + " Model predicts the token that decodes to: \"about\"\n", + "\n", + " Repeat 1: The appended sequence is fed back into the model.\n", + " New Input: [Write, a, story, about]\n", + " Model predicts: \"a\"\n", + "\n", + " Repeat 2: The appended sequence is fed back into the model again.\n", + " New Input: [Write, a, story, about, a]\n", + " Model predicts: \"fox\"\n", + "\n", + "This loop continues until the model generates a special \"end-of-sequence\" token / pad token or it reaches its maximum length limit (40 tokens in our example).\n", + "\n", + "\n", + "\n", + "## Revisiting the analogy of the autocomplete on repeat, this is what this looks like:\n" + ], + "metadata": { + "id": "kMW_6Vrq_Yi9" + } + }, + { + "cell_type": "markdown", + "source": [ + "![image.png]()" + ], + "metadata": { + "id": "NaMi9QniKqdO" + } + }, + { + "cell_type": "code", + "source": [ + "# Get the best model from the search\n", + "best_model_found = cerebros_automl.get_best_model(purge_model_storage_files='slate')\n", + "\n", + "# Create config and generative model wrapper\n", + "config = CerebrosNotGPTConfig(\n", + " max_sequence_length=MAX_SEQ_LENGTH,\n", + " padding_token=tokenizer.pad_token_id\n", + ")\n", + "generator = CerebrosNotGPT(config, model=best_model_found)\n", + "\n", + "# Test if the model can be built successfully\n", + "text = \"This is a test ...\"\n", + "input_ids = tokenizer(text, add_special_tokens=False)['input_ids']\n", + "current_tokens = input_ids.copy()\n", + "PADDING_TOKEN = tokenizer.pad_token_id\n", + "\n", + "if len(current_tokens) > MAX_SEQ_LENGTH:\n", + " input_tokens = current_tokens[-MAX_SEQ_LENGTH:]\n", + "else:\n", + " padding_needed = MAX_SEQ_LENGTH - len(current_tokens)\n", + " input_tokens = current_tokens + [PADDING_TOKEN] * padding_needed\n", + "\n", + "# A dummy pass to force the model to build\n", + "\n", + "input_tensor = 
tf.constant([input_tokens], dtype=tf.int32)\n", + "\n", + "try:\n", + " _ = generator(input_tensor)\n", + " print(\"โœ… Building LLM Model Successful!\")\n", + "except Exception as exc:\n", + " error_message = f\"โŒ Building model returned the error: {exc}\"\n", + " print(error_message)\n" + ], + "metadata": { + "id": "AEk-TtPCxleV", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "d253eeeb-831e-48ce-f256-c8f10540064a" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/keras/src/layers/layer.py:421: UserWarning: `build()` was called on layer 'interleaved_ro_pe', however the layer does not have a `build()` method implemented and it looks like it has unbuilt state. This will cause the layer to be marked as built, despite not being actually built, which may cause failures down the line. Make sure to implement a proper `build()` method.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "โœ… Building LLM Model Successful!\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Text Generation Utilities\n", + "\n", + "We define two helper functions for text generation:\n", + "\n", + "- One for greedy sampling\n", + "- One for beam sampling with various parameters." 
+ ], + "metadata": { + "id": "u6-wAM0XyUZC" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "# Required parameter\n", + "\n", + "trial_number =1\n", + "\n", + "\n", + "# Utility function for greedy sampling\n", + "def complete_text_greedy(text: str, max_new_tokens: int = 10) -> str:\n", + " input_ids = tokenizer(text, add_special_tokens=False)['input_ids']\n", + " generated_tokens = generator.generate(\n", + " token_ids=input_ids,\n", + " do_sample=False,\n", + " max_new_tokens=max_new_tokens\n", + " )\n", + " generated_text = tokenizer.decode(generated_tokens).replace(text, \"\")\n", + " return generated_text\n", + "\n", + "# Utility function for beam sampling\n", + "def complete_text_beam(text: str,\n", + " max_new_tokens: int = 10,\n", + " temperature: float = 0.75,\n", + " top_k: int = 75,\n", + " top_p: float = 0.98,\n", + " repetition_penalty: float = None,\n", + " presence_penalty: float = 1.3,\n", + " frequency_penalty: float = 1.4) -> str:\n", + " input_ids = tokenizer(text, add_special_tokens=False)['input_ids']\n", + " generated_tokens = generator.generate(\n", + " token_ids=input_ids,\n", + " do_sample=True,\n", + " max_new_tokens=max_new_tokens,\n", + " temperature=temperature,\n", + " top_k=top_k,\n", + " top_p=top_p,\n", + " presence_penalty=presence_penalty,\n", + " frequency_penalty=frequency_penalty\n", + " )\n", + " generated_text = tokenizer.decode(generated_tokens).replace(text, \"\")\n", + " return generated_text\n" + ], + "metadata": { + "id": "f8XigcJcykLn" + }, + "execution_count": 20, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Running Generation Tests\n", + "\n", + "We run a series of tests with different prompts and sampling parameters to evaluate the quality of the model from Stage I-a." 
+ ], + "metadata": { + "id": "HG0IjcWEyrXn" + } + }, + { + "cell_type": "code", + "source": [ + "def test_text(test_prompt: str, max_new_tokens: int, result_cutoff: float, trial_id: int,\n", + " test_sample_number: int, result_0: float) -> None:\n", + " \"\"\"\n", + " If the result_0 < result_cutoff, this will run a matrix of different sampling values and print out the resulting text for human subjective evaluation.\n", + "\n", + " Parameters:\n", + " - test_prompt: a string to prompt generation\n", + " - max_new_tokens: int, number of tokens to generate unless we generate a stop token.\n", + " - sample_number: Metadata for sample...\n", + " - result_0: Perplexity score from this run\n", + " - result_cutoff: Perplexity score that would be expected to indicate a trial worth running this pn\n", + "\n", + " \"\"\"\n", + " if result_0 < result_cutoff:\n", + " generation_param_permutations = [\n", + " # #3\n", + " {\n", + " 'max_new_tokens': max_new_tokens,\n", + " 'temperature': 0.6,\n", + " 'top_k': 75,\n", + " 'top_p': 0.98,\n", + " 'repetition_penalty': None,\n", + " 'presence_penalty': 1.3,\n", + " 'frequency_penalty': 1.4\n", + " },\n", + " # #4\n", + " {\n", + " 'max_new_tokens': max_new_tokens,\n", + " 'temperature': 0.7,\n", + " 'top_k': 75,\n", + " 'top_p': 0.98,\n", + " 'repetition_penalty': None,\n", + " 'presence_penalty': 1.3,\n", + " 'frequency_penalty': 1.4\n", + " },\n", + " # #5\n", + " {\n", + " 'max_new_tokens': max_new_tokens,\n", + " 'temperature': 0.7,\n", + " 'top_k': 75,\n", + " 'top_p': 0.97,\n", + " 'repetition_penalty': None,\n", + " 'presence_penalty': 1.3,\n", + " 'frequency_penalty': 1.4},\n", + " # #6\n", + " {\n", + " 'max_new_tokens': max_new_tokens,\n", + " 'temperature': 0.75,\n", + " 'top_k': 75,\n", + " 'top_p': 0.98,\n", + " 'repetition_penalty': None,\n", + " 'presence_penalty': 1.4,\n", + " 'frequency_penalty': 1.4},\n", + " # #7\n", + " {\n", + " 'max_new_tokens': max_new_tokens,\n", + " 'temperature': 0.7,\n", + " 'top_k': 
75,\n", + " 'top_p': 0.98,\n", + " 'repetition_penalty': None,\n", + " 'presence_penalty': 1.4,\n", + " 'frequency_penalty': 1.4},\n", + " # #8\n", + " {\n", + " 'max_new_tokens': max_new_tokens,\n", + " 'temperature': 0.6,\n", + " 'top_k': 75,\n", + " 'top_p': 0.98,\n", + " 'repetition_penalty': None,\n", + " 'presence_penalty': 1.4,\n", + " 'frequency_penalty': 1.4\n", + " },\n", + " {\n", + " 'max_new_tokens': max_new_tokens,\n", + " 'temperature': 0.6,\n", + " 'top_k': 40,\n", + " 'top_p': 0.96,\n", + " 'repetition_penalty': None,\n", + " 'presence_penalty': 1.4,\n", + " 'frequency_penalty': 1.4\n", + " },\n", + " {\n", + " 'max_new_tokens': max_new_tokens,\n", + " 'temperature': 0.7,\n", + " 'top_k': 45,\n", + " 'top_p': 0.97,\n", + " 'repetition_penalty': None,\n", + " 'presence_penalty': 1.4,\n", + " 'frequency_penalty': 1.3\n", + " }, #\n", + " {\n", + " 'max_new_tokens': max_new_tokens,\n", + " 'temperature': 0.6,\n", + " 'top_k': 75,\n", + " 'top_p': 0.99,\n", + " 'repetition_penalty': None,\n", + " 'presence_penalty': 1.4,\n", + " 'frequency_penalty': 1.4\n", + " },\n", + " {\n", + " 'max_new_tokens': max_new_tokens,\n", + " 'temperature': 0.65,\n", + " 'top_k': 75,\n", + " 'top_p': 0.985,\n", + " 'repetition_penalty': None,\n", + " 'presence_penalty': 1.4,\n", + " 'frequency_penalty': 1.4\n", + " },\n", + " {\n", + " 'max_new_tokens': max_new_tokens,\n", + " 'temperature': 0.8,\n", + " 'top_k': 75,\n", + " 'top_p': 0.99,\n", + " 'repetition_penalty': None,\n", + " 'presence_penalty': 0.7,\n", + " 'frequency_penalty': 0.7\n", + " }\n", + " ]\n", + " # Default cases, no params\n", + " response_1 = complete_text_greedy(text=test_prompt, max_new_tokens=max_new_tokens)\n", + " print(\n", + " f\"Trial #: {trial_id} Text Sample #: {test_sample_number} Perplexity: {result_0} GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: '{test_prompt}' RESPONSE: '{response_1}'\")\n", + " # print(f\"Sample {sample_number}: I ask the generator 
(greedy): {test_prompt}... It responds: '{response_1}'.\")\n", + " response_2 = complete_text_beam(text=test_prompt, max_new_tokens=max_new_tokens)\n", + " print(\n", + " f\"Trial #: {trial_id} Text Sample #: {test_sample_number} Perplexity: {result_0} GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: '{test_prompt}' RESPONSE: '{response_2}'.\")\n", + " # print(f\"Sample {sample_number}: I ask the generator (Beam defaults - max_new_tokens: 10, temperature: 0.75, top_k: 75, top_p: 0.98, repetition_penalty: None, presence_penalty: 1.3, frequency_penalty: 1.4): {test_prompt}... It responds: '{response_2}'.\")\n", + "\n", + " for perm_0 in generation_param_permutations:\n", + " response_0 = complete_text_beam(text=test_prompt,\n", + " max_new_tokens=max_new_tokens,\n", + " temperature=perm_0['temperature'],\n", + " top_k=perm_0['top_k'],\n", + " top_p=perm_0['top_p'],\n", + " repetition_penalty=perm_0['repetition_penalty'],\n", + " presence_penalty=perm_0['presence_penalty'],\n", + " frequency_penalty=perm_0['frequency_penalty'])\n", + " print(\n", + " f\"Trial #: {trial_id} Text Sample #: {test_sample_number} Perplexity: {result_0} GENERATE PARAMS: max_new_tokens={perm_0['max_new_tokens']} temperature={perm_0['temperature']}, top_k={perm_0['top_k']}, top_p={perm_0['top_p']}, repetition_penalty={perm_0['repetition_penalty']} presence_penalty={perm_0['presence_penalty']} frequency_penalty{perm_0['frequency_penalty']} PROMPT: '{test_prompt}' RESPONSE: '{response_0}'\")\n", + "\n", + "\n", + "prompt_samples = [\n", + " \"I saw the sun and it was as shining on the\",\n", + " \"And God said, Let there be light: and there \",\n", + " \"In the beginning God created the heavens\"\n", + "]\n", + "\n", + "\n", + "counter = 0\n", + "for sample in prompt_samples:\n", + " test_text(\n", + " test_prompt=sample,\n", + " max_new_tokens=MAX_NEW_TOKENS,\n", + " 
result_cutoff=15,\n", + " trial_id=trial_number,\n", + " test_sample_number=counter,\n", + " result_0=phase_i_a_result)\n", + " counter += 1\n", + "\n", + "\n", + "collect()\n" + ], + "metadata": { + "id": "hut-HAJjyvn-", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "e05a9fb1-706e-4f26-e668-825f7df940c2" + }, + "execution_count": 21, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' earth the the the the the the the the the the the the the the'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 7 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 7 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' earth God beginning'.\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + 
">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' earth. beginning created God'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 13 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 31 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 9 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 14 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' created. 
beginning God earthless earth beginning'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' earth God.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 7 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 10 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 16 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None 
presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' God earth beginning created heavens. earth created'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' beginning created earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 9 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 7 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None 
presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' created beginning earth heavens. God earth'\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 7 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' created earth beginning God heavens. 
earth'\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 7 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 10 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 9 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'I saw the sun and it was as shining on the' 
RESPONSE: ' God earth beginning created beginning God. earth heavens created beginning heavens earth. earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 7 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 9 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 10 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 18 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' created earth beginning God heavens. 
created earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 7 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 9 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 12 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' beginning earth God created earth heavens'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 26 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 16 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 26 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 29 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 34 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero 
probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' earth created heavens earth\\Order.cpt. the beginning'\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the the.. the the the the the the the the the the.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' earth. 
the'.\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' earth. the.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: 'And God said, Let there be light: and there. 
earth the'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the. the earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, 
repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: 'And God said, Let there be light: and there. the earth'\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the. 
earth'\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the. earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: 'And God said, Let there be light: and there. 
the earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' the earth.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ''\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'In the beginning God created the heavens' RESPONSE: ' heavens heavens heavens heavens and heavens heavens heavens heavens heavens heavens heavens heavens and and'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, 
frequency_penalty=1.4: PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth'.\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and. 
earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' was earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None 
presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and. earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth.'\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and. 
earth'\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and. earth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 
7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and earth.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 6 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 3 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 2 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 7.876600742340088 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and created earth.'\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "5885" + ] + }, + "metadata": {}, + "execution_count": 21 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Stage I-b: Extended Training\n", + "\n", + "- Now, we take the best model from Stage I-a and continue training it on a larger dataset.\n", + "- This uses a streaming `tf.data.Dataset` generator to allow handling of much larger data sets without using more RAM.\n", + "- This would allow us to select far more samples, but for now, we select a small subset for this small scale environment.\n", + "\n", + "## Streaming Data Generator for Large Datasets\n", + "\n", + "\n", + "The **SampleExpansionGenerator** class, which we create below:\n", + "\n", + " - Applies and streams the same preprocessing logic to the raw text samples as we did in Stage I-a.\n", + " - However, it preprocesses one 
**sample expansion batch** at a time and stores the resulting expanded samples in memory.\n", + " - It then feeds the resulting expanded samples to the model in batches matching the **model's BATCH_SIZE** as requested by the training loop.\n", + " - **sample expansion batch** is not the same as **the model's BATCH_SIZE**.\n", + "\n", + "For example, we could train on a dataset of 10 \\** 6 samples, while setting the **sample expansion batch size** to 100 while the **model's batch size** is 10.\n", + " - 100 raw text samples will be expanded at a time.\n", + " - This results in thousands of expanded sub-samples being queued and ready for the model to take.\n", + " - The model will take 10 of these at a time until it does not have 10 left to provide.\n", + " - Then, the generator preprocesses another 100 text samples and garbage collect.\n", + "\n", + "This allows training on datasets that would be much larger than available memory after expansion, making the training scalable.\n", + "\n", + "\n", + "### The sample expansion batch size should be optimized to balance two opposing forces:\n", + "\n", + " - Memory pressure increases with the number of expanded samples held in memory.\n", + " - Delays are caused by switching back and forth between tensor operations and preprocessing when batches are too small.\n", + "\n" + ], + "metadata": { + "id": "tuhQx2kjy4nn" + } + }, + { + "cell_type": "code", + "source": [ + "# Replace your existing class and function with these:\n", + "class SampleExpansionGenerator:\n", + " def __init__(self,\n", + " raw_text_samples,\n", + " tokenizer,\n", + " sample_expansion_batch_size=50,\n", + " model_batch_size=10,\n", + " prompt_length_0=PROMPT_LENGTH,\n", + " max_seq_length=MAX_SEQ_LENGTH,\n", + " vocabulary_size=VOCABULARY_SIZE):\n", + "\n", + " self.raw_text_samples = raw_text_samples\n", + " self.tokenizer = tokenizer\n", + " self.sample_expansion_batch_size = sample_expansion_batch_size\n", + " self.model_batch_size = 
model_batch_size\n", + " self.prompt_length_0 = prompt_length_0\n", + " self.max_seq_length = max_seq_length\n", + " self.vocabulary_size = vocabulary_size\n", + " self.data = []\n", + " self.labels = []\n", + " self.current_index = 0\n", + "\n", + " def _expand_next_batch(self):\n", + " # If we've already processed all raw samples for this epoch, do nothing.\n", + " if self.current_index >= len(self.raw_text_samples):\n", + " return\n", + "\n", + " # Determine the next meta-batch\n", + " start_idx = self.current_index\n", + " end_idx = min(start_idx + self.sample_expansion_batch_size, len(self.raw_text_samples))\n", + "\n", + " batch_samples = self.raw_text_samples[start_idx:end_idx]\n", + " self.current_index = end_idx\n", + "\n", + " # Run prepare_data on this batch\n", + " input_ids_list, labels_list, _ = prepare_data(\n", + " data_0=batch_samples,\n", + " tokenizer_0=self.tokenizer,\n", + " max_seq_length=self.max_seq_length,\n", + " prompt_length=self.prompt_length_0)\n", + "\n", + " # Add the new data to our internal queues\n", + " self.data.extend(input_ids_list)\n", + " self.labels.extend(labels_list)\n", + "\n", + " def __iter__(self):\n", + " # Reset to initial state for new epoch\n", + " self.current_index = 0\n", + " self.data = []\n", + " self.labels = []\n", + " return self\n", + "\n", + " def __next__(self):\n", + " # If queues are empty, try to expand them from raw samples\n", + " if not self.data:\n", + " self._expand_next_batch()\n", + "\n", + " # If they are STILL empty after trying to expand, the epoch is over.\n", + " if not self.data:\n", + " raise StopIteration\n", + "\n", + " # Pop and return one sample\n", + " input_sample = self.data.pop(0)\n", + " label_sample = self.labels.pop(0)\n", + "\n", + " return ((input_sample,), label_sample)\n", + "\n", + "\n", + "# Create the tf.data.Dataset\n", + "def create_dataset(raw_text_samples, tokenizer, sample_expansion_batch_size=50, model_batch_size=10) -> tf.data.Dataset:\n", + " generator_0 = 
SampleExpansionGenerator(\n", + " raw_text_samples=raw_text_samples,\n", + " tokenizer=tokenizer,\n", + " sample_expansion_batch_size=sample_expansion_batch_size,\n", + " model_batch_size=model_batch_size # Pass this parameter\n", + " )\n", + "\n", + " dataset = tf.data.Dataset.from_generator(\n", + " lambda: generator_0,\n", + " # output_signature=(\n", + " # (tf.TensorSpec(shape=(generator_0.max_seq_length,), dtype=tf.int32),),\n", + " # # tf.TensorSpec(shape=(generator_0.max_seq_length,), dtype=tf.int32), # Use generator's parameter\n", + " # tf.TensorSpec(shape=(generator_0.vocabulary_size,), dtype=tf.float32) # Use generator's parameter\n", + " # )\n", + " output_signature=(\n", + " (tf.TensorSpec(shape=(generator_0.max_seq_length,), dtype=tf.int32),), # A tuple containing ONE TensorSpec\n", + " tf.TensorSpec(shape=(generator_0.vocabulary_size,), dtype=tf.float32) # A single TensorSpec\n", + " )\n", + " )\n", + "\n", + " # Batch it\n", + " dataset = dataset.batch(model_batch_size)\n", + " dataset = dataset.prefetch(tf.data.AUTOTUNE) # Prefetch for performance\n", + " return dataset\n", + "\n", + "# Create training and validation datasets\n", + "phase_i_b_train_dataset = create_dataset(\n", + " raw_text_samples=phase_i_b_train_samples,\n", + " tokenizer=tokenizer,\n", + " sample_expansion_batch_size=PHASE_I_B_SAMPLE_EXPANSION_BATCH_SIZE,\n", + " model_batch_size=batch_size\n", + ")\n", + "\n", + "phase_i_b_val_dataset = create_dataset(\n", + " raw_text_samples=phase_i_b_val_samples,\n", + " tokenizer=tokenizer,\n", + " sample_expansion_batch_size=PHASE_I_B_SAMPLE_EXPANSION_BATCH_SIZE,\n", + " model_batch_size=batch_size\n", + ")\n" + ], + "metadata": { + "id": "MHWWE0xIzLRD" + }, + "execution_count": 22, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "type(phase_i_b_train_dataset)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 121 + }, + "id": "HxwyQzSppQwp", + "outputId": 
"89a48aa5-c364-4057-98c4-fc4a291f448e" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "tensorflow.python.data.ops.prefetch_op._PrefetchDataset" + ], + "text/html": [ + "
\n", + "
tensorflow.python.data.ops.prefetch_op._PrefetchDataset
def __init__(input_dataset, buffer_size, slack_period=None, name=None)
/usr/local/lib/python3.12/dist-packages/tensorflow/python/data/ops/prefetch_op.pyA `Dataset` that asynchronously prefetches its input.
\n", + " \n", + "
" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "## Model Compilation for Phase I-b\n", + "\n", + "- We recompile the model with the same base optimizer (AdamW), however this time with a custom learning rate scheduler (WarmupCosineDecayRestarts), and for disambiguation, relevant metrics for this training phase. We also add an EarlyStopping callback which is mainly being used to restore the weights from the best epoch, if that turns out to not be the last epoch.\n", + "\n", + "\n", + "## For those wanting to scale this up, a word to point out:\n", + "\n", + "The parameters for the learning rate scheduler may need to be optimized. They will be different for your data. Alternatively, you can remove the learning rate scheduler if this is too much trail and error.\n", + "\n", + "- We set the starting learning rate at: 0.0039295722955565125\n", + "- We set warmup steps to 1140, which for the data selected is 15 epochs.\n", + "- We set first decay steps to 1900, which for this data set is about 25 epochs.\n", + "\n", + "Also:\n", + "\n", + "Additionally, the early stopping callback will likely need to be adjusted. 
When training at scale, you may use a lower learning rate and a larger number of epochs, as well as a larger value for the start_from_epoch parameter (which specifies when to begin tracking the metric for early stopping).\n", + "\n", + "FYI, this is the custom scheduler we imported from cerebrosllmutils (CosineDecayRestarts augmented with warmup steps):\n", + "\n", + "\n", + "```python\n", + "# A custom schedule: Cosine decay with some warm - up steps\n", + "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='WarmupCosineDecayRestarts')\n", + "class WarmupCosineDecayRestarts(tf.keras.optimizers.schedules.LearningRateSchedule):\n", + " \"\"\"\n", + " A learning rate schedule that combines a linear warmup with cosine decay restarts.\n", + " \"\"\"\n", + "\n", + " def __init__(self, initial_learning_rate, warmup_steps, first_decay_steps, t_mul=2.0, m_mul=1.0, alpha=0.0):\n", + " super().__init__()\n", + "\n", + " # Store all parameters as public attributes for get_config serialization\n", + " self.initial_learning_rate = initial_learning_rate\n", + " self.warmup_steps = warmup_steps\n", + " self.first_decay_steps = first_decay_steps\n", + " self.t_mul = t_mul\n", + " self.m_mul = m_mul\n", + " self.alpha = alpha\n", + "\n", + " # Create the CosineDecayRestarts schedule for internal logic.\n", + " # The parameters passed here are the same ones we just stored.\n", + " self.cosine_restarts_schedule = tf.keras.optimizers.schedules.CosineDecayRestarts(\n", + " initial_learning_rate=initial_learning_rate,\n", + " first_decay_steps=first_decay_steps,\n", + " t_mul=t_mul,\n", + " m_mul=m_mul,\n", + " alpha=alpha\n", + " )\n", + "\n", + "\n", + " def __call__(self, step):\n", + " step = tf.cast(step, dtype=tf.float32)\n", + "\n", + " # Calculate the learning rate for both phases unconditionally\n", + " warmup_lr = self.initial_learning_rate * step / self.warmup_steps\n", + "\n", + " # The cosine schedule is designed to start from step 0, so we give 
it\n", + " # the \"post-warmup\" step count.\n", + " decay_lr = self.cosine_restarts_schedule(step - self.warmup_steps)\n", + "\n", + " # Create a multiplier that is 1.0 during warmup and 0.0 after.\n", + " # tf.cast(condition, tf.float32) converts a boolean tensor to 1.0 or 0.0.\n", + " warmup_multiplier = tf.cast(step < self.warmup_steps, tf.float32)\n", + "\n", + " # The decay multiplier is the opposite.\n", + " decay_multiplier = 1.0 - warmup_multiplier\n", + "\n", + " # Combine the two learning rates. Only one will be active at a time.\n", + " return (warmup_multiplier * warmup_lr) + (decay_multiplier * decay_lr)\n", + "\n", + " def get_config(self):\n", + " # Use the stored public attributes for the config.\n", + " # This bypasses the issue of accessing private attributes (_t_mul) from\n", + " # the nested Keras object, which can be brittle.\n", + " config = {\n", + " \"initial_learning_rate\": self.initial_learning_rate,\n", + " \"warmup_steps\": self.warmup_steps,\n", + " \"first_decay_steps\": self.first_decay_steps,\n", + " \"t_mul\": self.t_mul,\n", + " \"m_mul\": self.m_mul,\n", + " \"alpha\": self.alpha,\n", + " }\n", + "\n", + " # Use from_config to properly allow deserialization\n", + " return config\n", + "```\n", + "\n" + ], + "metadata": { + "id": "DPaeJKEzzlPw" + } + }, + { + "cell_type": "code", + "source": [ + "# Define loss and metrics for Phase I-b\n", + "phase_i_b_loss = tf.keras.losses.CategoricalCrossentropy()\n", + "phase_i_b_categorical_accuracy = tf.keras.metrics.CategoricalAccuracy()\n", + "phase_i_b_perplexity = Perplexity(name=\"perplexity_phase_i_b\")\n", + "\n", + "# Create the learning rate schedule instance\n", + "lr_scheduler = WarmupCosineDecayRestarts(\n", + " initial_learning_rate=INITIAL_LR_STAGE_I_B,\n", + " warmup_steps=WARMUP_STEPS,\n", + " first_decay_steps=FIRST_DECAY_STEPS_STAGE_I_B,\n", + " t_mul=1.0,\n", + " m_mul=0.9,\n", + " alpha=0.01\n", + ")\n", + "\n", + "# Recompile the existing model\n", + 
"generator.model.compile(\n", + " loss=phase_i_b_loss,\n", + " metrics=[phase_i_b_categorical_accuracy, phase_i_b_perplexity],\n", + " optimizer=tf.keras.optimizers.AdamW(\n", + " learning_rate=lr_scheduler,\n", + " weight_decay=phase_i_b_weight_decay,\n", + " gradient_accumulation_steps=phase_i_b_gradient_accumulation_steps\n", + " ),\n", + " jit_compile=True\n", + ")\n", + "\n", + "# Define the Early Stopping callback\n", + "early_stopping = tf.keras.callbacks.EarlyStopping(\n", + " monitor='perplexity_phase_i_b', # Monitor validation perplexity\n", + " patience=10, # Number of epochs with no improvement after which training will be stopped.\n", + " verbose=1,\n", + " restore_best_weights=True, # Restores model weights from the epoch with the best value of the monitored metric.\n", + " mode='min',\n", + " start_from_epoch=40\n", + ")\n", + "\n", + "\n", + "callbacks_list = [early_stopping]\n" + ], + "metadata": { + "id": "GGkEVa2dzOtf" + }, + "execution_count": 24, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Run Stage I-b Training\n", + "\n", + "- We start the training process using the model.fit method with the new datasets and callbacks to continue training the same model on another dataset. In our at scale runs, both the previous stage and this stage are dene on far more data." 
+ ], + "metadata": { + "id": "y_K5nLzVz_-b" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "\n", + "\n", + "\n", + "# print(\"Calculating steps per epoch...\")\n", + "# train_steps = sum(1 for _ in phase_i_b_train_dataset)\n", + "# val_steps = sum(1 for _ in phase_i_b_val_dataset)\n", + "# print(f\"Calculated training steps per epoch: {train_steps}\")\n", + "# print(f\"Calculated validation steps: {val_steps}\")\n", + "\n", + "# Train the model\n", + "phase_i_b_history = generator.model.fit(\n", + " x=phase_i_b_train_dataset,\n", + " validation_data=phase_i_b_val_dataset,\n", + " epochs=phase_i_b_epochs,\n", + " callbacks=callbacks_list\n", + ")\n", + "\n", + "# Store history and get the best (lowest) training perplexity\n", + "phase_i_b_history = pd.DataFrame(phase_i_b_history.history)\n", + "result_phase_i_b = float(phase_i_b_history['perplexity_phase_i_b'].min())\n", + "f\"Result of Stage 1-b training {result_phase_i_b}\"\n" + ], + "metadata": { + "id": "3GGqvlIl0FvV", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "0daf05b2-7072-4818-8b47-a05558b33470" + }, + "execution_count": 25, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Epoch 1/53\n", + " 76/Unknown \u001b[1m69s\u001b[0m 636ms/step - categorical_accuracy: 0.0389 - loss: 13.6508 - perplexity_phase_i_b: 966782.2500" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/keras/src/trainers/epoch_iterator.py:160: UserWarning: Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches. 
You may need to use the `.repeat()` function when building your dataset.\n", + " self._interrupted_warning()\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m73s\u001b[0m 690ms/step - categorical_accuracy: 0.0388 - loss: 13.6471 - perplexity_phase_i_b: 962529.6250 - val_categorical_accuracy: 0.0492 - val_loss: 11.5516 - val_perplexity_phase_i_b: 103939.8906\n", + "Epoch 2/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 609ms/step - categorical_accuracy: 0.0164 - loss: 13.8992 - perplexity_phase_i_b: 2969392.7500 - val_categorical_accuracy: 0.0492 - val_loss: 12.0771 - val_perplexity_phase_i_b: 175791.3594\n", + "Epoch 3/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 609ms/step - categorical_accuracy: 0.0250 - loss: 12.8039 - perplexity_phase_i_b: 402124.0625 - val_categorical_accuracy: 0.0656 - val_loss: 12.3528 - val_perplexity_phase_i_b: 231597.3438\n", + "Epoch 4/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 591ms/step - categorical_accuracy: 0.0415 - loss: 11.6595 - perplexity_phase_i_b: 140648.8125 - val_categorical_accuracy: 0.0492 - val_loss: 12.4123 - val_perplexity_phase_i_b: 245801.6250\n", + "Epoch 5/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 
611ms/step - categorical_accuracy: 0.0439 - loss: 11.1954 - perplexity_phase_i_b: 73950.6797 - val_categorical_accuracy: 0.0492 - val_loss: 12.3395 - val_perplexity_phase_i_b: 228538.3750\n", + "Epoch 6/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 589ms/step - categorical_accuracy: 0.0816 - loss: 10.2579 - perplexity_phase_i_b: 29194.4102 - val_categorical_accuracy: 0.0656 - val_loss: 12.1179 - val_perplexity_phase_i_b: 183113.2031\n", + "Epoch 7/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 590ms/step - categorical_accuracy: 0.0590 - loss: 9.9608 - perplexity_phase_i_b: 22667.8711 - val_categorical_accuracy: 0.0492 - val_loss: 11.8740 - val_perplexity_phase_i_b: 143489.0312\n", + "Epoch 8/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m97s\u001b[0m 599ms/step - categorical_accuracy: 0.0593 - loss: 8.9806 - perplexity_phase_i_b: 8207.2861 - val_categorical_accuracy: 0.0328 - val_loss: 12.3863 - val_perplexity_phase_i_b: 239495.6562\n", + "Epoch 9/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 558ms/step - categorical_accuracy: 0.0661 - loss: 7.8740 - perplexity_phase_i_b: 2828.0859 - val_categorical_accuracy: 0.0164 - val_loss: 11.9790 - val_perplexity_phase_i_b: 159370.9219\n", + "Epoch 10/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m50s\u001b[0m 630ms/step - categorical_accuracy: 0.1062 - loss: 6.8127 - perplexity_phase_i_b: 987.0147 - val_categorical_accuracy: 0.0328 - val_loss: 11.2031 - val_perplexity_phase_i_b: 73360.1719\n", + "Epoch 11/53\n", + 
"\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 604ms/step - categorical_accuracy: 0.0687 - loss: 5.7574 - perplexity_phase_i_b: 324.8636 - val_categorical_accuracy: 0.0164 - val_loss: 9.6458 - val_perplexity_phase_i_b: 15456.3154\n", + "Epoch 12/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m87s\u001b[0m 686ms/step - categorical_accuracy: 0.0943 - loss: 4.8160 - perplexity_phase_i_b: 124.1660 - val_categorical_accuracy: 0.0492 - val_loss: 8.6260 - val_perplexity_phase_i_b: 5574.9229\n", + "Epoch 13/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 608ms/step - categorical_accuracy: 0.1206 - loss: 4.4321 - perplexity_phase_i_b: 84.3652 - val_categorical_accuracy: 0.0328 - val_loss: 8.1588 - val_perplexity_phase_i_b: 3493.8950\n", + "Epoch 14/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m54s\u001b[0m 597ms/step - categorical_accuracy: 0.1237 - loss: 4.4953 - perplexity_phase_i_b: 91.3969 - val_categorical_accuracy: 0.0328 - val_loss: 8.3403 - val_perplexity_phase_i_b: 4189.2686\n", + "Epoch 15/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 579ms/step - categorical_accuracy: 0.0997 - loss: 4.2491 - perplexity_phase_i_b: 70.9299 - val_categorical_accuracy: 0.0656 - val_loss: 8.6163 - val_perplexity_phase_i_b: 5520.8823\n", + "Epoch 16/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m88s\u001b[0m 585ms/step - categorical_accuracy: 0.1204 - loss: 4.2542 - perplexity_phase_i_b: 70.9240 - 
val_categorical_accuracy: 0.0656 - val_loss: 8.7940 - val_perplexity_phase_i_b: 6594.3228\n", + "Epoch 17/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 577ms/step - categorical_accuracy: 0.1386 - loss: 4.2547 - perplexity_phase_i_b: 70.8944 - val_categorical_accuracy: 0.0984 - val_loss: 8.7318 - val_perplexity_phase_i_b: 6196.8022\n", + "Epoch 18/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 597ms/step - categorical_accuracy: 0.1209 - loss: 4.2489 - perplexity_phase_i_b: 70.4136 - val_categorical_accuracy: 0.0984 - val_loss: 8.9164 - val_perplexity_phase_i_b: 7453.2446\n", + "Epoch 19/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 567ms/step - categorical_accuracy: 0.1236 - loss: 4.2367 - perplexity_phase_i_b: 69.5275 - val_categorical_accuracy: 0.0656 - val_loss: 8.8083 - val_perplexity_phase_i_b: 6689.4990\n", + "Epoch 20/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 596ms/step - categorical_accuracy: 0.1506 - loss: 4.1450 - perplexity_phase_i_b: 63.6329 - val_categorical_accuracy: 0.0656 - val_loss: 8.6605 - val_perplexity_phase_i_b: 5770.2129\n", + "Epoch 21/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 564ms/step - categorical_accuracy: 0.1424 - loss: 4.0012 - perplexity_phase_i_b: 55.2548 - val_categorical_accuracy: 0.0820 - val_loss: 8.6945 - val_perplexity_phase_i_b: 5970.1401\n", + "Epoch 22/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m 
\u001b[1m45s\u001b[0m 589ms/step - categorical_accuracy: 0.1520 - loss: 4.1843 - perplexity_phase_i_b: 66.0555 - val_categorical_accuracy: 0.0656 - val_loss: 8.3286 - val_perplexity_phase_i_b: 4140.4941\n", + "Epoch 23/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 557ms/step - categorical_accuracy: 0.1807 - loss: 3.8604 - perplexity_phase_i_b: 48.0663 - val_categorical_accuracy: 0.0656 - val_loss: 8.6137 - val_perplexity_phase_i_b: 5506.4224\n", + "Epoch 24/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 608ms/step - categorical_accuracy: 0.1533 - loss: 3.9858 - perplexity_phase_i_b: 54.5812 - val_categorical_accuracy: 0.1148 - val_loss: 8.5935 - val_perplexity_phase_i_b: 5396.4331\n", + "Epoch 25/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 573ms/step - categorical_accuracy: 0.1230 - loss: 4.0118 - perplexity_phase_i_b: 55.6288 - val_categorical_accuracy: 0.1475 - val_loss: 8.6210 - val_perplexity_phase_i_b: 5547.1172\n", + "Epoch 26/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 564ms/step - categorical_accuracy: 0.1588 - loss: 3.8591 - perplexity_phase_i_b: 47.8675 - val_categorical_accuracy: 0.1148 - val_loss: 8.4999 - val_perplexity_phase_i_b: 4914.4688\n", + "Epoch 27/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 582ms/step - categorical_accuracy: 0.1900 - loss: 3.8535 - perplexity_phase_i_b: 47.2824 - val_categorical_accuracy: 0.0820 - val_loss: 8.7680 - val_perplexity_phase_i_b: 6425.2207\n", + "Epoch 28/53\n", + 
"\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 593ms/step - categorical_accuracy: 0.1927 - loss: 3.6720 - perplexity_phase_i_b: 39.7386 - val_categorical_accuracy: 0.0656 - val_loss: 8.7999 - val_perplexity_phase_i_b: 6633.3721\n", + "Epoch 29/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 594ms/step - categorical_accuracy: 0.1848 - loss: 3.8259 - perplexity_phase_i_b: 46.2804 - val_categorical_accuracy: 0.0656 - val_loss: 8.6051 - val_perplexity_phase_i_b: 5459.4458\n", + "Epoch 30/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 597ms/step - categorical_accuracy: 0.1691 - loss: 3.6890 - perplexity_phase_i_b: 40.3801 - val_categorical_accuracy: 0.0984 - val_loss: 8.5689 - val_perplexity_phase_i_b: 5265.4810\n", + "Epoch 31/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 587ms/step - categorical_accuracy: 0.1774 - loss: 3.6971 - perplexity_phase_i_b: 40.6956 - val_categorical_accuracy: 0.0984 - val_loss: 8.7037 - val_perplexity_phase_i_b: 6025.3599\n", + "Epoch 32/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 573ms/step - categorical_accuracy: 0.1597 - loss: 3.6218 - perplexity_phase_i_b: 37.8592 - val_categorical_accuracy: 0.0984 - val_loss: 8.7827 - val_perplexity_phase_i_b: 6520.5991\n", + "Epoch 33/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 573ms/step - categorical_accuracy: 0.2066 - loss: 3.6265 - perplexity_phase_i_b: 38.0441 - 
val_categorical_accuracy: 0.0984 - val_loss: 8.7695 - val_perplexity_phase_i_b: 6434.8853\n", + "Epoch 34/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m42s\u001b[0m 550ms/step - categorical_accuracy: 0.1622 - loss: 3.7388 - perplexity_phase_i_b: 42.4272 - val_categorical_accuracy: 0.1148 - val_loss: 8.6601 - val_perplexity_phase_i_b: 5768.0454\n", + "Epoch 35/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m41s\u001b[0m 537ms/step - categorical_accuracy: 0.1974 - loss: 3.4737 - perplexity_phase_i_b: 32.6702 - val_categorical_accuracy: 0.1148 - val_loss: 8.6486 - val_perplexity_phase_i_b: 5702.0361\n", + "Epoch 36/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 603ms/step - categorical_accuracy: 0.1640 - loss: 3.5527 - perplexity_phase_i_b: 35.4395 - val_categorical_accuracy: 0.1148 - val_loss: 8.7015 - val_perplexity_phase_i_b: 6011.7910\n", + "Epoch 37/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 590ms/step - categorical_accuracy: 0.1779 - loss: 3.5903 - perplexity_phase_i_b: 36.4963 - val_categorical_accuracy: 0.1148 - val_loss: 8.7223 - val_perplexity_phase_i_b: 6138.1729\n", + "Epoch 38/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m96s\u001b[0m 598ms/step - categorical_accuracy: 0.1935 - loss: 3.5401 - perplexity_phase_i_b: 34.7298 - val_categorical_accuracy: 0.1148 - val_loss: 8.6995 - val_perplexity_phase_i_b: 5999.7402\n", + "Epoch 39/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m 
\u001b[1m48s\u001b[0m 622ms/step - categorical_accuracy: 0.2109 - loss: 3.5383 - perplexity_phase_i_b: 34.5639 - val_categorical_accuracy: 0.1148 - val_loss: 8.6650 - val_perplexity_phase_i_b: 5796.6436\n", + "Epoch 40/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m42s\u001b[0m 555ms/step - categorical_accuracy: 0.2047 - loss: 3.5124 - perplexity_phase_i_b: 33.9720 - val_categorical_accuracy: 0.1148 - val_loss: 8.7431 - val_perplexity_phase_i_b: 6267.4624\n", + "Epoch 41/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m44s\u001b[0m 576ms/step - categorical_accuracy: 0.1514 - loss: 3.5711 - perplexity_phase_i_b: 35.7887 - val_categorical_accuracy: 0.0656 - val_loss: 8.9814 - val_perplexity_phase_i_b: 7953.5283\n", + "Epoch 42/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 590ms/step - categorical_accuracy: 0.1761 - loss: 3.6074 - perplexity_phase_i_b: 37.1983 - val_categorical_accuracy: 0.0984 - val_loss: 9.0303 - val_perplexity_phase_i_b: 8352.2227\n", + "Epoch 43/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m43s\u001b[0m 571ms/step - categorical_accuracy: 0.1727 - loss: 3.6003 - perplexity_phase_i_b: 36.7872 - val_categorical_accuracy: 0.0328 - val_loss: 8.9927 - val_perplexity_phase_i_b: 8044.2207\n", + "Epoch 44/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m86s\u001b[0m 619ms/step - categorical_accuracy: 0.1786 - loss: 3.7416 - perplexity_phase_i_b: 42.6958 - val_categorical_accuracy: 0.1148 - val_loss: 9.1039 - val_perplexity_phase_i_b: 8990.1494\n", + "Epoch 45/53\n", + 
"\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 594ms/step - categorical_accuracy: 0.2062 - loss: 3.6020 - perplexity_phase_i_b: 37.0046 - val_categorical_accuracy: 0.0984 - val_loss: 9.3867 - val_perplexity_phase_i_b: 11928.1768\n", + "Epoch 46/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 583ms/step - categorical_accuracy: 0.2035 - loss: 3.6276 - perplexity_phase_i_b: 37.9026 - val_categorical_accuracy: 0.0820 - val_loss: 9.5581 - val_perplexity_phase_i_b: 14159.1719\n", + "Epoch 47/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 590ms/step - categorical_accuracy: 0.1784 - loss: 3.4276 - perplexity_phase_i_b: 31.0932 - val_categorical_accuracy: 0.1148 - val_loss: 9.1575 - val_perplexity_phase_i_b: 9485.0088\n", + "Epoch 48/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m45s\u001b[0m 583ms/step - categorical_accuracy: 0.1864 - loss: 3.4227 - perplexity_phase_i_b: 31.1301 - val_categorical_accuracy: 0.1148 - val_loss: 9.1156 - val_perplexity_phase_i_b: 9095.7666\n", + "Epoch 49/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 622ms/step - categorical_accuracy: 0.2266 - loss: 3.4226 - perplexity_phase_i_b: 30.7439 - val_categorical_accuracy: 0.0820 - val_loss: 9.4648 - val_perplexity_phase_i_b: 12897.0039\n", + "Epoch 50/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m95s\u001b[0m 589ms/step - categorical_accuracy: 0.2455 - loss: 3.4171 - perplexity_phase_i_b: 30.9408 - 
val_categorical_accuracy: 0.0820 - val_loss: 9.4194 - val_perplexity_phase_i_b: 12325.3525\n", + "Epoch 51/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m46s\u001b[0m 596ms/step - categorical_accuracy: 0.2168 - loss: 3.2941 - perplexity_phase_i_b: 27.1144 - val_categorical_accuracy: 0.0984 - val_loss: 9.3049 - val_perplexity_phase_i_b: 10991.2559\n", + "Epoch 52/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m49s\u001b[0m 642ms/step - categorical_accuracy: 0.1940 - loss: 3.3548 - perplexity_phase_i_b: 28.8572 - val_categorical_accuracy: 0.0984 - val_loss: 9.1126 - val_perplexity_phase_i_b: 9068.9150\n", + "Epoch 53/53\n", + "\u001b[1m76/76\u001b[0m \u001b[32mโ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\u001b[0m\u001b[37m\u001b[0m \u001b[1m47s\u001b[0m 610ms/step - categorical_accuracy: 0.2291 - loss: 3.3674 - perplexity_phase_i_b: 29.2831 - val_categorical_accuracy: 0.1311 - val_loss: 9.1200 - val_perplexity_phase_i_b: 9136.3135\n", + "Restoring model weights from the end of the best epoch: 53.\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'Result of Stage 1-b training 29.637819290161133'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Stage I-b: Model Evaluation and Serialization\n", + "\n", + "After extended training, we evaluate the final model performance and save the model and tokenizer for future use.\n" + ], + "metadata": { + "id": "y8Ej2P7D0T8R" + } + }, + { + "cell_type": "markdown", + "source": [ + "# Final Generation Tests on the Stage I-b model checkpoint\n", + "\n", + "Confirm the model works after Stage I-b training." 
+ ], + "metadata": { + "id": "dWlYvYBq0dio" + } + }, + { + "cell_type": "code", + "source": [ + "print(\"########### Phase I-b Model Checkpoint Generation Samples: ###########\")\n", + "\n", + "counter = 0\n", + "for sample in prompt_samples:\n", + " test_text(\n", + " test_prompt=sample,\n", + " max_new_tokens=MAX_NEW_TOKENS,\n", + " result_cutoff=60, #\n", + " trial_id=trial_number,\n", + " test_sample_number=counter,\n", + " result_0=result_phase_i_b\n", + " )\n", + " counter += 1\n" + ], + "metadata": { + "id": "YhGaTbGF0X_d", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "8071bc5a-8520-4d13-82e1-cbd941297b4b" + }, + "execution_count": 26, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "########### Phase I-b Model Checkpoint Generation Samples: ###########\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ',,, and fruit fruit fruit fruit fruit fruit fruit fruit fruit fruit fruit'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 52 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: 
[128260] shape, 38 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 41 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 5 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' for morning, over tree with, fruit lights bring fruit great livestock.''.\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 55 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] 
shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ', serve'to lights produce according each kind'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' 
RESPONSE: ', greater and that creeping waters for fifth'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 56 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 55 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 35 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' for, lights bird produce fourth to its with'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 52 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero 
probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 64 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 36 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: 'Be and, image said birds creeping day. 
God'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 50 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 56 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 56 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 53 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 36 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 4 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' night, image to over fish creature earth.''\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero 
probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 55 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 38 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 34 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' for, to birds bird, according forth every.' 
man animals'\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 19 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 31 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 31 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 31 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 30 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 30 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 21 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ', for plant domin fruition day with'\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero 
probs\n", + ">>> After top_p: [128260] shape, 38 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 38 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 36 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 12 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 15 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 10 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' for'lights, great waters eachBe.' 
fruit also'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 53 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 52 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 15 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ', saying produce livestock for every lights-bearing day fifth give to.''\n", + ">>> After 
top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 64 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 34 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ', fifth give to livestock light fruitful its that day every so.'\n", + ">>> After top_k: [128260] shape, 75 non-zero 
probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 65 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 68 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 68 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 67 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 67 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 66 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 67 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 64 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 59 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 31 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 14 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 17 non-zero probs\n", + "Trial #: 1 Text Sample #: 0 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'I saw the sun and it was as shining on the' RESPONSE: ' waters,, for to bring its, fruit.' 
kind.' and its wild'\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'And God said, Let there be light: and there ' RESPONSE: 'And God said, Let there be light: and there,, and fruit fruit fruit fruit fruit fruit fruit fruit fruit fruit fruit fruit'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 56 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 51 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 52 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 52 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 50 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, 
top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' to each the kind fruit that in great birds day. its'.\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 41 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 29 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 24 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' man was forth fruit great with lesser thing animals'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 51 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 55 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After 
top_p: [128260] shape, 55 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 50 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 52 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 53 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 34 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' man to each thing multiply the so fruit in that as saw'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + 
">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 38 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 17 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' fly created the so.' livestock fruit according'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 57 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 58 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 
29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' each fruitful-bearing in animals as man was'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 51 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 26 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 21 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 19 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 20 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 1 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And 
God said, Let there be light: and there ' RESPONSE: ' that themBeh fruit man in great according forth signs fruit.''\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 13 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 22 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' them. fruit'\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 26 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' each created so animals'\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 35 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 35 non-zero probs\n", + ">>> After 
top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 35 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 35 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 34 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 34 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 30 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 15 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 14 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' man-bearing lesser so animals each.' 
wild'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 51 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 19 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 21 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 17 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' to man each bring kind fruit forth. 
its animals'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 50 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 51 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 51 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 18 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' so man each. 
fruit in as'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 65 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 67 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 66 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 66 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 64 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 56 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 52 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + "Trial #: 1 Text Sample #: 1 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.8, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=0.7 frequency_penalty0.7 PROMPT: 'And God said, Let there be light: and there ' RESPONSE: ' man-bearing said forth so in them according signs fruit'\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE SAMPLING PARAMS: Greedy max_new_tokens=10 otherwise - N/A: PROMPT: 'In the beginning God created the heavens' RESPONSE: ',,,,, and day day day lesser'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 41 non-zero probs\n", + ">>> After top_k: [128260] 
shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 53 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 41 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 36 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 15 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: Beam Default - max_new_tokens = 10, temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None, presence_penalty=1.3, frequency_penalty=1.4: PROMPT: 'In the beginning God created the heavens' RESPONSE: ',Let and was living he lesser so multiply seed fruitful livestock.'.\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 28 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 
non-zero probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 38 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 29 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' set, and said them over was man each it'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 53 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 56 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 53 
non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 49 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and fruitful, to was forth them'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 47 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 50 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero 
probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.97, repetition_penalty=None presence_penalty=1.3 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', earth trees seed was and day good rule forth.'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 41 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 54 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 49 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 49 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 50 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 48 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 49 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 49 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 21 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 21 non-zero 
probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.75, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', and trees them said he day good upon,' thing. fruit'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 51 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 46 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 36 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 41 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 39 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=75, top_p=0.98, 
repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', and trees was day to seed lesser he living earth each'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 28 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 38 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 30 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 28 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 34 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 29 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 8 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.98, repetition_penalty=None presence_penalty=1.4 
frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', and trees was he said day. each living he fruit so'\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 15 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 25 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 22 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 20 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 20 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 19 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 20 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 24 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 27 non-zero probs\n", + ">>> After top_k: [128260] shape, 40 non-zero probs\n", + ">>> After top_p: [128260] shape, 24 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=40, top_p=0.96, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', it earth creatures day living man lesser and he each'\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 28 non-zero probs\n", + ">>> After top_k: 
[128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 34 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 29 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 28 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 29 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 29 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 28 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 30 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 35 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 35 non-zero probs\n", + ">>> After top_k: [128260] shape, 45 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.7, top_k=45, top_p=0.97, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.3 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' and was living day he rule trees., according'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 35 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 51 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: 
[128260] shape, 38 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 40 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 36 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 37 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.6, top_k=75, top_p=0.99, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ', and trees it said upon man was forth day fruit each tree wing multiply'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 36 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 41 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 52 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 55 non-zero 
probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 45 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 44 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 42 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 43 non-zero probs\n", + "Trial #: 1 Text Sample #: 2 Perplexity: 29.637819290161133 GENERATE PARAMS: max_new_tokens=15 temperature=0.65, top_k=75, top_p=0.985, repetition_penalty=None presence_penalty=1.4 frequency_penalty1.4 PROMPT: 'In the beginning God created the heavens' RESPONSE: ' waters, and was so according each lesser multiply'\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 55 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 63 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 64 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 62 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 61 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 60 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 non-zero probs\n", + ">>> After top_p: [128260] shape, 56 non-zero probs\n", + ">>> After top_k: [128260] shape, 75 
# Trial identifier interpolated into both artifact names below.
# Set this to a unique number for each run.
trial_number = 1

# Work out both output locations before anything is written.
TOKENIZER_SAVE_PATH = f"tokenizer-tr-{trial_number}-stage-i-b"
MODEL_SAVE_PATH = f"final_phase_ib_model_tr_{trial_number}-stage-i-b.keras"

# Write the tokenizer first ...
tokenizer.save_pretrained(TOKENIZER_SAVE_PATH)
print(f"Tokenizer saved to {TOKENIZER_SAVE_PATH}")

# ... then the model held in `generator`.
generator.save(MODEL_SAVE_PATH)
print(f"Final model saved to {MODEL_SAVE_PATH}")
# Validate the saved artifacts by running the external check script
# (test_llm_serialization.py) in a child process. TOKENIZER_SAVE_PATH,
# MODEL_SAVE_PATH and trial_number come from the save cell above.
print(f"🧪 Running serialization test for Stage I-b trial {trial_number}...")
result = subprocess.run(
    # Pass an argument list rather than a shell string: each path reaches the
    # script as exactly one argument, even if it contains spaces or shell
    # metacharacters, and no shell is spawned.
    [
        "python3",
        "test_llm_serialization.py",
        TOKENIZER_SAVE_PATH,
        MODEL_SAVE_PATH,
    ],
    capture_output=True,  # collect stdout/stderr instead of streaming them
    text=True,  # decode the captured streams to str
)

# A zero exit status from the script is treated as success.
if result.returncode == 0:
    print("✅ Serialization test passed.")
    print("STDOUT:", result.stdout)
else:
    print("❌ Serialization test failed.")
    print("STDERR:", result.stderr)
    # The script may have printed partial progress before failing.
    if result.stdout:
        print("STDOUT:", result.stdout)
non-zero probs\n", + ">>> After top_k: [128260] shape, 50 non-zero probs\n", + ">>> After top_p: [128260] shape, 28 non-zero probs\n", + ">>> After top_k: [128260] shape, 50 non-zero probs\n", + ">>> After top_p: [128260] shape, 32 non-zero probs\n", + ">>> After top_k: [128260] shape, 50 non-zero probs\n", + ">>> After top_p: [128260] shape, 33 non-zero probs\n", + "๐Ÿง  (serialized) Prompt: In the beginning God created the Generated Text from Serialized Model: 'In the beginning God created the, waters each trees and to living man according them'\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# And there you have it: What it takes to build an LLM from scratch using our novel architecture.\n" + ], + "metadata": { + "id": "z1lSMQ6i03XC" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "W6lcAxij-Z5r" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file From b05b98ddc158b14142d3dcda859263481f4346d6 Mon Sep 17 00:00:00 2001 From: David Thrower Date: Mon, 24 Nov 2025 19:21:53 -0500 Subject: [PATCH 4/4] Update automerge.yml Trigger CICD tests to run. --- .github/workflows/automerge.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml index 81232c1..7508e7f 100644 --- a/.github/workflows/automerge.yml +++ b/.github/workflows/automerge.yml @@ -6,7 +6,7 @@ name: Python application on: push: - branches: [ "main", "279-add-a-jupyter-notebook-for-llm-training" ] + branches: [ "main", "281-fix-misspelling-in-llm-from-scratch-jupyter" ] permissions: