diff --git a/.github/workflows/automerge.yml b/.github/workflows/automerge.yml
index 81232c1..7508e7f 100644
--- a/.github/workflows/automerge.yml
+++ b/.github/workflows/automerge.yml
@@ -6,7 +6,7 @@ name: Python application
on:
push:
- branches: [ "main", "279-add-a-jupyter-notebook-for-llm-training" ]
+ branches: [ "main", "281-fix-misspelling-in-llm-from-scratch-jupyter" ]
permissions:
diff --git a/2025_11_23_demo_train_an_llm_with_cerebros.ipynb b/2025_11_23_demo_train_an_llm_with_cerebros.ipynb
index 9004212..d1f0f28 100644
--- a/2025_11_23_demo_train_an_llm_with_cerebros.ipynb
+++ b/2025_11_23_demo_train_an_llm_with_cerebros.ipynb
@@ -48,7 +48,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -91,7 +91,7 @@
"id": "AcECFSs7WVsi",
"outputId": "9fd59935-35d4-4a08-9c8a-fb01fd3e4f03"
},
- "execution_count": 25,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -121,7 +121,7 @@
"id": "mCpJGfD2WfLj",
"outputId": "e0fe8c05-6154-41cd-f489-08cfd2ad0fa8"
},
- "execution_count": 26,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -159,7 +159,7 @@
"id": "nwElyEdpW90P",
"outputId": "170e2158-b7a9-49f0-ce63-22c4c7410f33"
},
- "execution_count": 27,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -403,7 +403,7 @@
"id": "ubtKyfBQzFEW",
"outputId": "6cbe44e6-3ce7-4227-982a-88d0d36d2205"
},
- "execution_count": 1,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -431,7 +431,7 @@
"id": "NemXTsYgfE0s",
"outputId": "ca92342f-1f82-42ee-8562-980b1c8dd849"
},
- "execution_count": 2,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -455,7 +455,7 @@
"id": "D3K4dSVQhrIc",
"outputId": "5a45fa94-1bb3-46ce-c362-27f456221fd6"
},
- "execution_count": 3,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -509,7 +509,7 @@
"id": "WKCdCv96X4YX",
"outputId": "875f6626-4f4b-426c-c697-da9f186e440a"
},
- "execution_count": 4,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -557,7 +557,7 @@
"metadata": {
"id": "vywbZQxAZC9R"
},
- "execution_count": 5,
+ "execution_count": null,
"outputs": []
},
{
@@ -608,7 +608,7 @@
},
"outputId": "6c85d1ae-52f4-4ddf-d768-ea5781b1b7da"
},
- "execution_count": 6,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -681,13 +681,13 @@
"metadata": {
"id": "Wbowkxnbc4Zd"
},
- "execution_count": 7,
+ "execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
- "# Phase I-b (Extended Training) Hyperparameters\n",
+ "# Stage I-b (Extended Training) Hyperparameters\n",
"\n",
"These parameters are for fine-tuning the best model from Stage I-a.\n",
"\n",
@@ -716,7 +716,7 @@
"metadata": {
"id": "-znwaddIdiKU"
},
- "execution_count": 8,
+ "execution_count": null,
"outputs": []
},
{
@@ -741,7 +741,7 @@
"metadata": {
"id": "JHjCz9qXd5Gq"
},
- "execution_count": 9,
+ "execution_count": null,
"outputs": []
},
{
@@ -774,7 +774,7 @@
},
"outputId": "d46f8e34-3d7d-4fb4-dddc-bf1c45bae7ee"
},
- "execution_count": 10,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -930,7 +930,7 @@
"metadata": {
"id": "EDyuTMLufYvs"
},
- "execution_count": 11,
+ "execution_count": null,
"outputs": []
},
{
@@ -958,7 +958,7 @@
"metadata": {
"id": "SMSdkFRPkg7D"
},
- "execution_count": 12,
+ "execution_count": null,
"outputs": []
},
{
@@ -973,7 +973,7 @@
"id": "Oqw-T7bOo1GD",
"outputId": "2e8f24fc-24c2-4a06-babb-550b676b7751"
},
- "execution_count": 13,
+ "execution_count": null,
"outputs": [
{
"output_type": "execute_result",
@@ -1001,7 +1001,7 @@
"id": "Hv_52izIjOQ7",
"outputId": "e2972924-0190-4f16-9317-c00100486203"
},
- "execution_count": 14,
+ "execution_count": null,
"outputs": [
{
"output_type": "execute_result",
@@ -1149,7 +1149,7 @@
},
"outputId": "e76e091c-6e7f-4820-ef79-15143f1e6b64"
},
- "execution_count": 15,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -1372,7 +1372,7 @@
"metadata": {
"id": "_8uTBW_to7iQ"
},
- "execution_count": 16,
+ "execution_count": null,
"outputs": []
},
{
@@ -1474,7 +1474,7 @@
"metadata": {
"id": "XV2q_5WEwBJ0"
},
- "execution_count": 17,
+ "execution_count": null,
"outputs": []
},
{
@@ -1510,7 +1510,7 @@
},
"outputId": "d56dd1ec-2f7b-4a3c-ecc6-75e595910367"
},
- "execution_count": 18,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -4269,7 +4269,7 @@
},
"outputId": "d253eeeb-831e-48ce-f256-c8f10540064a"
},
- "execution_count": 19,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -4348,7 +4348,7 @@
"metadata": {
"id": "f8XigcJcykLn"
},
- "execution_count": 20,
+ "execution_count": null,
"outputs": []
},
{
@@ -4534,7 +4534,7 @@
},
"outputId": "e05a9fb1-706e-4f26-e668-825f7df940c2"
},
- "execution_count": 21,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -4968,7 +4968,7 @@
{
"cell_type": "markdown",
"source": [
- "# Syage I-b: Extended Training\n",
+ "# Stage I-b: Extended Training\n",
"\n",
"- Now, we take the best model from Stage I-a and continue training it on a larger dataset.\n",
"- This uses a streaming `tf.data.Dataset` generator to allow handling of much larger data sets without using more RAM.\n",
@@ -5119,7 +5119,7 @@
"metadata": {
"id": "MHWWE0xIzLRD"
},
- "execution_count": 22,
+ "execution_count": null,
"outputs": []
},
{
@@ -5135,7 +5135,7 @@
"id": "HxwyQzSppQwp",
"outputId": "89a48aa5-c364-4057-98c4-fc4a291f448e"
},
- "execution_count": 23,
+ "execution_count": null,
"outputs": [
{
"output_type": "execute_result",
@@ -5190,7 +5190,7 @@
"cell_type": "markdown",
"source": [
"\n",
- "## Model Compilation for Phase I-b\n",
+ "## Model Compilation for Stage I-b\n",
"\n",
"- We recompile the model with the same base optimizer (AdamW), however this time with a custom learning rate scheduler (WarmupCosineDecayRestarts), and for disambiguation, relevant metrics for this training phase. We also add an EarlyStopping callback which is mainly being used to restore the weights from the best epoch, if that turns out to not be the last epoch.\n",
"\n",
@@ -5328,7 +5328,7 @@
"metadata": {
"id": "GGkEVa2dzOtf"
},
- "execution_count": 24,
+ "execution_count": null,
"outputs": []
},
{
@@ -5376,7 +5376,7 @@
},
"outputId": "0daf05b2-7072-4818-8b47-a05558b33470"
},
- "execution_count": 25,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -5567,7 +5567,7 @@
},
"outputId": "8071bc5a-8520-4d13-82e1-cbd941297b4b"
},
- "execution_count": 26,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -6453,7 +6453,7 @@
},
"outputId": "37a1153f-09a0-4274-9ca2-e280112e65e6"
},
- "execution_count": 27,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
@@ -6503,7 +6503,7 @@
},
"outputId": "389fe0bf-c935-4f49-dd4f-8eea8672c634"
},
- "execution_count": 28,
+ "execution_count": null,
"outputs": [
{
"output_type": "stream",
diff --git a/old/2025_11_23_demo_train_an_llm_with_cerebros.ipynb b/old/2025_11_23_demo_train_an_llm_with_cerebros.ipynb
new file mode 100644
index 0000000..9004212
--- /dev/null
+++ b/old/2025_11_23_demo_train_an_llm_with_cerebros.ipynb
@@ -0,0 +1,6561 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Build our LLM From Scratch -\n",
+ "\n",
+ "## How Cerebros NotGPT works under the hood:\n",
+ "\n",
+ "\n",
+ "### This notebook demonstrates the end-to-end training pipeline that builds a small scale generative LLM from scratch, a small scale proof of concept for our own Cerebros NotGPT model, then fine tunes it on additional data.\n",
+ "\n",
+ "The process is divided into two main phases:\n",
+ "\n",
+ "- Phase I-a: Neural Architecture Search (NAS) - We use SimpleCerebrosRandomSearch to automatically discover an effective neural network architecture from a small dataset.\n",
+ "- Phase I-b: Extended Training - The best architecture found in Phase I-a is then trained on a larger dataset to improve its performance.\n",
+ "\n",
+ "Finally, the trained model is evaluated and serialized for future use.\n",
+ "\n",
+ "\n",
+ "## Setup and Configuration\n",
+ "\n",
+ "Note: This script is configured as a vanilla-scale demo environment (4 CPU / 16 GB RAM Linux with Python 3.12). No GPU is needed, and this will run in the free version of Google Colab. \n",
+ "\n",
+ "## Vanilla Demo\n",
+ "\n",
+ "- For production use, you would significantly increase the sample sizes and adjust other parameters accordingly.\n",
+ "- The quality of the text generated by this minimal demo (trained on 30 text samples at a sequence length of 40) does not represent the quality of a full-scale model generated from the same code.\n",
+ "- A script that can be modified to do such as availible at: https://github.com/david-thrower/cerebros-core-algorithm-alpha/blob/main/train_a_generative_llm.py"
+ ],
+ "metadata": {
+ "id": "nnsAHoJyWLed"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "NzJF6_JuWElV",
+ "outputId": "a0f3246f-0ccd-48ea-da55-86479bc0f93c"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Python 3.12.12\n"
+ ]
+ }
+ ],
+ "source": [
+ "! python --version"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Getting started: Download the repo and go to the main directory of the repo"
+ ],
+ "metadata": {
+ "id": "f6TD2XsKPJIY"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Download the repo\n",
+ "! git clone https://github.com/david-thrower/cerebros-core-algorithm-alpha.git"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "AcECFSs7WVsi",
+ "outputId": "9fd59935-35d4-4a08-9c8a-fb01fd3e4f03"
+ },
+ "execution_count": 25,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Cloning into 'cerebros-core-algorithm-alpha'...\n",
+ "remote: Enumerating objects: 8036, done.\u001b[K\n",
+ "remote: Counting objects: 100% (1737/1737), done.\u001b[K\n",
+ "remote: Compressing objects: 100% (321/321), done.\u001b[K\n",
+ "remote: Total 8036 (delta 1612), reused 1449 (delta 1411), pack-reused 6299 (from 2)\u001b[K\n",
+ "Receiving objects: 100% (8036/8036), 65.90 MiB | 21.67 MiB/s, done.\n",
+ "Resolving deltas: 100% (3116/3116), done.\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# set the working directory\n",
+ "%cd cerebros-core-algorithm-alpha"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "mCpJGfD2WfLj",
+ "outputId": "e0fe8c05-6154-41cd-f489-08cfd2ad0fa8"
+ },
+ "execution_count": 26,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "/content/cerebros-core-algorithm-alpha/cerebros-core-algorithm-alpha\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Next install all dependencies.\n",
+ "\n",
+ "There are 2 requirement files:\n",
+ " - requirements.txt: The core requirements of the neural architecture search\n",
+ " - cicd-requirements.txt: Requirements for NLP and text generation"
+ ],
+ "metadata": {
+ "id": "yT4hPXOKPU_8"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Install the requirements for the core algorithm\n",
+ "! pip install -r requirements.txt; pip install -r cicd-requirements.txt"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "nwElyEdpW90P",
+ "outputId": "170e2158-b7a9-49f0-ce63-22c4c7410f33"
+ },
+ "execution_count": 27,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Requirement already satisfied: jax==0.5.3 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 1)) (0.5.3)\n",
+ "Requirement already satisfied: jaxlib==0.5.3 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 2)) (0.5.3)\n",
+ "Requirement already satisfied: pendulum==3.0.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 3)) (3.0.0)\n",
+ "Collecting tensorflow==2.20.0 (from -r requirements.txt (line 4))\n",
+ " Using cached tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)\n",
+ "Collecting numpy==2.3.5 (from -r requirements.txt (line 5))\n",
+ " Using cached numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)\n",
+ "Requirement already satisfied: pandas==2.3.3 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 6)) (2.3.3)\n",
+ "Requirement already satisfied: pyvis==0.3.2 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 7)) (0.3.2)\n",
+ "Requirement already satisfied: plotly==5.20.0 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 8)) (5.20.0)\n",
+ "Requirement already satisfied: matplotlib==3.10.7 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 9)) (3.10.7)\n",
+ "Requirement already satisfied: imageio==2.37.2 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 10)) (2.37.2)\n",
+ "Requirement already satisfied: tqdm==4.67.1 in /usr/local/lib/python3.12/dist-packages (from -r requirements.txt (line 11)) (4.67.1)\n",
+ "Requirement already satisfied: ml_dtypes>=0.4.0 in /usr/local/lib/python3.12/dist-packages (from jax==0.5.3->-r requirements.txt (line 1)) (0.5.4)\n",
+ "Requirement already satisfied: opt_einsum in /usr/local/lib/python3.12/dist-packages (from jax==0.5.3->-r requirements.txt (line 1)) (3.4.0)\n",
+ "Requirement already satisfied: scipy>=1.11.1 in /usr/local/lib/python3.12/dist-packages (from jax==0.5.3->-r requirements.txt (line 1)) (1.16.3)\n",
+ "Requirement already satisfied: python-dateutil>=2.6 in /usr/local/lib/python3.12/dist-packages (from pendulum==3.0.0->-r requirements.txt (line 3)) (2.9.0.post0)\n",
+ "Requirement already satisfied: tzdata>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pendulum==3.0.0->-r requirements.txt (line 3)) (2025.2)\n",
+ "Requirement already satisfied: time-machine>=2.6.0 in /usr/local/lib/python3.12/dist-packages (from pendulum==3.0.0->-r requirements.txt (line 3)) (3.1.0)\n",
+ "Requirement already satisfied: absl-py>=1.0.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (1.4.0)\n",
+ "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (1.6.3)\n",
+ "Requirement already satisfied: flatbuffers>=24.3.25 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (25.9.23)\n",
+ "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (0.6.0)\n",
+ "Requirement already satisfied: google_pasta>=0.1.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (0.2.0)\n",
+ "Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (18.1.1)\n",
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (25.0)\n",
+ "Requirement already satisfied: protobuf>=5.28.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (5.29.5)\n",
+ "Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (2.32.4)\n",
+ "Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (75.2.0)\n",
+ "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (1.17.0)\n",
+ "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (3.2.0)\n",
+ "Requirement already satisfied: typing_extensions>=3.6.6 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (4.15.0)\n",
+ "Requirement already satisfied: wrapt>=1.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (2.0.1)\n",
+ "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (1.76.0)\n",
+ "Collecting tensorboard~=2.20.0 (from tensorflow==2.20.0->-r requirements.txt (line 4))\n",
+ " Using cached tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)\n",
+ "Requirement already satisfied: keras>=3.10.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (3.10.0)\n",
+ "Requirement already satisfied: h5py>=3.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow==2.20.0->-r requirements.txt (line 4)) (3.15.1)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas==2.3.3->-r requirements.txt (line 6)) (2025.2)\n",
+ "Requirement already satisfied: ipython>=5.3.0 in /usr/local/lib/python3.12/dist-packages (from pyvis==0.3.2->-r requirements.txt (line 7)) (7.34.0)\n",
+ "Requirement already satisfied: jinja2>=2.9.6 in /usr/local/lib/python3.12/dist-packages (from pyvis==0.3.2->-r requirements.txt (line 7)) (3.1.6)\n",
+ "Requirement already satisfied: jsonpickle>=1.4.1 in /usr/local/lib/python3.12/dist-packages (from pyvis==0.3.2->-r requirements.txt (line 7)) (4.1.1)\n",
+ "Requirement already satisfied: networkx>=1.11 in /usr/local/lib/python3.12/dist-packages (from pyvis==0.3.2->-r requirements.txt (line 7)) (3.5)\n",
+ "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.12/dist-packages (from plotly==5.20.0->-r requirements.txt (line 8)) (8.5.0)\n",
+ "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (1.3.3)\n",
+ "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (0.12.1)\n",
+ "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (4.60.1)\n",
+ "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (1.4.9)\n",
+ "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (11.3.0)\n",
+ "Requirement already satisfied: pyparsing>=3 in /usr/local/lib/python3.12/dist-packages (from matplotlib==3.10.7->-r requirements.txt (line 9)) (3.2.5)\n",
+ "Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.12/dist-packages (from astunparse>=1.6.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.45.1)\n",
+ "Requirement already satisfied: jedi>=0.16 in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.19.2)\n",
+ "Requirement already satisfied: decorator in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (4.4.2)\n",
+ "Requirement already satisfied: pickleshare in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.7.5)\n",
+ "Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (5.7.1)\n",
+ "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (3.0.52)\n",
+ "Requirement already satisfied: pygments in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (2.19.2)\n",
+ "Requirement already satisfied: backcall in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.2.0)\n",
+ "Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.2.1)\n",
+ "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.12/dist-packages (from ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (4.9.0)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.12/dist-packages (from jinja2>=2.9.6->pyvis==0.3.2->-r requirements.txt (line 7)) (3.0.3)\n",
+ "Requirement already satisfied: rich in /usr/local/lib/python3.12/dist-packages (from keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (13.9.4)\n",
+ "Requirement already satisfied: namex in /usr/local/lib/python3.12/dist-packages (from keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.1.0)\n",
+ "Requirement already satisfied: optree in /usr/local/lib/python3.12/dist-packages (from keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.18.0)\n",
+ "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (3.4.4)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (3.11)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (2.5.0)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.21.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (2025.11.12)\n",
+ "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.20.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (3.10)\n",
+ "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.20.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.7.2)\n",
+ "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.20.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (3.1.3)\n",
+ "Requirement already satisfied: parso<0.9.0,>=0.8.4 in /usr/local/lib/python3.12/dist-packages (from jedi>=0.16->ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.8.5)\n",
+ "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.12/dist-packages (from pexpect>4.3->ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.7.0)\n",
+ "Requirement already satisfied: wcwidth in /usr/local/lib/python3.12/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=5.3.0->pyvis==0.3.2->-r requirements.txt (line 7)) (0.2.14)\n",
+ "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.12/dist-packages (from rich->keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (4.0.0)\n",
+ "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.12/dist-packages (from markdown-it-py>=2.2.0->rich->keras>=3.10.0->tensorflow==2.20.0->-r requirements.txt (line 4)) (0.1.2)\n",
+ "Using cached tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (620.7 MB)\n",
+ "Using cached numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.6 MB)\n",
+ "Using cached tensorboard-2.20.0-py3-none-any.whl (5.5 MB)\n",
+ "Installing collected packages: numpy, tensorboard, tensorflow\n",
+ " Attempting uninstall: numpy\n",
+ " Found existing installation: numpy 1.26.4\n",
+ " Uninstalling numpy-1.26.4:\n",
+ " Successfully uninstalled numpy-1.26.4\n",
+ " Attempting uninstall: tensorboard\n",
+ " Found existing installation: tensorboard 2.19.0\n",
+ " Uninstalling tensorboard-2.19.0:\n",
+ " Successfully uninstalled tensorboard-2.19.0\n",
+ " Attempting uninstall: tensorflow\n",
+ " Found existing installation: tensorflow 2.19.1\n",
+ " Uninstalling tensorflow-2.19.1:\n",
+ " Successfully uninstalled tensorflow-2.19.1\n",
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "scikit-learn 1.4.1.post1 requires numpy<2.0,>=1.19.5, but you have numpy 2.3.5 which is incompatible.\n",
+ "google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.3 which is incompatible.\n",
+ "tensorflow-text 2.19.0 requires tensorflow<2.20,>=2.19.0, but you have tensorflow 2.20.0 which is incompatible.\n",
+ "opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 2.3.5 which is incompatible.\n",
+ "numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.3.5 which is incompatible.\n",
+ "opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 2.3.5 which is incompatible.\n",
+ "umap-learn 0.5.9.post2 requires scikit-learn>=1.6, but you have scikit-learn 1.4.1.post1 which is incompatible.\n",
+ "opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 2.3.5 which is incompatible.\n",
+ "orbax-checkpoint 0.11.28 requires jax>=0.6.0, but you have jax 0.5.3 which is incompatible.\n",
+ "tensorflow-decision-forests 1.12.0 requires tensorflow==2.19.0, but you have tensorflow 2.20.0 which is incompatible.\n",
+ "flax 0.10.7 requires jax>=0.6.0, but you have jax 0.5.3 which is incompatible.\n",
+ "tf-keras 2.19.0 requires tensorflow<2.20,>=2.19, but you have tensorflow 2.20.0 which is incompatible.\n",
+ "imbalanced-learn 0.14.0 requires scikit-learn<2,>=1.4.2, but you have scikit-learn 1.4.1.post1 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0mSuccessfully installed numpy-2.3.5 tensorboard-2.20.0 tensorflow-2.20.0\n",
+ "Requirement already satisfied: tensorflow-text==2.19.0 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 1)) (2.19.0)\n",
+ "Requirement already satisfied: keras-nlp==0.19.0 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 2)) (0.19.0)\n",
+ "Requirement already satisfied: scikit-learn==1.4.1.post1 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 3)) (1.4.1.post1)\n",
+ "Requirement already satisfied: tensorflow-hub==0.16.1 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 4)) (0.16.1)\n",
+ "Requirement already satisfied: transformers==4.54.0 in /usr/local/lib/python3.12/dist-packages (from -r cicd-requirements.txt (line 5)) (4.54.0)\n",
+ "Collecting tensorflow<2.20,>=2.19.0 (from tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1))\n",
+ " Using cached tensorflow-2.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)\n",
+ "Requirement already satisfied: keras-hub==0.19.0 in /usr/local/lib/python3.12/dist-packages (from keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.19.0)\n",
+ "Collecting numpy<2.0,>=1.19.5 (from scikit-learn==1.4.1.post1->-r cicd-requirements.txt (line 3))\n",
+ " Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)\n",
+ "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn==1.4.1.post1->-r cicd-requirements.txt (line 3)) (1.16.3)\n",
+ "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn==1.4.1.post1->-r cicd-requirements.txt (line 3)) (1.5.2)\n",
+ "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn==1.4.1.post1->-r cicd-requirements.txt (line 3)) (3.6.0)\n",
+ "Requirement already satisfied: protobuf>=3.19.6 in /usr/local/lib/python3.12/dist-packages (from tensorflow-hub==0.16.1->-r cicd-requirements.txt (line 4)) (5.29.5)\n",
+ "Requirement already satisfied: tf-keras>=2.14.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow-hub==0.16.1->-r cicd-requirements.txt (line 4)) (2.19.0)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (3.20.0)\n",
+ "Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (0.36.0)\n",
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (25.0)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (6.0.3)\n",
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2024.11.6)\n",
+ "Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2.32.4)\n",
+ "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (0.21.4)\n",
+ "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (0.7.0)\n",
+ "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.12/dist-packages (from transformers==4.54.0->-r cicd-requirements.txt (line 5)) (4.67.1)\n",
+ "Requirement already satisfied: keras>=3.5 in /usr/local/lib/python3.12/dist-packages (from keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (3.10.0)\n",
+ "Requirement already satisfied: absl-py in /usr/local/lib/python3.12/dist-packages (from keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (1.4.0)\n",
+ "Requirement already satisfied: rich in /usr/local/lib/python3.12/dist-packages (from keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (13.9.4)\n",
+ "Requirement already satisfied: kagglehub in /usr/local/lib/python3.12/dist-packages (from keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.3.13)\n",
+ "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2025.3.0)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (4.15.0)\n",
+ "Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in /usr/local/lib/python3.12/dist-packages (from huggingface-hub<1.0,>=0.34.0->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (1.2.0)\n",
+ "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (1.6.3)\n",
+ "Requirement already satisfied: flatbuffers>=24.3.25 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (25.9.23)\n",
+ "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.6.0)\n",
+ "Requirement already satisfied: google-pasta>=0.1.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.2.0)\n",
+ "Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (18.1.1)\n",
+ "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.4.0)\n",
+ "Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (75.2.0)\n",
+ "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (1.17.0)\n",
+ "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.2.0)\n",
+ "Requirement already satisfied: wrapt>=1.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (2.0.1)\n",
+ "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (1.76.0)\n",
+ "Collecting tensorboard~=2.19.0 (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1))\n",
+ " Using cached tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)\n",
+ "Requirement already satisfied: h5py>=3.11.0 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.15.1)\n",
+ "Requirement already satisfied: ml-dtypes<1.0.0,>=0.5.1 in /usr/local/lib/python3.12/dist-packages (from tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.5.4)\n",
+ "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (3.4.4)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (3.11)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2.5.0)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->transformers==4.54.0->-r cicd-requirements.txt (line 5)) (2025.11.12)\n",
+ "Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.12/dist-packages (from astunparse>=1.6.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.45.1)\n",
+ "Requirement already satisfied: namex in /usr/local/lib/python3.12/dist-packages (from keras>=3.5->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.1.0)\n",
+ "Requirement already satisfied: optree in /usr/local/lib/python3.12/dist-packages (from keras>=3.5->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.18.0)\n",
+ "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.19.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.10)\n",
+ "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.19.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (0.7.2)\n",
+ "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from tensorboard~=2.19.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.1.3)\n",
+ "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.12/dist-packages (from rich->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (4.0.0)\n",
+ "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.12/dist-packages (from rich->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (2.19.2)\n",
+ "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.12/dist-packages (from markdown-it-py>=2.2.0->rich->keras-hub==0.19.0->keras-nlp==0.19.0->-r cicd-requirements.txt (line 2)) (0.1.2)\n",
+ "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.12/dist-packages (from werkzeug>=1.0.1->tensorboard~=2.19.0->tensorflow<2.20,>=2.19.0->tensorflow-text==2.19.0->-r cicd-requirements.txt (line 1)) (3.0.3)\n",
+ "Using cached numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.0 MB)\n",
+ "Using cached tensorflow-2.19.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (645.0 MB)\n",
+ "Using cached tensorboard-2.19.0-py3-none-any.whl (5.5 MB)\n",
+ "Installing collected packages: numpy, tensorboard, tensorflow\n",
+ " Attempting uninstall: numpy\n",
+ " Found existing installation: numpy 2.3.5\n",
+ " Uninstalling numpy-2.3.5:\n",
+ " Successfully uninstalled numpy-2.3.5\n",
+ " Attempting uninstall: tensorboard\n",
+ " Found existing installation: tensorboard 2.20.0\n",
+ " Uninstalling tensorboard-2.20.0:\n",
+ " Successfully uninstalled tensorboard-2.20.0\n",
+ " Attempting uninstall: tensorflow\n",
+ " Found existing installation: tensorflow 2.20.0\n",
+ " Uninstalling tensorflow-2.20.0:\n",
+ " Successfully uninstalled tensorflow-2.20.0\n",
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.3.3 which is incompatible.\n",
+ "opencv-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 1.26.4 which is incompatible.\n",
+ "opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 1.26.4 which is incompatible.\n",
+ "pytensor 2.35.1 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.\n",
+ "umap-learn 0.5.9.post2 requires scikit-learn>=1.6, but you have scikit-learn 1.4.1.post1 which is incompatible.\n",
+ "opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= \"3.9\", but you have numpy 1.26.4 which is incompatible.\n",
+ "orbax-checkpoint 0.11.28 requires jax>=0.6.0, but you have jax 0.5.3 which is incompatible.\n",
+ "tensorflow-decision-forests 1.12.0 requires tensorflow==2.19.0, but you have tensorflow 2.19.1 which is incompatible.\n",
+ "flax 0.10.7 requires jax>=0.6.0, but you have jax 0.5.3 which is incompatible.\n",
+ "shap 0.50.0 requires numpy>=2, but you have numpy 1.26.4 which is incompatible.\n",
+ "imbalanced-learn 0.14.0 requires scikit-learn<2,>=1.4.2, but you have scikit-learn 1.4.1.post1 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0mSuccessfully installed numpy-1.26.4 tensorboard-2.19.0 tensorflow-2.19.1\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "application/vnd.colab-display-data+json": {
+ "pip_warning": {
+ "packages": [
+ "numpy",
+ "tensorflow"
+ ]
+ },
+ "id": "d3a167bbbde043ef9a994c35060fda79"
+ }
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# **RESTART THE SESSION**\n",
+ "\n",
+ "Then proceed to the next cell which imports all necessary libraries and defines global constants and hyperparameters for the entire pipeline.\n"
+ ],
+ "metadata": {
+ "id": "v69rLBcmXyGD"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "! ls"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "ubtKyfBQzFEW",
+ "outputId": "6cbe44e6-3ce7-4227-982a-88d0d36d2205"
+ },
+ "execution_count": 1,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "cerebros-core-algorithm-alpha sample_data\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# 1. # **ONLY IF** the directory cerebros-core-algorithm-alpha is not still\n",
+ "# there, clone the directory again.\n",
+ "# ! git clone https://github.com/david-thrower/cerebros-core-algorithm-alpha.git\n",
+ "\n",
+ "# 2. Set the working directory (in the new session) - DO run this.\n",
+ "%cd cerebros-core-algorithm-alpha"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "NemXTsYgfE0s",
+ "outputId": "ca92342f-1f82-42ee-8562-980b1c8dd849"
+ },
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "/content/cerebros-core-algorithm-alpha\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Verify we are in the right place:\n",
+ "! pwd"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "D3K4dSVQhrIc",
+ "outputId": "5a45fa94-1bb3-46ce-c362-27f456221fd6"
+ },
+ "execution_count": 3,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "/content/cerebros-core-algorithm-alpha\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Standard library imports\n",
+ "import subprocess\n",
+ "import time\n",
+ "from gc import collect\n",
+ "\n",
+ "# Third-party library imports\n",
+ "import tensorflow as tf\n",
+ "import pandas as pd\n",
+ "import pendulum\n",
+ "from transformers import AutoTokenizer\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "# Cerebros specific imports\n",
+ "from cerebros.units.units import DenseUnit\n",
+ "from cerebros.simplecerebrosrandomsearch.simple_cerebros_random_search import SimpleCerebrosRandomSearch\n",
+ "from cerebros.denseautomlstructuralcomponent.dense_automl_structural_component import (\n",
+ " zero_7_exp_decay,\n",
+ " zero_95_exp_decay,\n",
+ " simple_sigmoid\n",
+ ")\n",
+ "from cerebrosllmutils.llm_utils import (\n",
+ " prepare_data,\n",
+ " InterleavedRoPE,\n",
+ " Perplexity,\n",
+ " CerebrosNotGPTConfig,\n",
+ " CerebrosNotGPT,\n",
+ " WarmupCosineDecayRestarts\n",
+ ")\n",
+ "\n",
+ "# Import the data source: Format List[str]\n",
+ "from vanilladatasets.web_english_bible import samples as bible\n",
+ "\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "WKCdCv96X4YX",
+ "outputId": "875f6626-4f4b-426c-c697-da9f186e440a"
+ },
+ "execution_count": 4,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.12/dist-packages/jaxlib/plugin_support.py:71: RuntimeWarning: JAX plugin jax_cuda12_plugin version 0.7.2 is installed, but it is not compatible with the installed jaxlib version 0.5.3, so it will not be used.\n",
+ " warnings.warn(\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Data and Training Constants\n",
+ "\n",
+ "These parameters control the amount of data used and the behavior of the training stages.\n",
+ "\n",
+ "- **PHASE_I_A_SAMPLES_TO_CREATE**: Size of the subset of the dataset used for the NAS (Neural Architecture Search) stage (number of text samples).\n",
+ "- **PHASE_I_B_SAMPLES_TO_CREATE**: Number of samples to use for the main training task stage after Neural Architecture Search is completed.\n",
+ "- **PHASE_I_B_VAL_SPLIT**: Fraction of data for validation in Phase I-b.\n",
+ "- **PHASE_I_B_SAMPLE_EXPANSION_BATCH_SIZE**: Batch size for preprocessing in Phase I-b to manage RAM.\n",
+ "- **PROMPT_LENGTH**: Number of tokens provided to the model to predict the next token. It is recommended to keep this as 1.\n"
+ ],
+ "metadata": {
+ "id": "rK0LZP7KbQqm"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Samples to use for the neural architecture search stage\n",
+ "PHASE_I_A_SAMPLES_TO_CREATE = 10\n",
+ "\n",
+ "# Samples to use for the main training stage\n",
+ "PHASE_I_B_SAMPLES_TO_CREATE = 20\n",
+ "PHASE_I_B_VAL_SPLIT = 0.15\n",
+ "\n",
+ "# For Stage I-b, we preprocess in batches to avoid high RAM usage.\n",
+ "PHASE_I_B_SAMPLE_EXPANSION_BATCH_SIZE = 10\n",
+ "\n",
+ "# How many tokens to provide before expecting the next token to be predicted.\n",
+ "PROMPT_LENGTH = 1\n"
+ ],
+ "metadata": {
+ "id": "vywbZQxAZC9R"
+ },
+ "execution_count": 5,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Model and Embedding Constants\n",
+ "\n",
+ "These constants define the size and shape of the model's text processing components.\n",
+ "\n",
+ "- **MAX_SEQ_LENGTH**: The maximum sequence length the model will handle. This has a linear relationship with RAM/CPU usage.\n",
+ "- **tokenizer_checkpoint**: The Hugging Face model to use for tokenization.\n",
+ "- **EMBEDDING_N**: A factor to determine the embedding dimensionality (EMBEDDING_DIM = EMBEDDING_N * 2). A factor to determine the embedding dimensionality (EMBEDDING_DIM = EMBEDDING_N * 2). The resulting embedding dimensionality (EMBEDDING_DIM) for InterleavedRoPE must be an even number. Using this parameter as a proxy, rather than setting EMBEDDING_DIM directly, acts as a guard rail to ensure this constraint is met.\n",
+ "- **PROJECTION_N**: Controls the size of a projection layer after embedding. Increasing this value can significantly increase RAM usage.\n"
+ ],
+ "metadata": {
+ "id": "5jK5wbA5b8se"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Text encoding / embedding related constants\n",
+ "MAX_SEQ_LENGTH = 40\n",
+ "\n",
+ "# Tokenization\n",
+ "tokenizer_checkpoint = \"HuggingFaceTB/SmolLM3-3B\"\n",
+ "tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint)\n",
+ "\n",
+ "# Add special tokens for potential instruction-following formats\n",
+ "special_tokens = {\n",
+ " \"additional_special_tokens\": [\"\", \"\", \"\", \"\"]\n",
+ "}\n",
+ "tokenizer.add_special_tokens(special_tokens)\n",
+ "\n",
+ "VOCABULARY_SIZE = len(tokenizer)\n",
+ "\n",
+ "# For InterleavedRoPE, the embedding output dim must be an even number.\n",
+ "EMBEDDING_N = 6\n",
+ "EMBEDDING_DIM = int(EMBEDDING_N * 2)\n",
+ "\n",
+ "# Size of the projection layer. Keep low to manage RAM.\n",
+ "PROJECTION_N = 1\n"
+ ],
+ "metadata": {
+ "id": "4Kka_A4tb3aJ",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "6c85d1ae-52f4-4ddf-d768-ea5781b1b7da"
+ },
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n",
+ "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
+ "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
+ "You will be able to reuse this secret in all of your notebooks.\n",
+ "Please note that authentication is recommended but still optional to access public models or datasets.\n",
+ " warnings.warn(\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Stage I-a (NAS) Hyperparameters\n",
+ "\n",
+ "These parameters control the Neural Architecture Search process.\n",
+ "\n",
+ "- **moities_to_try**: Number of different layer permutations to try.\n",
+ "- **tries_per_moity**: Number of topologies to try for each permutation.\n",
+ "- **epochs, batch_size, learning_rate**: Standard training parameters for the NAS stage.\n",
+ "- **predecessor_level_connection_affinity_factor_first**: Controls connectivity density between the Input layer and the first level of Dense layers.\n",
+ "- **predecessor_level_connection_affinity_factor_main**: Controls connectivity density between the Input layer and the first level of Dense layers and the subsequent level of Dense layers, as well as all subsequent vertical connectivity.\n",
+ "- **p_lateral_connection, num_lateral_connection_tries_per_unit**: Control the density of lateral connectivity between Dense layers on the same row.\n",
+ "- **minimum_levels, maximum_levels**: Number of **rows of** Dense layers in the architecture grid.\n",
+ "- **minimum_units_per_level, maximum_units_per_level**: Number of Dense layers per row.\n",
+ "- **minimum_neurons_per_unit, maximum_neurons_per_unit**: The number of neurons for each Dense layer unit.\n"
+ ],
+ "metadata": {
+ "id": "MeoWtePacWz_"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Cerebros [non-HP-tunable] configurables for NAS\n",
+ "moities_to_try = 3\n",
+ "tries_per_moity = 1\n",
+ "\n",
+ "### Main tunable hyperparameters for NAS ##\n",
+ "\n",
+ "POSITIONAL_EMBEDDING_DROPOUT = 0.7651951380000674\n",
+ "activation = 'softplus'\n",
+ "\n",
+ "# Vertical connectivity hyperparameters\n",
+ "predecessor_level_connection_affinity_factor_first = 17.851026458010523\n",
+ "predecessor_level_connection_affinity_factor_main = 21.487301631581428\n",
+ "\n",
+ "# Lateral connectivity hyperparameters\n",
+ "max_consecutive_lateral_connections = 7\n",
+ "p_lateral_connection = 0.24927354102044022\n",
+ "num_lateral_connection_tries_per_unit = 32\n",
+ "learning_rate = 0.003025583248301791\n",
+ "epochs = 41\n",
+ "batch_size = 5\n",
+ "gradient_accumulation_steps = 4\n",
+ "\n",
+ "# Architecture grid constraints\n",
+ "minimum_levels = 2\n",
+ "maximum_levels = 2\n",
+ "minimum_units_per_level = 2\n",
+ "maximum_units_per_level = 2\n",
+ "minimum_neurons_per_unit = 2\n",
+ "maximum_neurons_per_unit = 2\n"
+ ],
+ "metadata": {
+ "id": "Wbowkxnbc4Zd"
+ },
+ "execution_count": 7,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Phase I-b (Extended Training) Hyperparameters\n",
+ "\n",
+ "These parameters are for fine-tuning the best model from Stage I-a.\n",
+ "\n",
+ "- INITIAL_LR_STAGE_I_B: Initial learning rate for this phase.\n",
+ "- WARMUP_EPOCHS_STAGE_I_B, WARMUP_STEPS: Parameters for the learning rate scheduler.\n",
+ "- phase_i_b_epochs: Number of epochs for extended training.\n",
+ "- phase_i_b_weight_decay: Weight decay for the optimizer.\n"
+ ],
+ "metadata": {
+ "id": "fcGTs9ASdXps"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "\n",
+ "## Training Stage I-b parameters:\n",
+ "INITIAL_LR_STAGE_I_B = 0.0039295722955565125\n",
+ "WARMUP_EPOCHS_STAGE_I_B = 7\n",
+ "WARMUP_STEPS = 1140\n",
+ "FIRST_DECAY_STEPS_STAGE_I_B = 1900\n",
+ "phase_i_b_epochs = 53\n",
+ "phase_i_b_gradient_accumulation_steps = 7\n",
+ "phase_i_b_weight_decay = 0.01647018768215773 # For AdamW\n"
+ ],
+ "metadata": {
+ "id": "-znwaddIdiKU"
+ },
+ "execution_count": 8,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "# Generation Constants\n",
+ "\n",
+ "Parameters used during the text generation evaluation phase."
+ ],
+ "metadata": {
+ "id": "vy5y6OXhdvzV"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "## Generation time configurables:\n",
+ "GENERATION_PROMPT_LEN = 25\n",
+ "MAX_NEW_TOKENS = MAX_SEQ_LENGTH - GENERATION_PROMPT_LEN"
+ ],
+ "metadata": {
+ "id": "JHjCz9qXd5Gq"
+ },
+ "execution_count": 9,
+ "outputs": []
+ },
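+ {
+ "cell_type": "markdown",
+ "source": [
+ "Because the model is single-head and returns only a distribution over the next token, generation is a simple loop: tokenize a prompt of up to GENERATION_PROMPT_LEN tokens, pad to MAX_SEQ_LENGTH, predict one token, append it, and repeat for at most MAX_NEW_TOKENS steps. Below is a minimal illustrative sketch of such a greedy loop, assuming the trained Keras model accepts a single int32 batch of shape (1, MAX_SEQ_LENGTH); the notebook's own generation code appears later and may differ in detail.\n",
+ "\n",
+ "```python\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "def greedy_generate(model, prompt_text):\n",
+ "    # Tokenize the prompt and keep at most GENERATION_PROMPT_LEN tokens.\n",
+ "    ids = tokenizer(prompt_text, add_special_tokens=False)['input_ids']\n",
+ "    ids = ids[:GENERATION_PROMPT_LEN]\n",
+ "    for _ in range(MAX_NEW_TOKENS):\n",
+ "        # Right-pad to MAX_SEQ_LENGTH, mirroring the training inputs.\n",
+ "        padded = ids + [tokenizer.pad_token_id] * (MAX_SEQ_LENGTH - len(ids))\n",
+ "        probs = model.predict(tf.constant([padded], tf.int32), verbose=0)[0]\n",
+ "        next_id = int(tf.argmax(probs))\n",
+ "        if next_id == tokenizer.pad_token_id:\n",
+ "            break  # A predicted pad token signals termination.\n",
+ "        ids.append(next_id)\n",
+ "    return tokenizer.decode(ids)\n",
+ "```"
+ ],
+ "metadata": {}
+ },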
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# **Data Preparation**\n",
+ "\n",
+ "Here, we load and subset the dataset for both training Stages.\n",
+ "\n",
+ "\n",
+ "We first split the Bible text samples into two sets: one for Phase I-a (NAS) and a larger one for Phase I-b (extended training).\n"
+ ],
+ "metadata": {
+ "id": "N7fJIZ1md-0Y"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Get training data from the bible text samples\n",
+ "non_instruct_samples = bible[:PHASE_I_A_SAMPLES_TO_CREATE]\n",
+ "phase_i_b_samples = bible[PHASE_I_A_SAMPLES_TO_CREATE:PHASE_I_B_SAMPLES_TO_CREATE + PHASE_I_A_SAMPLES_TO_CREATE]\n",
+ "\n",
+ "print(f\"Samples from KJV bible consisting of {len(non_instruct_samples)} look like this (sub-sample of 3): {non_instruct_samples[:3]}\")\n"
+ ],
+ "metadata": {
+ "id": "jIFxWcBzeLjN",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "d46f8e34-3d7d-4fb4-dddc-bf1c45bae7ee"
+ },
+ "execution_count": 10,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Samples from KJV bible consisting of 10 look like this (sub-sample of 3): ['In the beginning God created the heavens and the earth.', \"The earth was formless and empty, with darkness over the deep and God's Spirit hovering over the waters.\", \"God said, 'Let there be light,' and there was light.\"]\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Preprocess Data for Phase I-a (NAS)\n",
+ "\n",
+ "The Cerebros LLM is a single head model. This means that each time the model is called, it returns only the next token. It does not regurgitate the cumulative sequence, nor does it have a separate head for each position in the sequence.\n",
+ "\n",
+ "For both training stages, each text sample is expanded into multiple input/label pairs, which we call \"sub-samples.\" There is one \"sub-sample\" for each token in the range between the first token and the first occurrence of a padding token or the end of the sequence, whichever comes first.\n",
+ "\n",
+ "For example, the sequence [t1, t2, t3] becomes:\n",
+ "\n",
+ " Input: [t1, 2, 2, 2] Label: [t2] # One hot encoded to VOCABULARY_SIZE\n",
+ " Input: [t1, t2, 2, 2], Label: [t3]\n",
+ " Input: [t1, t2, t3, 2], Label: [2]\n",
+ "\n",
+ "For training Stage 1-a, we perform the entire expansion for its training data in memory. This is because the NAS does not yet support a tf.data.Dataset object. In the future, we may retrofit the NAS algorithm to support streaming preprocessing as well, allowing us to use a larger dataset for the NAS.\n",
+ "\n",
+ "For stage I-b, the extended training stage, the same operation is done in batches. This is because this operation significantly increases the amount of memory required. The main reason for this is the one-hot encoded label, where the vocabulary size is 128,260. Since we do this in batches, this allows for a virtually unlimited number of samples to be processed.\n",
+ "\n",
+ "For reference, this is the preprocessing being applied:\n",
+ "\n",
+ "```python\n",
+ "def prepare_data(\n",
+ " data_0: List[str],\n",
+ " tokenizer_0: Any,\n",
+ " max_seq_length: int = 1024,\n",
+ " prompt_length: int = 1) -> Tuple[List[List[int]], List[List[int]], int]:\n",
+ "\n",
+ "\n",
+ " all_input_ids = []\n",
+ " all_labels = []\n",
+ "\n",
+ " pad_token_id = tokenizer_0.pad_token_id\n",
+ "\n",
+ " # Tokenize all data at once for efficiency\n",
+ " tokenized_data = tokenizer_0(\n",
+ " data_0,\n",
+ " max_length=max_seq_length,\n",
+ " padding='max_length',\n",
+ " truncation=True,\n",
+ " add_special_tokens=False # We'll handle special tokens manually\n",
+ " )\n",
+ " vocab_size = len(tokenizer_0)\n",
+ "\n",
+ " # Get the token ID for \n",
+ " end_prompt_token_id = tokenizer_0.encode(\"\", add_special_tokens=False)[0]\n",
+ "\n",
+ " # Process each sample\n",
+ " for sample_tokens in tokenized_data['input_ids']:\n",
+ " # Find the index of token\n",
+ " try:\n",
+ " end_prompt_index = sample_tokens.index(end_prompt_token_id)\n",
+ " except ValueError:\n",
+ " # If not found, treat sample as a non-instruct sample\n",
+ " end_prompt_index = (\n",
+ " prompt_length - 1) # int(np.ceil(len(sample_tokens) * (1/3))) # 0 ## 1. Give it a fair starting place to predict the next word 2. reduce the number of expanded samples\n",
+ "\n",
+ " # Find first pad token after \n",
+ " first_pad_index = None\n",
+ " for i in range(end_prompt_index + 1, len(sample_tokens)):\n",
+ " if sample_tokens[i] == pad_token_id:\n",
+ " first_pad_index = i\n",
+ " break\n",
+ "\n",
+ " # If no pad token found, use the end of sequence\n",
+ " if first_pad_index is None:\n",
+ " first_pad_index = len(sample_tokens)\n",
+ "\n",
+ " # Apply sliding window from after to first pad token\n",
+ " # Start from end_prompt_index + 1 (first token to predict)\n",
+ " # End at first_pad_index - 1 (last token to predict)\n",
+ " for i in range(end_prompt_index + 1, first_pad_index):\n",
+ " # Input: from start up to (but not including) token i\n",
+ " input_ids = sample_tokens[:i]\n",
+ "\n",
+ " # Pad or truncate to max_seq_length\n",
+ " if len(input_ids) > max_seq_length:\n",
+ " input_ids = input_ids[:max_seq_length]\n",
+ " else:\n",
+ " input_ids = input_ids + [pad_token_id] * (max_seq_length - len(input_ids))\n",
+ "\n",
+ " # Label: one-hot encoding of token at position i\n",
+ " next_token = sample_tokens[i]\n",
+ " label = [0] * vocab_size\n",
+ " label[next_token] = 1\n",
+ "\n",
+ " all_input_ids.append(input_ids)\n",
+ " all_labels.append(label)\n",
+ "\n",
+ " # Add final sample with pad token as label to indicate termination\n",
+ " if first_pad_index < len(sample_tokens): # Only if there's actually a pad token\n",
+ " input_ids = sample_tokens[:first_pad_index]\n",
+ "\n",
+ " # Pad or truncate to max_seq_length\n",
+ " if len(input_ids) > max_seq_length:\n",
+ " input_ids = input_ids[:max_seq_length]\n",
+ " else:\n",
+ " input_ids = input_ids + [pad_token_id] * (max_seq_length - len(input_ids))\n",
+ "\n",
+ " # Label: one-hot encoding of pad token\n",
+ " label = [0] * vocab_size\n",
+ " label[pad_token_id] = 1\n",
+ "\n",
+ " all_input_ids.append(input_ids)\n",
+ " all_labels.append(label)\n",
+ "\n",
+ " return all_input_ids, all_labels, vocab_size\n",
+ "```\n"
+ ],
+ "metadata": {
+ "id": "8Tu8X9cVeQVD"
+ }
+ },
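+ {
+ "cell_type": "markdown",
+ "source": [
+ "To illustrate the batched Stage I-b expansion described above, here is a minimal sketch of a generator that expands only a small chunk of raw texts at a time, so the large one-hot labels never have to sit in memory all at once. It reuses prepare_data as listed above; the name batched_subsample_generator, the batch_texts chunk size, and the NumPy packaging are illustrative assumptions, not the notebook's actual streaming generator, which is set up later.\n",
+ "\n",
+ "```python\n",
+ "from typing import Any, Iterator, List, Tuple\n",
+ "\n",
+ "import numpy as np\n",
+ "\n",
+ "\n",
+ "def batched_subsample_generator(\n",
+ "        texts: List[str],\n",
+ "        tokenizer_0: Any,\n",
+ "        batch_texts: int = 32,\n",
+ "        max_seq_length: int = 1024,\n",
+ "        prompt_length: int = 1) -> Iterator[Tuple[np.ndarray, np.ndarray]]:\n",
+ "    # Expand and one-hot encode a small chunk of raw samples at a time.\n",
+ "    for start in range(0, len(texts), batch_texts):\n",
+ "        chunk = texts[start:start + batch_texts]\n",
+ "        input_ids, labels, _ = prepare_data(\n",
+ "            data_0=chunk,\n",
+ "            tokenizer_0=tokenizer_0,\n",
+ "            max_seq_length=max_seq_length,\n",
+ "            prompt_length=prompt_length)\n",
+ "        # Yield one expanded batch: int token IDs and float one-hot labels.\n",
+ "        yield (np.asarray(input_ids, dtype=np.int32),\n",
+ "               np.asarray(labels, dtype=np.float32))\n",
+ "```\n"
+ ],
+ "metadata": {}
+ },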
+ {
+ "cell_type": "code",
+ "source": [
+ "\n",
+ "# Preprocess data for Stage I-a training\n",
+ "x, y, vocab_size = prepare_data(data_0=non_instruct_samples,\n",
+ " tokenizer_0=tokenizer,\n",
+ " max_seq_length=MAX_SEQ_LENGTH,\n",
+ " prompt_length=PROMPT_LENGTH)\n",
+ "\n",
+ "# Split the preprocessed data for NAS training and validation\n",
+ "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.85, shuffle=False)\n",
+ "\n",
+ "# Package data into lists for the Cerebros AutoML component\n",
+ "x_train_tf = tf.constant(X_train, tf.int32)\n",
+ "y_train_tf = tf.constant(y_train, tf.float32)\n",
+ "x_train_packaged = [x_train_tf]\n",
+ "y_train_packaged = [y_train_tf]\n",
+ "\n",
+ "# Do the same for the validation data\n",
+ "x_test_tf = tf.constant(X_test, tf.int32)\n",
+ "y_test_tf = tf.constant(y_test, tf.float32)\n",
+ "x_test_packaged = [x_test_tf]\n",
+ "y_test_packaged = [y_test_tf]\n",
+ "\n",
+ "# Define input and output shapes for the AutoML model\n",
+ "INPUT_SHAPES = [(MAX_SEQ_LENGTH,)]\n",
+ "OUTPUT_SHAPES = [(VOCABULARY_SIZE)]\n"
+ ],
+ "metadata": {
+ "id": "EDyuTMLufYvs"
+ },
+ "execution_count": 11,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Train, Test Split of the Data for Stage I-b training\n",
+ "\n",
+ "We split the larger Phase I-b dataset into training and validation sets. Again, this dataset will be processed by a streaming generator in batches to avoid memory saturation and make the training more scalable. We will revisit that later."
+ ],
+ "metadata": {
+ "id": "zX60zcpykasl"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "\n",
+ "# Split the phase I-b data set for training and validation\n",
+ "phase_i_b_train_samples, phase_i_b_val_samples = train_test_split(\n",
+ " phase_i_b_samples,\n",
+ " test_size=PHASE_I_B_VAL_SPLIT,\n",
+ " shuffle=False\n",
+ ")\n"
+ ],
+ "metadata": {
+ "id": "SMSdkFRPkg7D"
+ },
+ "execution_count": 12,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "phase_i_b_train_samples[:3]"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Oqw-T7bOo1GD",
+ "outputId": "2e8f24fc-24c2-4a06-babb-550b676b7751"
+ },
+ "execution_count": 13,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[\"God said, 'Let the earth produce vegetation, seed-bearing plants, and fruit trees, each according to its kind,' and it was so.\",\n",
+ " 'The earth brought forth grass, seed-bearing herbs, and fruit trees, each with its seed, and God saw that it was good.',\n",
+ " 'There was evening and morning, the third day.']"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 13
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "X_train[:2]"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Hv_52izIjOQ7",
+ "outputId": "e2972924-0190-4f16-9317-c00100486203"
+ },
+ "execution_count": 14,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "[[644,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012],\n",
+ " [644,\n",
+ " 279,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012,\n",
+ " 128012]]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 14
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Base Text Embedding Model Definition\n",
+ "\n",
+ "- Before we run the NAS, we define a base model that handles token embeddings and positional embeddings.\n",
+ "- The SimpleCerebrosRandomSearch will then attach its auto-generated lattice of dense layers on top of this base model.\n",
+ "- The Cerebros NAS takes an init parameter base_models: List[tf.keras.Model]\n"
+ ],
+ "metadata": {
+ "id": "11Ri4PtKktih"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "####### Text embedding base model #####################\n",
+ "\n",
+ "inp = tf.keras.layers.Input(shape=(MAX_SEQ_LENGTH,), dtype=tf.int32)\n",
+ "\n",
+ "# Token embedding layer\n",
+ "embedded = tf.keras.layers.Embedding(\n",
+ " input_dim=VOCABULARY_SIZE,\n",
+ " output_dim=EMBEDDING_DIM,\n",
+ " input_length=MAX_SEQ_LENGTH,\n",
+ " mask_zero=False\n",
+ ")(inp)\n",
+ "\n",
+ "# Interleaved Rotary Positional Embedding (iRoPE)\n",
+ "position_embedding = InterleavedRoPE(\n",
+ " dim=EMBEDDING_DIM,\n",
+ " max_seq_len=MAX_SEQ_LENGTH,\n",
+ ")(embedded)\n",
+ "\n",
+ "# Concatenate token and positional embeddings\n",
+ "x = tf.keras.layers.Concatenate()([embedded, position_embedding])\n",
+ "x = tf.keras.layers.Dropout(POSITIONAL_EMBEDDING_DROPOUT)(x)\n",
+ "\n",
+ "# Flatten and project to the desired dimension\n",
+ "flattened = tf.keras.layers.Flatten()(x)\n",
+ "projected = tf.keras.layers.Dense(EMBEDDING_DIM * PROJECTION_N)(flattened)\n",
+ "\n",
+ "# Create the base Keras model\n",
+ "cerebros_base_model = tf.keras.Model(\n",
+ " inputs=inp,\n",
+ " outputs=projected\n",
+ ")\n"
+ ],
+ "metadata": {
+ "id": "tn1qrGISn_Pe",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "e76e091c-6e7f-4820-ef79-15143f1e6b64"
+ },
+ "execution_count": 15,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.12/dist-packages/keras/src/layers/core/embedding.py:97: UserWarning: Argument `input_length` is deprecated. Just remove it.\n",
+ " warnings.warn(\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## FYI: The iRoPE Embedding:\n",
+ "\n",
+ "The RoPE embedding, and helper functions it depends on (previously imported from the local package cerebrosllmutils):\n",
+ "\n",
+ "- iRoPE: Interleaved Rotary Positional Embedding\n",
+ "- RoPE: Rotary Positional Embedding\n",
+ "- The Rotary Positional Embedding expresses positional relationships as angles, extends feasible context window.\n",
+ "- iRoPE: iRoPE applies the rotation in an interleaved manner and enables capturing more nuance and extending context windows feasible to around 2 million tokens.\n",
+ "\n",
+ "```python\n",
+ "# --- Base Rotary Positional Embedding\n",
+ "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='RotaryEmbedding')\n",
+ "class RotaryEmbedding(tf.keras.layers.Layer):\n",
+ " def __init__(self, dim, max_seq_len=1024, temperature=10000.0, **kwargs):\n",
+ " super().__init__(**kwargs)\n",
+ " self.dim = dim\n",
+ " # Ensure dim is even right at initialization\n",
+ " if self.dim % 2 != 0:\n",
+ " raise ValueError(f\"Embedding dimension `dim` ({self.dim}) must be even for RotaryEmbedding.\")\n",
+ " self.max_seq_len = max_seq_len\n",
+ " self.temperature = temperature\n",
+ " # *** No calculation or storage of inv_freq here or in build ***\n",
+ "\n",
+ " def build(self, input_shape):\n",
+ " # Build should primarily be for creating trainable weights, which we don't have.\n",
+ " # Call super().build() for Keras compatibility.\n",
+ " super().build(input_shape)\n",
+ "\n",
+ " def call(self, x): # Removed seq_len argument, calculate from x\n",
+ " shape = tf.shape(x)\n",
+ " batch_size = shape[0]\n",
+ " actual_seq_len = shape[1]\n",
+ "\n",
+ " # *** Calculate inv_freq inside call ***\n",
+ " inv_freq_base = tf.range(0, self.dim, 2, dtype=tf.float32)\n",
+ " inv_freq = 1.0 / (self.temperature ** (inv_freq_base / self.dim))\n",
+ " # Ensure inv_freq has the correct shape [dim/2]\n",
+ " inv_freq = tf.cast(inv_freq, dtype=x.dtype) # Match dtype early\n",
+ "\n",
+ " # Use actual_seq_len for calculations\n",
+ " position = tf.range(actual_seq_len, dtype=x.dtype) # Match dtype\n",
+ "\n",
+ " # Calculate sinusoid input using einsum or broadcasting\n",
+ " # Einsum approach: Ensure correct dimensions [seq_len, dim/2]\n",
+ " sinusoid_inp = tf.einsum(\"i,j->ij\", position, inv_freq)\n",
+ "\n",
+ " # Calculate sin and cos based on the actual sequence length\n",
+ " sin = tf.sin(sinusoid_inp)\n",
+ " cos = tf.cos(sinusoid_inp)\n",
+ "\n",
+ " # Repeat sin/cos for interleaving: [a, b] -> [a, a, b, b]\n",
+ " # Result needs shape [actual_seq_len, dim]\n",
+ " sin = tf.repeat(sin, 2, axis=-1)\n",
+ " cos = tf.repeat(cos, 2, axis=-1)\n",
+ "\n",
+ " # Expand dims for batch and tile\n",
+ " # Output shape needs to be [batch_size, actual_seq_len, dim]\n",
+ " # Add batch dimension: [1, actual_seq_len, dim]\n",
+ " sin = tf.expand_dims(sin, axis=0)\n",
+ " cos = tf.expand_dims(cos, axis=0)\n",
+ "\n",
+ " # Tile to match the batch size: [batch_size, actual_seq_len, dim]\n",
+ " sin = tf.tile(sin, [batch_size, 1, 1])\n",
+ " cos = tf.tile(cos, [batch_size, 1, 1])\n",
+ "\n",
+ " # Casting to x.dtype was already done for inv_freq, sin/cos will inherit\n",
+ " # sin = tf.cast(sin, x.dtype) # Already done via calculation chain\n",
+ " # cos = tf.cast(cos, x.dtype) # Already done via calculation chain\n",
+ "\n",
+ " # Return sin and cos needed by InterleavedRoPE\n",
+ " return sin, cos\n",
+ "\n",
+ " def get_config(self):\n",
+ " config = super().get_config()\n",
+ " config.update({\n",
+ " \"dim\": self.dim,\n",
+ " \"max_seq_len\": self.max_seq_len,\n",
+ " \"temperature\": self.temperature,\n",
+ " })\n",
+ " return config\n",
+ "\n",
+ " @classmethod\n",
+ " def from_config(cls, config):\n",
+ " return cls(**config)\n",
+ "\n",
+ "\n",
+ "# iRoPE helper functions\n",
+ "\n",
+ "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='split_alternate')\n",
+ "def split_alternate(x):\n",
+ " shape = tf.shape(x)\n",
+ " x = tf.reshape(x, [shape[0], shape[1], shape[2] // 2, 2])\n",
+ " x = tf.transpose(x, [0, 1, 3, 2])\n",
+ " x = tf.reshape(x, [shape[0], shape[1], -1])\n",
+ " return x\n",
+ "\n",
+ "\n",
+ "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='rotate_half')\n",
+ "def rotate_half(x):\n",
+ " x = split_alternate(x)\n",
+ " d = tf.shape(x)[-1]\n",
+ " rotated_x = tf.concat([-x[..., d // 2:], x[..., :d // 2]], axis=-1)\n",
+ " return tf.reshape(rotated_x, tf.shape(x))\n",
+ "\n",
+ "\n",
+ "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='apply_rotary_pos_emb')\n",
+ "def apply_rotary_pos_emb(x, sin, cos):\n",
+ " cos = tf.reshape(cos, [tf.shape(cos)[0], tf.shape(cos)[1], -1])\n",
+ " sin = tf.reshape(sin, [tf.shape(sin)[0], tf.shape(sin)[1], -1])\n",
+ " x_rotated = x * cos + rotate_half(x) * sin\n",
+ " return x_rotated\n",
+ "\n",
+ "\n",
+ "# interleaved Rotary Postional Embedding (iRoPE)\n",
+ "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='InterleavedRoPE')\n",
+ "class InterleavedRoPE(tf.keras.layers.Layer):\n",
+ " def __init__(self, dim, max_seq_len=1024, **kwargs):\n",
+ " super().__init__(**kwargs)\n",
+ " if dim % 2 != 0:\n",
+ " raise ValueError(f\"Embedding dimension `dim` ({dim}) must be even for InterleavedRoPE.\")\n",
+ " self.dim = dim\n",
+ " self.max_seq_len = max_seq_len\n",
+ " # Instantiate the RotaryEmbedding layer\n",
+ " # Ensure the name is consistent if needed for saving/loading\n",
+ " self.rotary_emb = RotaryEmbedding(dim, max_seq_len, name=\"rotary_embedding\")\n",
+ "\n",
+ " def call(self, x):\n",
+ " # Get sin and cos from the RotaryEmbedding layer's call method\n",
+ " # *** Pass only 'x'. RotaryEmbedding calculates seq_len internally. ***\n",
+ " sin, cos = self.rotary_emb(x)\n",
+ "\n",
+ " # Apply the positional embeddings\n",
+ " x_embedded = apply_rotary_pos_emb(x, sin, cos)\n",
+ " return x_embedded\n",
+ "\n",
+ " def get_config(self):\n",
+ " config = super().get_config()\n",
+ " config.update({\n",
+ " \"dim\": self.dim,\n",
+ " \"max_seq_len\": self.max_seq_len,\n",
+ " })\n",
+ " # Keras handles nested layer serialization automatically\n",
+ " return config\n",
+ "\n",
+ " @classmethod\n",
+ " def from_config(cls, config):\n",
+ " # Keras handles nested layer restoration automatically\n",
+ " return cls(**config)\n",
+ "```"
+ ],
+ "metadata": {
+ "id": "CXtYv20vpkMY"
+ }
+ },
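+ {
+ "cell_type": "markdown",
+ "source": [
+ "As a quick sanity check (a minimal sketch, not part of the training pipeline), InterleavedRoPE can be applied to a dummy batch; the tensor sizes and variable names below are illustrative. Because the rotation is purely positional, the output keeps the input's shape and dtype.\n",
+ "\n",
+ "```python\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "# Dummy batch: 2 sequences of length 8 with an (even) embedding dim of 16.\n",
+ "dummy = tf.random.normal((2, 8, 16))\n",
+ "\n",
+ "rope = InterleavedRoPE(dim=16, max_seq_len=8)\n",
+ "rotated = rope(dummy)\n",
+ "\n",
+ "print(rotated.shape)  # (2, 8, 16) -- same shape as the input\n",
+ "print(rotated.dtype)  # float32 -- same dtype as the input\n",
+ "```\n"
+ ],
+ "metadata": {}
+ },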
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Custom metric Perplexity (previously imported from the local package cerebrosllmutils):\n",
+ "\n",
+ "Since there is not a Perplexity metric in tensorflow.keras.metrics, we created our own, and one designed for this single - head model.\n",
+ "\n",
+ "## This is what it looks like:\n",
+ "\n",
+ "```python\n",
+ "@tf.keras.utils.register_keras_serializable(package='cerebrosllmutils', name='Perplexity')\n",
+ "class Perplexity(tf.keras.metrics.Metric):\n",
+ " \"\"\"\n",
+ " Computes perplexity, defined as e^(categorical crossentropy).\n",
+ " \"\"\"\n",
+ "\n",
+ " def __init__(self, name='perplexity', **kwargs):\n",
+ " super().__init__(name=name, **kwargs)\n",
+ " self.total_crossentropy = self.add_weight(name='total_crossentropy', initializer='zeros')\n",
+ " self.count = self.add_weight(name='count', initializer='zeros')\n",
+ "\n",
+ " def update_state(self, y_true, y_pred, sample_weight=None):\n",
+ " # Calculate categorical crossentropy\n",
+ " crossentropy = tf.keras.losses.categorical_crossentropy(y_true, y_pred)\n",
+ "\n",
+ " # Update the running sum of crossentropy and the count of samples\n",
+ " self.total_crossentropy.assign_add(tf.reduce_sum(crossentropy))\n",
+ " self.count.assign_add(tf.cast(tf.shape(y_true)[0], dtype=tf.float32))\n",
+ "\n",
+ " def result(self):\n",
+ " # Compute the average crossentropy\n",
+ " average_crossentropy = self.total_crossentropy / self.count\n",
+ " # Compute perplexity as e^(average crossentropy)\n",
+ " return tf.exp(average_crossentropy)\n",
+ "\n",
+ " def reset_state(self):\n",
+ " # Reset the state variables\n",
+ " self.total_crossentropy.assign(0.0)\n",
+ " self.count.assign(0.0)\n",
+ "```\n"
+ ],
+ "metadata": {
+ "id": "uN3adqRLo61X"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Custom metric: Perplexity\n",
+ "perplexity_metric = Perplexity()"
+ ],
+ "metadata": {
+ "id": "_8uTBW_to7iQ"
+ },
+ "execution_count": 16,
+ "outputs": []
+ },
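+ {
+ "cell_type": "markdown",
+ "source": [
+ "As a quick sanity check of the metric (a minimal sketch with illustrative names, not part of the notebook's pipeline): for a one-hot label where the model assigns probability p to the true token, the categorical crossentropy is -log(p), so the metric should return exp(-log(p)) = 1/p. With p = 0.25 over a toy 4-token vocabulary, that is 4.0.\n",
+ "\n",
+ "```python\n",
+ "import tensorflow as tf\n",
+ "\n",
+ "# One-hot true label over a toy 4-token vocabulary.\n",
+ "y_true = tf.constant([[0.0, 1.0, 0.0, 0.0]])\n",
+ "# The model assigns probability 0.25 to the true token.\n",
+ "y_pred = tf.constant([[0.25, 0.25, 0.25, 0.25]])\n",
+ "\n",
+ "check = Perplexity()\n",
+ "check.update_state(y_true, y_pred)\n",
+ "print(float(check.result()))  # ~4.0, i.e. exp(-log(0.25))\n",
+ "```\n"
+ ],
+ "metadata": {}
+ },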
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "# Stage I-a training: Neural Architecture Search (NAS)\n",
+ "\n",
+ "We now run the SimpleCerebrosRandomSearch to find the best performing architecture based on the training data and the base model. The search aims to minimize the perplexity in the train set. The search aims to minimize the perplexity in the training set. Obviously, in a full - scale run, we would use the validation set's value.\n",
+ "\n",
+ "- The Cerebros NAS will parse a block composed of rows (Levels) of multiple Dense layers (Units) with an overlapping, interleaved, interwoven topology both laterally between Dense layers on the same row and vertically between layers on different levels.\n",
+ "- This topology emulates the neuroscience principle of modularity.\n",
+ "- This topology allows local clusters of densely connected neurons to learn specialized fragments of a problem, while allowing efficient communication between these clusters to coordinate among themselves to compose a solution to a complex problem.\n",
+ "\n",
+ "For the deep technical details of how Cerebros NAS works: [How Cerebros NAS Works](https://github.com/david-thrower/cerebros-core-algorithm-alpha/blob/277-attempt-to-imporve-parameters-on--dev-branch-275/documentation/cerebros-technical-details.md)\n",
+ "\n",
+ "## This is what a neural network parsed by Cerebros looks like:\n",
+ "\n",
+ "- Green triangles: Input layers\n",
+ "- Blue squares: Concatenate layer -> [BatchNormalization | Dropout]\n",
+ "- Pink ovals: Hidden Dense layers\n",
+ "- Red oval: Output Dense layer\n"
+ ],
+ "metadata": {
+ "id": "tWjbHiHRMhR4"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ ""
+ ],
+ "metadata": {
+ "id": "1wR8EVItNNh_"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "\n",
+ "## For a more readable view of that this looks like\n",
+ "\n",
+ "\n"
+ ],
+ "metadata": {
+ "id": "_bXR1QxaLPiq"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "######## Instantiate Cerebros Neural Architecture Search #######\n",
+ "\n",
+ "# Project metadata\n",
+ "TIME = pendulum.now(tz='America/New_York').__str__()[:16].replace('T', '_').replace(':', '_').replace('-', '_')\n",
+ "PROJECT_NAME = f'{TIME}_cerebros_not-gpt'\n",
+ "meta_trial_number = 42\n",
+ "\n",
+ "# Initialize the AutoML search\n",
+ "cerebros_automl = SimpleCerebrosRandomSearch(\n",
+ " unit_type=DenseUnit,\n",
+ " input_shapes=INPUT_SHAPES,\n",
+ " output_shapes=OUTPUT_SHAPES,\n",
+ " training_data=x_train_packaged,\n",
+ " labels=y_train_packaged,\n",
+ " validation_split=0.2,\n",
+ " direction='minimize',\n",
+ " metric_to_rank_by=\"perplexity\",\n",
+ " minimum_levels=minimum_levels,\n",
+ " maximum_levels=maximum_levels,\n",
+ " minimum_units_per_level=minimum_units_per_level,\n",
+ " maximum_units_per_level=maximum_units_per_level,\n",
+ " minimum_neurons_per_unit=minimum_neurons_per_unit,\n",
+ " maximum_neurons_per_unit=maximum_neurons_per_unit,\n",
+ " activation=activation,\n",
+ " final_activation='softmax',\n",
+ " number_of_architecture_moities_to_try=moities_to_try,\n",
+ " number_of_tries_per_architecture_moity=tries_per_moity,\n",
+ " predecessor_level_connection_affinity_factor_first=predecessor_level_connection_affinity_factor_first,\n",
+ " predecessor_level_connection_affinity_factor_main=predecessor_level_connection_affinity_factor_main,\n",
+ " predecessor_level_connection_affinity_factor_decay_main=zero_7_exp_decay,\n",
+ " max_consecutive_lateral_connections=max_consecutive_lateral_connections,\n",
+ " p_lateral_connection=p_lateral_connection,\n",
+ " p_lateral_connection_decay=zero_95_exp_decay,\n",
+ " num_lateral_connection_tries_per_unit=num_lateral_connection_tries_per_unit,\n",
+ " learning_rate=learning_rate,\n",
+ " loss=tf.keras.losses.CategoricalCrossentropy(),\n",
+ " metrics=[tf.keras.metrics.CategoricalAccuracy(), perplexity_metric],\n",
+ " epochs=epochs,\n",
+ " project_name=f\"{PROJECT_NAME}_meta_{meta_trial_number}\",\n",
+ " model_graphs='model_graphs',\n",
+ " batch_size=batch_size,\n",
+ " gradient_accumulation_steps=gradient_accumulation_steps,\n",
+ " meta_trial_number=meta_trial_number,\n",
+ " base_models=[cerebros_base_model],\n",
+ " train_data_dtype=tf.int32\n",
+ ")"
+ ],
+ "metadata": {
+ "id": "XV2q_5WEwBJ0"
+ },
+ "execution_count": 17,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Run the Cerebros Neural Architecture Search\n"
+ ],
+ "metadata": {
+ "id": "TJVLfmJ2virA"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "cerebros_t0 = time.time()\n",
+ "phase_i_a_result_0 = cerebros_automl.run_random_search()\n",
+ "cerebros_t1 = time.time()\n",
+ "\n",
+ "# Report results\n",
+ "cerebros_time_all_models_min = (cerebros_t1 - cerebros_t0) / 60\n",
+ "models_tried = moities_to_try * tries_per_moity\n",
+ "cerebros_time_per_model = cerebros_time_all_models_min / models_tried\n",
+ "phase_i_a_result = float(phase_i_a_result_0)\n",
+ "\n",
+ "print(f\"Cerebros trained {models_tried} models in {cerebros_time_all_models_min:.2f} min. Average time per model: {cerebros_time_per_model:.2f} min.\")\n",
+ "print(f'Cerebros best perplexity achieved in Phase I-a is {phase_i_a_result}')"
+ ],
+ "metadata": {
+ "id": "ulL0EGnow5L7",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "outputId": "d56dd1ec-2f7b-4a3c-ecc6-75e595910367"
+ },
+ "execution_count": 18,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\rGlobal task progress: 0%|\u001b[38;2;22;206;235m \u001b[0m| 0/3 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "SimpleCerebrosRandomSearch.input_shapes: [(40,)]\n",
+ "nan\n",
+ ">nnf>ceil\n",
+ "k is: 0 value is: [{'1': }]\n",
+ "0\n",
+ "k is: 1 value is: [{'2': }, {'2': }]\n",
+ "1\n",
+ "Trying to create level 1\n",
+ "We think level 1's predecessors are: [0]\n",
+ "k is: 2 value is: [{'128260': }]\n",
+ "2\n",
+ "Trying to create Final level 2\n",
+ "Trying to create level 2\n",
+ "We think level final level 2's predecessors are: [0, 1]\n",
+ "levels:\n",
+ "[0, 1, 2]\n",
+ "{'0': 'InputUnitModule'}\n",
+ "InputLevel.input_shapes [(40,)]\n",
+ "{'2': }\n",
+ "{'2': }\n",
+ "Debug: I am 2 selecting 1\n",
+ "debug: meta_level_number\n",
+ "debug: meta_level_number\n",
+ "debug: meta_level_number\n",
+ "Setting levels_unmaterialized[0] level_number 0 to have first successor: levels_unmaterialized[:1], having level_numbers of [1, 2]\n",
+ "Setting levels_unmaterialized[1] level_number 1 to have first successor: levels_unmaterialized[:2], having level_numbers of [2]\n",
+ "Debug: successor_connectivity_errors_2d []\n",
+ "$$$$$$>>>>> Base model: \n",
+ "InputUnit.input_shape: (40,)\n",
+ "{'2': }\n",
+ "{'2': }\n",
+ "debug: meta_level_number\n",
+ "debug: meta_level_number\n",
+ "Debug: successor_connectivity_errors_2d []\n",
+ "Debug: successor_connectivity_errors_2d []\n",
+ "materialize:_NeuralNetworkFuture_0000000000000nan_tr_0_DenseLevel_0000000000000001_tr_0_DenseUnit_0000000000000001_tr_0_0 called\n",
+ "materialized network layers\n",
+ "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n",
+ "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n",
+ "materialize:_NeuralNetworkFuture_0000000000000nan_tr_0_DenseLevel_0000000000000001_tr_0_DenseUnit_0000000000000001_tr_0_1 called\n",
+ "materialized network layers\n",
+ "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n",
+ "materialized_predecessor_units [, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n",
+ "{'128260': }\n",
+ "debug: meta_level_number\n",
+ "Debug: successor_connectivity_errors_2d []\n",
+ "materialize:_NeuralNetworkFuture_0000000000000nan_tr_0_FinalDenseLevel_0000000000000002_tr_0_FinalDenseUnit_0000000000000002_tr_0_0 called\n",
+ "materialized network layers\n",
+ "[, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , ]\n",
+ "materialized_predecessor_units [, , , , , , , , , , , ,