Skip to content

Commit 6cea4e8

Browse files
authored
Merge pull request #206 from david-thrower/205-new-branch-for-testing-searialization
205 new branch for testing serialization
2 parents caaae1d + 775470e commit 6cea4e8

File tree

2 files changed

+37
-4
lines changed

2 files changed

+37
-4
lines changed

.github/workflows/automerge.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ name: Python application
55

66
on:
77
push:
8-
branches: [ "main", "180-question-leakage-potentially-causing-the-extraordinarily-low-rmse-for-ames-dataset" ]
8+
branches: [ "main", "205-new-branch-for-testing-searialization" ]
99

1010
permissions:
1111
contents: read

phishing_email_detection_gpt2.py

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
import zero_7_exp_decay, zero_95_exp_decay, simple_sigmoid
3232
from ast import literal_eval
3333
import time
34-
34+
from gc import collect
3535

3636
#
3737
# Load the email data
@@ -77,6 +77,14 @@
7777
training_x = [baseline_train_x]
7878
train_labels = [baseline_train_y]
7979

80+
# Package test set:
81+
test_x_tf = tf.constant(X_test, dtype=tf.string)
82+
test_y_tf = tf.constant(y_test, dtype=tf.int8)
83+
84+
test_x_packaged = [test_x_tf]
85+
test_y_packaged = [test_y_tf]
86+
87+
8088
#
8189
# Input and output shapes
8290
#
@@ -86,6 +94,7 @@
8694
"""### A custom GPT2 encoder layer for text embedding"""
8795

8896

97+
@tf.keras.utils.register_keras_serializable()
8998
class GPT2Layer(tf.keras.layers.Layer):
9099

91100
def __init__(self, max_seq_length, **kwargs):
@@ -190,6 +199,7 @@ def from_config(cls, config):
190199
from transformers import AutoTokenizer
191200
import tensorflow as tf
192201

202+
@tf.keras.utils.register_keras_serializable()
193203
class NewTokenizerLayer(tf.keras.layers.Layer):
194204
def __init__(self, max_seq_length, tokenizer_checkpoint, **kwargs):
195205
super().__init__(**kwargs)
@@ -248,6 +258,7 @@ def from_config(cls, config):
248258

249259

250260
# --- Updated RotaryEmbedding ---
261+
@tf.keras.utils.register_keras_serializable()
251262
class RotaryEmbedding(tf.keras.layers.Layer):
252263
def __init__(self, dim, max_seq_len=1024, temperature=10000.0, **kwargs):
253264
super().__init__(**kwargs)
@@ -347,6 +358,7 @@ def apply_rotary_pos_emb(x, sin, cos):
347358
return x_rotated
348359

349360

361+
@tf.keras.utils.register_keras_serializable()
350362
class InterleavedRoPE(tf.keras.layers.Layer):
351363
def __init__(self, dim, max_seq_len=1024, **kwargs):
352364
super().__init__(**kwargs)
@@ -419,7 +431,7 @@ def from_config(cls, config):
419431
# LayerNorm ... It degraded accuracy
420432
# Just an FYI for anyone trying to apply conventional wisdom
421433
# to save you the time ...
422-
x = x = tf.keras.layers.Concatenate()([embedded, position_embedding])
434+
x = tf.keras.layers.Concatenate()([embedded, position_embedding])
423435
x = tf.keras.layers.Dropout(0.4)(x) # AI suggested 0.4
424436
flattened = tf.keras.layers.Flatten()(x)
425437

@@ -528,4 +540,25 @@ def from_config(cls, config):
528540
print(f'Cerebros best accuracy achieved is {result}')
529541
print(f'val set accuracy')
530542

531-
# """### Testing the best model found"""
543+
"""### Testing the best model found"""
544+
545+
MODEL_FILE_NAME = "cerebros-foundation-model.keras"
546+
547+
best_model_found = cerebros_automl.get_best_model()
548+
best_model_found.save(MODEL_FILE_NAME)
549+
del(best_model_found)
550+
del(cerebros_automl)
551+
collect()
552+
553+
reconstituted_model = tf.keras.models.load_model(MODEL_FILE_NAME)
554+
test_x_packaged = [test_x_tf]
555+
test_y_packaged = [test_y_tf]
556+
557+
reconstituted_model.compile(
558+
loss='binary_crossentropy',
559+
metrics=['accuracy']
560+
)
561+
562+
results = reconstituted_model.evaluate(test_x_packaged, test_y_packaged)
563+
print("Test loss:", results[0])
564+
print("Test accuracy:", results[-1])

0 commit comments

Comments
 (0)