# Build BERT base model

# Alternative 1 (commented out): one-step preprocessing with the
# preprocessor's default sequence length.
# text_input = tf.keras.layers.Input(shape=(), dtype=tf.string)
# preprocessor = hub.KerasLayer(
#     "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
# encoder_inputs = preprocessor(text_input)

# Alternative 2 (commented out): two-step preprocessing that exposes
# tokenization and input packing separately, so seq_length is configurable.
# preprocessor = hub.load(
#     "https://www.kaggle.com/models/tensorflow/bert/TensorFlow2/en-uncased-preprocess/3")
# inp = tf.keras.layers.Input(shape=(), dtype=tf.string)
# text_inputs = [inp]
# tokenize = hub.KerasLayer(preprocessor.tokenize)
# tokenized_inputs = [tokenize(segment) for segment in text_inputs]
#
# seq_length = 128  # Your choice here.
# bert_pack_inputs = hub.KerasLayer(
#     preprocessor.bert_pack_inputs,
#     arguments=dict(seq_length=seq_length))  # Optional argument.
# encoder_inputs = bert_pack_inputs(tokenized_inputs)
#
# Encoder for the commented-out paths above (BERT base, 768-dim outputs):
# encoder = hub.KerasLayer(
#     "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4",
#     trainable=True)
# outputs = encoder(encoder_inputs)
# pooled_output = outputs["pooled_output"]      # [batch_size, 768].
# sequence_output = outputs["sequence_output"]  # [batch_size, seq_length, 768].
# embedding_model = tf.keras.Model(inp, pooled_output)

# Active path: default preprocessing plus a small BERT encoder
# (L-10/H-128), so the pooled embedding is 128-dimensional.
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string)
preprocessor = hub.KerasLayer(
    "https://kaggle.com/models/tensorflow/bert/TensorFlow2/en-uncased-preprocess/3")
encoder_inputs = preprocessor(text_input)
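# Note: per the standard TF Hub BERT preprocessing convention,
# encoder_inputs is a dict with keys "input_word_ids", "input_mask",
# and "input_type_ids", each shaped [batch_size, seq_length].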
encoder = hub.KerasLayer(
    "https://www.kaggle.com/models/tensorflow/bert/TensorFlow2/bert-en-uncased-l-10-h-128-a-2/2",
    trainable=True)
outputs = encoder(encoder_inputs)
pooled_output = outputs["pooled_output"]      # [batch_size, 128].
sequence_output = outputs["sequence_output"]  # [batch_size, seq_length, 128].

embedding_model = tf.keras.Model(text_input, pooled_output)
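
# Optional sanity check, as a minimal sketch (the sample strings below
# are made-up placeholders, not taken from the data set): the model maps
# a batch of raw strings straight to pooled 128-dim embeddings.
# sample_batch = tf.constant(["interest rates rose today",
#                             "the committee held rates steady"])
# sample_embeddings = embedding_model(sample_batch)  # shape: (2, 128)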
## Load the data set
raw_text = pd.read_csv(data_file, dtype='object')
labels = labels.values
data = raw_text.values

# Labels as float32 (rather than int8) to match what Keras losses expect.
labels_tensor = tf.constant(labels, dtype=tf.float32)
data_tensor = tf.constant(data, dtype=tf.string)

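# One possible way to feed these tensors to Keras, sketched here with an
# illustrative batch size and shuffle buffer (neither is from the
# original script):
# dataset = (tf.data.Dataset.from_tensor_slices((data_tensor, labels_tensor))
#            .shuffle(buffer_size=1024)
#            .batch(32))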
TIME = pendulum.now(tz='America/New_York').__str__()[:16]\