Commit ae1cdb8

Merge pull request #190 from david-thrower/180-question-leakage-potentially-causing-the-extraordinarily-low-rmse-for-ames-dataset
180 question leakage potentially causing the extraordinarily low rmse for ames dataset
2 parents bd36c2b + faad123 commit ae1cdb8

File tree

4 files changed: +50 -38 lines changed

.github/workflows/automerge.yml
realnn-regression-example-ames-no-preproc.py
regression-example-ames-no-preproc-val-set.py
regression-example-ames-no-preproc.py

.github/workflows/automerge.yml

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@ name: Python application

 on:
   push:
-    branches: [ "main", "187-upgrade-transformers" ]
+    branches: [ "main", "180-question-leakage-potentially-causing-the-extraordinarily-low-rmse-for-ames-dataset" ]

 permissions:
   contents: read

realnn-regression-example-ames-no-preproc.py

Lines changed: 6 additions & 2 deletions

@@ -16,6 +16,8 @@

 ###

+LABEL_COLUMN = 'price'
+
 ## your data:


@@ -30,9 +32,11 @@

 raw_data = pd.read_csv('ames.csv')
 needed_cols = [
-    col for col in raw_data.columns if raw_data[col].dtype != 'object']
+    col for col in raw_data.columns
+    if raw_data[col].dtype != 'object'
+    and col != LABEL_COLUMN]
 data_numeric = raw_data[needed_cols].fillna(0).astype(float)
-label = raw_data.pop('price')
+label = raw_data.pop(LABEL_COLUMN)

 data_np = data_numeric.values
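
Taken together, the two hunks above are the leakage fix itself: price is evidently a numeric column in this ames.csv (otherwise the old dtype-only filter would already have dropped it), so raw_data[col].dtype != 'object' silently kept the target among the input features. Below is a minimal standalone sketch of the corrected pattern; the closing assertion is added here for illustration (it is not in the script), and the only assumption is a local ames.csv as used throughout these examples.

import pandas as pd

LABEL_COLUMN = 'price'

raw_data = pd.read_csv('ames.csv')

# Keep numeric columns only, and exclude the label explicitly so the
# target can no longer slip into the feature matrix.
needed_cols = [
    col for col in raw_data.columns
    if raw_data[col].dtype != 'object'
    and col != LABEL_COLUMN]

data_numeric = raw_data[needed_cols].fillna(0).astype(float)
label = raw_data.pop(LABEL_COLUMN)

# Sanity check (added here, not part of the commit): the target must
# not appear among the inputs.
assert LABEL_COLUMN not in data_numeric.columns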

regression-example-ames-no-preproc-val-set.py

Lines changed: 22 additions & 18 deletions

@@ -16,7 +16,7 @@

 ###

-## your data:
+LABEL_COLUMN = 'price'


 TIME = pendulum.now().__str__()[:16]\
@@ -103,11 +103,14 @@ def hash_based_split(df, # Pandas dataframe

 # white = pd.read_csv('wine_data.csv')

+
 raw_data = pd.read_csv('ames.csv')
 needed_cols = [
-    col for col in raw_data.columns if raw_data[col].dtype != 'object']
+    col for col in raw_data.columns
+    if raw_data[col].dtype != 'object'
+    and col != LABEL_COLUMN]
 data_numeric = raw_data[needed_cols].fillna(0).astype(float)
-label = raw_data.pop('price')
+label = raw_data.pop(LABEL_COLUMN)


 train_df, train_labels_pd, val_df, val_labels_pd =\
@@ -147,19 +150,20 @@ def hash_based_split(df, # Pandas dataframe
 # Params for a training function (Approximately the oprma
 # discovered in a bayesian tuning study done on Katib)

+
 meta_trial_number = 0 # In distributed training set this to a random number
-activation = "gelu"
-predecessor_level_connection_affinity_factor_first = 19.613
-predecessor_level_connection_affinity_factor_main = 0.5518
-max_consecutive_lateral_connections = 34
-p_lateral_connection = 0.36014
-num_lateral_connection_tries_per_unit = 11
-learning_rate = 0.095
-epochs = 145
-batch_size = 634
-maximum_levels = 5
-maximum_units_per_level = 5
-maximum_neurons_per_unit = 25
+activation = 'swish'
+predecessor_level_connection_affinity_factor_first = 0.506486683067576
+predecessor_level_connection_affinity_factor_main = 1.6466748663373876
+max_consecutive_lateral_connections = 35
+p_lateral_connection = 3.703218275217572
+num_lateral_connection_tries_per_unit = 12
+learning_rate = 0.02804912925494706
+epochs = 130
+batch_size = 78
+maximum_levels = 4
+maximum_units_per_level = 3
+maximum_neurons_per_unit = 3


 cerebros =\
@@ -172,11 +176,11 @@ def hash_based_split(df, # Pandas dataframe
     validation_split=0.0,
     direction='minimize',
     metric_to_rank_by='val_root_mean_squared_error',
-    minimum_levels=1,
+    minimum_levels=4,
     maximum_levels=maximum_levels,
-    minimum_units_per_level=1,
+    minimum_units_per_level=2,
     maximum_units_per_level=maximum_units_per_level,
-    minimum_neurons_per_unit=1,
+    minimum_neurons_per_unit=3,
     maximum_neurons_per_unit=maximum_neurons_per_unit,
     validation_data=(val_x, val_labels),
     activation=activation,
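
The hunk headers above fall inside this script's hash_based_split helper, which produces the train/validation frames consumed a few lines later. Its real signature and return values are not shown in this diff; the snippet below is only a generic sketch of the hash-based splitting idea (deterministically routing each row to train or validation by hashing a stable key), with the key column name, split fraction, and return order all assumed for illustration.

import hashlib
import pandas as pd

def hash_based_split_sketch(df, labels, key_column='PID', val_fraction=0.35):
    # Hash a stable row identifier so each row always lands in the same
    # split, independent of row order or reruns of the script.
    def bucket(value):
        digest = hashlib.sha256(str(value).encode('utf-8')).hexdigest()
        return int(digest, 16) % 100

    in_val = df[key_column].map(bucket) < int(val_fraction * 100)
    return df[~in_val], labels[~in_val], df[in_val], labels[in_val]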

regression-example-ames-no-preproc.py

Lines changed: 21 additions & 17 deletions

@@ -16,6 +16,8 @@

 ###

+LABEL_COLUMN = 'price'
+
 ## your data:


@@ -30,9 +32,11 @@

 raw_data = pd.read_csv('ames.csv')
 needed_cols = [
-    col for col in raw_data.columns if raw_data[col].dtype != 'object']
+    col for col in raw_data.columns
+    if raw_data[col].dtype != 'object'
+    and col != LABEL_COLUMN]
 data_numeric = raw_data[needed_cols].fillna(0).astype(float)
-label = raw_data.pop('price')
+label = raw_data.pop(LABEL_COLUMN)

 data_np = data_numeric.values

@@ -52,18 +56,18 @@
 # discovered in a bayesian tuning study done on Katib)

 meta_trial_number = 0 # In distributed training set this to a random number
-activation = "gelu"
-predecessor_level_connection_affinity_factor_first = 19.613
-predecessor_level_connection_affinity_factor_main = 0.5518
-max_consecutive_lateral_connections = 34
-p_lateral_connection = 0.36014
-num_lateral_connection_tries_per_unit = 11
-learning_rate = 0.095
-epochs = 145
-batch_size = 634
-maximum_levels = 5
-maximum_units_per_level = 5
-maximum_neurons_per_unit = 25
+activation = 'swish'
+predecessor_level_connection_affinity_factor_first = 0.506486683067576
+predecessor_level_connection_affinity_factor_main = 1.6466748663373876
+max_consecutive_lateral_connections = 35
+p_lateral_connection = 3.703218275217572
+num_lateral_connection_tries_per_unit = 12
+learning_rate = 0.02804912925494706
+epochs = 130
+batch_size = 78
+maximum_levels = 4
+maximum_units_per_level = 3
+maximum_neurons_per_unit = 3


 cerebros =\
@@ -76,11 +80,11 @@
     validation_split=0.35,
     direction='minimize',
     metric_to_rank_by='val_root_mean_squared_error',
-    minimum_levels=1,
+    minimum_levels=4,
     maximum_levels=maximum_levels,
-    minimum_units_per_level=1,
+    minimum_units_per_level=2,
     maximum_units_per_level=maximum_units_per_level,
-    minimum_neurons_per_unit=1,
+    minimum_neurons_per_unit=3,
     maximum_neurons_per_unit=maximum_neurons_per_unit,
     activation=activation,
     final_activation=None,
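
On the "extraordinarily low RMSE" question behind issue #180: a quick scale check is to compare any reported val_root_mean_squared_error against the RMSE of a trivial predictor that always outputs the mean price, which equals the standard deviation of the label; a model scoring far below any plausible fraction of that baseline is a strong hint of leakage like the one removed here. A minimal sketch of that check, assuming only the same local ames.csv with a numeric price column:

import numpy as np
import pandas as pd

label = pd.read_csv('ames.csv')['price'].astype(float)

# RMSE of always predicting the mean equals the label's (population) std.
baseline_rmse = float(np.sqrt(np.mean((label - label.mean()) ** 2)))
print(f"mean-predictor baseline RMSE: {baseline_rmse:,.2f}")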
