
Commit 11a067c

code refactoring + added argument parser

1 parent 024d5d0 commit 11a067c

File tree: 4 files changed, +136 -59 lines

lstm.c

Lines changed: 52 additions & 41 deletions

@@ -1212,37 +1212,39 @@ void lstm_model_regularization(lstm_model_t* model, lstm_model_t* gradients)
   vectors_add_scalar_multiply(gradients->bf, model->bf, model->N, lambda);
 }

-// model, number of training points, X_train, Y_train, number of iterations
-void lstm_train(lstm_model_t* model, lstm_model_t** model_layers, set_T* char_index_mapping, unsigned int training_points, int* X_train, int* Y_train, unsigned long iterations, int layers)
+// model, number of training points, X_train, Y_train
+void lstm_train(lstm_model_t** model_layers, lstm_model_parameters_t *params, set_T* char_index_mapping, unsigned int training_points, int* X_train, int* Y_train, int layers)
 {
   int N,F,S, status = 0, p = 0;
   unsigned int i = 0, b = 0, q = 0, e1 = 0, e2 = 0, e3, record_iteration = 0, tmp_count, trailing;
-  unsigned long n = 0, decrease_threshold = model->params->learning_rate_decrease_threshold, epoch = 0;
+  unsigned long n = 0, epoch = 0;
   double loss = -1, loss_tmp = 0.0, record_keeper = 0.0;
-  double initial_learning_rate = model->params->learning_rate;
+  double initial_learning_rate = params->learning_rate;
   time_t time_iter;
   char time_buffer[40];
-  int stateful = model->params->stateful, decrease_lr = model->params->decrease_lr;
+  unsigned long iterations = params->iterations;
+  int stateful = params->stateful, decrease_lr = params->decrease_lr;
   // configuration for output printing during training
-  int print_progress = model->params->print_progress;
-  int print_progress_iterations = model->params->print_progress_iterations;
-  int print_progress_sample_output = model->params->print_progress_sample_output;
-  int print_progress_to_file = model->params->print_progress_to_file;
-  int print_progress_number_of_chars = model->params->print_progress_number_of_chars;
-  char *print_progress_to_file_name = model->params->print_sample_output_to_file_name;
-  char *print_progress_to_file_arg = model->params->print_sample_output_to_file_arg;
-  int store_progress_evert_x_iterations = model->params->store_progress_evert_x_iterations;
-  char *store_progress_file_name = model->params->store_progress_file_name;
+  int print_progress = params->print_progress;
+  int print_progress_iterations = params->print_progress_iterations;
+  int print_progress_sample_output = params->print_progress_sample_output;
+  int print_progress_to_file = params->print_progress_to_file;
+  int print_progress_number_of_chars = params->print_progress_number_of_chars;
+  char *print_progress_to_file_name = params->print_sample_output_to_file_name;
+  char *print_progress_to_file_arg = params->print_sample_output_to_file_arg;
+  int store_progress_every_x_iterations = params->store_progress_every_x_iterations;
+  char *store_progress_file_name = params->store_progress_file_name;
+  int store_network_every = params->store_network_every;

   lstm_values_state_t ** stateful_d_next;
   lstm_values_cache_t ***cache_layers;
   lstm_values_next_cache_t **d_next_layers;

   lstm_model_t **gradient_layers, **gradient_layers_entry, **M_layers, **R_layers;

-  N = model->N;
-  F = model->F;
-  S = model->S;
+  N = model_layers[0]->N;
+  F = model_layers[0]->F;
+  S = model_layers[0]->S;

   double first_layer_input[F];

@@ -1252,7 +1254,7 @@ void lstm_train(lstm_model_t* model, lstm_model_t** model_layers, set_T* char_in
     lstm_init_fail("Failed to allocate memory for stateful backprop through time deltas\n");
   i = 0;
   while ( i < layers) {
-    stateful_d_next[i] = calloc( training_points/model->params->mini_batch_size + 1, sizeof(lstm_values_state_t));
+    stateful_d_next[i] = calloc( training_points/params->mini_batch_size + 1, sizeof(lstm_values_state_t));
     if ( stateful_d_next[i] == NULL )
       lstm_init_fail("Failed to allocate memory for stateful backprop through time deltas, inner in layer\n");
     lstm_values_state_init(&stateful_d_next[i], N);
@@ -1267,12 +1269,12 @@ void lstm_train(lstm_model_t* model, lstm_model_t** model_layers, set_T* char_in
     lstm_init_fail("Failed to allocate memory for the caches\n");

   while ( i < layers ) {
-    cache_layers[i] = calloc(model->params->mini_batch_size + 1, sizeof(lstm_values_cache_t*));
+    cache_layers[i] = calloc(params->mini_batch_size + 1, sizeof(lstm_values_cache_t*));
     if ( cache_layers[i] == NULL )
       lstm_init_fail("Failed to allocate memory for the caches\n");

     p = 0;
-    while ( p < model->params->mini_batch_size + 1 ){
+    while ( p < params->mini_batch_size + 1 ){
       cache_layers[i][p] = lstm_cache_container_init(N, F);
       if ( cache_layers[i][p] == NULL )
         lstm_init_fail("Failed to allocate memory for the caches\n");
@@ -1294,7 +1296,7 @@ void lstm_train(lstm_model_t* model, lstm_model_t** model_layers, set_T* char_in
   if ( d_next_layers == NULL )
     lstm_init_fail("Failed to allocate memory for backprop through time deltas\n");

-  if ( model->params->optimizer == OPTIMIZE_ADAM ) {
+  if ( params->optimizer == OPTIMIZE_ADAM ) {

     M_layers = calloc(layers, sizeof(lstm_model_t*) );
     R_layers = calloc(layers, sizeof(lstm_model_t*) );
@@ -1307,13 +1309,13 @@ void lstm_train(lstm_model_t* model, lstm_model_t** model_layers, set_T* char_in

   i = 0;
   while ( i < layers ) {
-    lstm_init_model(F, N, &gradient_layers[i], 1, model->params);
-    lstm_init_model(F, N, &gradient_layers_entry[i], 1, model->params);
+    lstm_init_model(F, N, &gradient_layers[i], 1, params);
+    lstm_init_model(F, N, &gradient_layers_entry[i], 1, params);
     lstm_values_next_cache_init(&d_next_layers[i], N, F);

-    if ( model->params->optimizer == OPTIMIZE_ADAM ) {
-      lstm_init_model(F, N, &M_layers[i], 1, model->params);
-      lstm_init_model(F, N, &R_layers[i], 1, model->params);
+    if ( params->optimizer == OPTIMIZE_ADAM ) {
+      lstm_init_model(F, N, &M_layers[i], 1, params);
+      lstm_init_model(F, N, &R_layers[i], 1, params);
     }

     ++i;
@@ -1341,9 +1343,9 @@ void lstm_train(lstm_model_t* model, lstm_model_t** model_layers, set_T* char_in

     unsigned int check = i % training_points;

-    trailing = model->params->mini_batch_size;
+    trailing = params->mini_batch_size;

-    if ( i + model->params->mini_batch_size >= training_points ) {
+    if ( i + params->mini_batch_size >= training_points ) {
       trailing = training_points - i;
     }

@@ -1385,7 +1387,7 @@ void lstm_train(lstm_model_t* model, lstm_model_t** model_layers, set_T* char_in
     if ( loss < 0 )
       loss = loss_tmp;

-    loss = loss_tmp * model->params->loss_moving_avg + (1 - model->params->loss_moving_avg) * loss;
+    loss = loss_tmp * params->loss_moving_avg + (1 - params->loss_moving_avg) * loss;

     if ( n == 0 )
       record_keeper = loss;
@@ -1450,18 +1452,18 @@ void lstm_train(lstm_model_t* model, lstm_model_t** model_layers, set_T* char_in
     p = 0;
     while ( p < layers ) {

-      if ( model->params->gradient_clip )
-        gradients_clip(gradient_layers[p], model->params->gradient_clip_limit);
+      if ( params->gradient_clip )
+        gradients_clip(gradient_layers[p], params->gradient_clip_limit);

-      if ( model->params->gradient_fit )
-        gradients_fit(gradient_layers[p], model->params->gradient_clip_limit);
+      if ( params->gradient_fit )
+        gradients_fit(gradient_layers[p], params->gradient_clip_limit);

       ++p;
     }

     p = 0;

-    switch ( model->params->optimizer ) {
+    switch ( params->optimizer ) {
     case OPTIMIZE_ADAM:
       while ( p < layers ) {
         gradients_adam_optimizer(model_layers[p], gradient_layers[p], M_layers[p], R_layers[p], n);
@@ -1495,7 +1497,7 @@ lstm_model_parameters_t has a field called 'optimizer'. Set this value to:\n\
       time(&time_iter);
       strftime(time_buffer, sizeof time_buffer, "%X", localtime(&time_iter));

-      printf("%s Iteration: %lu (epoch: %lu), Loss: %lf, record: %lf (iteration: %d), LR: %lf\n", time_buffer, n, epoch, loss, record_keeper, record_iteration, model->params->learning_rate);
+      printf("%s Iteration: %lu (epoch: %lu), Loss: %lf, record: %lf (iteration: %d), LR: %lf\n", time_buffer, n, epoch, loss, record_keeper, record_iteration, params->learning_rate);

       if ( print_progress_sample_output ) {
         printf("=====================================================\n");
@@ -1516,20 +1518,29 @@ lstm_model_parameters_t has a field called 'optimizer'. Set this value to:\n\
       fflush(stdout);
     }

-    if ( store_progress_evert_x_iterations && !(n % store_progress_evert_x_iterations ))
+    if ( store_progress_every_x_iterations && !(n % store_progress_every_x_iterations ))
       lstm_store_progress(store_progress_file_name, n, loss);

-    if ( b + model->params->mini_batch_size >= training_points )
+    if ( store_network_every && !(n % store_network_every) ) {
+      lstm_store_net_layers(model_layers, params->store_network_name_raw, layers);
+      lstm_store_net_layers_as_json(model_layers, params->store_network_name_json, params->store_char_indx_map_name, char_index_mapping, layers);
+      printf("\nStored the net as: '%s'\nYou can use that file in the .html interface.\n",
+        params->store_network_name_json);
+      printf("The net in its raw format is stored as: '%s'.\nYou can use that with the -r flag \
+to continue refining the weights.\n", params->store_network_name_raw);
+    }
+
+    if ( b + params->mini_batch_size >= training_points )
       epoch++;

-    i = (b + model->params->mini_batch_size) % training_points;
+    i = (b + params->mini_batch_size) % training_points;

-    if ( i < model->params->mini_batch_size){
+    if ( i < params->mini_batch_size){
       i = 0;
     }

     if ( decrease_lr ) {
-      model->params->learning_rate = initial_learning_rate / ( 1.0 + n / model->params->learning_rate_decrease );
+      params->learning_rate = initial_learning_rate / ( 1.0 + n / params->learning_rate_decrease );
       // printf("learning rate: %lf\n", model->params->learning_rate);
     }

@@ -1541,7 +1552,7 @@ lstm_model_parameters_t has a field called 'optimizer'. Set this value to:\n\
     lstm_values_next_cache_free(d_next_layers[p]);

     i = 0;
-    while ( i < model->params->mini_batch_size) {
+    while ( i < params->mini_batch_size) {
       lstm_cache_container_free(cache_layers[p][i]);
       lstm_cache_container_free(cache_layers[p][i]);
       ++i;
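For reference, the decrease_lr path updated above (exposed as the -dl flag in main.c further down) recomputes the rate from the initial value on every iteration, i.e. an inverse-time decay lr(n) = initial_lr / (1 + n / learning_rate_decrease), rather than chaining from the previous rate. A minimal standalone sketch of that schedule; the helper name decayed_learning_rate and the example constants are ours, not part of the library:

#include <stdio.h>

/* Inverse-time decay as applied in lstm_train() when decrease_lr is set:
 * lr(n) = initial_lr / (1 + n / learning_rate_decrease)                  */
static double decayed_learning_rate(double initial_lr, unsigned long n,
                                    double learning_rate_decrease)
{
  return initial_lr / (1.0 + n / learning_rate_decrease);
}

int main(void)
{
  unsigned long n;
  /* e.g. a run started with -lr 0.03 -dl 10000 (illustrative values) */
  for (n = 0; n <= 40000; n += 10000)
    printf("iteration %lu: lr = %lf\n", n,
           decayed_learning_rate(0.03, n, 10000.0));
  return 0;
}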

lstm.h

Lines changed: 8 additions & 4 deletions

@@ -52,6 +52,7 @@ typedef struct lstm_model_parameters_t {
   int model_regularize;
   int stateful;
   int decrease_lr;
+  double learning_rate_decrease;

   // How many layers
   int layers;
@@ -64,14 +65,17 @@ typedef struct lstm_model_parameters_t {
   int print_progress_number_of_chars;
   char *print_sample_output_to_file_name;
   char *print_sample_output_to_file_arg;
-  int store_progress_evert_x_iterations;
+  int store_progress_every_x_iterations;
   char *store_progress_file_name;
+  int store_network_every;
+  char *store_network_name_raw;
+  char *store_network_name_json;
+  char *store_char_indx_map_name;

-  int learning_rate_decrease_threshold;
-  double learning_rate_decrease;
   // General parameters
   int mini_batch_size;
   double gradient_clip_limit;
+  unsigned long iterations;
 } lstm_model_parameters_t;

 typedef struct lstm_model_t
@@ -180,7 +184,7 @@ void lstm_store_progress(const char*, unsigned int, double);

 // The main entry point
 // model, number of training points, X_train, Y_train, number of iterations
-void lstm_train(lstm_model_t*, lstm_model_t**, set_T*, unsigned int, int*, int*, unsigned long, int);
+void lstm_train(lstm_model_t**, lstm_model_parameters_t*, set_T*, unsigned int, int*, int*, int);
 // Used to output a given number of characters from the net based on an input char
 void lstm_output_string_layers(lstm_model_t **, set_T*, int, int, int);
 void lstm_output_string_from_string_layers(lstm_model_t **, set_T*, char *, int, int);
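With the changed prototype, callers hand lstm_train() the parameter block directly instead of it being reached through model->params, and the iteration count now travels inside that block. A minimal calling sketch against the new declaration, assuming lstm.h pulls in its own dependencies as in the repository; the wrapper name train_layers and the literal values are illustrative only, and main.c below shows the actual call:

#include "lstm.h"

/* Illustrative wrapper around the new lstm_train() prototype. */
static void train_layers(lstm_model_t **model_layers,
                         lstm_model_parameters_t *params,
                         set_T *char_index_mapping,
                         unsigned int training_points,
                         int *X_train, int *Y_train, int layers)
{
  params->iterations = 100000;        /* replaces the old ITERATIONS argument   */
  params->store_network_every = 1000; /* 0 disables the periodic .net/.json dump */

  lstm_train(model_layers, params, char_index_mapping,
             training_points, X_train, Y_train, layers);
}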

main.c

Lines changed: 76 additions & 13 deletions

@@ -35,6 +35,71 @@ to continue refining the weights.\n", STD_LOADABLE_NET_NAME);
   return;
 }

+void usage(char *argv[]) {
+  printf("Usage: %s datafile [flag value]*\n", argv[0]);
+  printf("\n");
+  printf("Flags can be used to change the training procedure.\n");
+  printf("The flags require a value to be passed as the following argument.\n");
+  printf(" E.g., this is how you traing with a learning rate set to 0.03:\n");
+  printf(" %s datafile -lr 0.03\n", argv[0]);
+  printf("\n");
+  printf("The following flags are available:\n");
+  printf(" -r : read a previously trained network, the name of which is currently configured to be '%s'.\n", STD_LOADABLE_NET_NAME);
+  printf(" -lr: learning rate that is to be used during training, see the example above.\n");
+  printf(" -it: the number of iterations used for training (not to be confused with epochs).\n");
+  printf(" -mb: mini batch size.\n");
+  printf(" -dl: decrease the learning rate over time, according to lr(n+1) <- lr(n) / (1 + n/value).\n");
+  printf(" -st: number of iterations between how the network is continously stored during training (.json and .net).\n");
+  printf("\n");
+  printf("Check std_conf.h to see what default values are used, these are set during compilation.\n");
+  printf("\n");
+  printf("%s compiled %s %s\n", argv[0], __DATE__, __TIME__);
+  exit(1);
+}
+
+void parse_input_args(int argc, char** argv, lstm_model_parameters_t* params)
+{
+  int a = 0;
+
+  while ( a < argc ) {
+
+    if ( argc <= (a+1) )
+      break; // All flags have values attributed to them
+
+    if ( !strcmp(argv[a], "-r") ) {
+      lstm_read_net_layers(model_layers, argv[a + 1], LAYERS);
+    } else if ( !strcmp(argv[a], "-lr") ) {
+      params->learning_rate = atof(argv[a + 1]);
+      if ( params->learning_rate == 0.0 ) {
+        usage(argv);
+      }
+    } else if ( !strcmp(argv[a], "-mb") ) {
+      params->mini_batch_size = atoi(argv[a + 1]);
+      if ( params->mini_batch_size <= 0 ) {
+        usage(argv);
+      }
+    } else if ( !strcmp(argv[a], "-it") ) {
+      params->iterations = (unsigned long) atol(argv[a + 1]);
+      if ( params->iterations == 0 ) {
+        usage(argv);
+      }
+    } else if ( !strcmp(argv[a], "-dl") ) {
+      params->learning_rate_decrease = atof(argv[a + 1]);
+      if ( params->learning_rate_decrease == 0 ) {
+        usage(argv);
+      }
+      params->decrease_lr = 1;
+    } else if ( !strcmp(argv[a], "-st") ) {
+      params->store_network_every = atoi(argv[a + 1]);
+      if ( params->store_network_every == 0 ) {
+        usage(argv);
+      }
+    }
+
+    a += 2;
+  }
+}
+
 int main(int argc, char *argv[])
 {
   int i = 0, c, p = 0;
@@ -43,9 +108,12 @@ int main(int argc, char *argv[])
   char * clean;
   FILE * fp;

+  int layers = LAYERS;
+
   lstm_model_parameters_t params;
   memset(&params, 0, sizeof(params));

+  params.iterations = ITERATIONS;
   params.loss_moving_avg = LOSS_MOVING_AVG;
   params.learning_rate = STD_LEARNING_RATE;
   params.momentum = STD_MOMENTUM;
@@ -54,7 +122,6 @@ int main(int argc, char *argv[])
   params.mini_batch_size = MINI_BATCH_SIZE;
   params.gradient_clip_limit = GRADIENT_CLIP_LIMIT;
   params.learning_rate_decrease = STD_LEARNING_RATE_DECREASE;
-  params.learning_rate_decrease_threshold = STD_LEARNING_RATE_THRESHOLD;
   params.stateful = STATEFUL;
   params.beta1 = 0.9;
   params.beta2 = 0.999;
@@ -72,13 +139,16 @@ int main(int argc, char *argv[])
   params.print_progress_number_of_chars = NUMBER_OF_CHARS_TO_DISPLAY_DURING_TRAINING;
   params.print_sample_output_to_file_arg = PRINT_SAMPLE_OUTPUT_TO_FILE_ARG;
   params.print_sample_output_to_file_name = PRINT_SAMPLE_OUTPUT_TO_FILE_NAME;
-  params.store_progress_evert_x_iterations = STORE_PROGRESS_EVERY_X_ITERATIONS;
+  params.store_progress_every_x_iterations = STORE_PROGRESS_EVERY_X_ITERATIONS;
   params.store_progress_file_name = PROGRESS_FILE_NAME;
+  params.store_network_name_raw = STD_LOADABLE_NET_NAME;
+  params.store_network_name_json = STD_LOADABLE_NET_NAME;
+  params.store_char_indx_map_name = JSON_KEY_NAME_SET;

   srand( time ( NULL ) );

   if ( argc < 2 ) {
-    printf("Usage: %s datafile [-r name_of_net_to_load]\n", argv[0]);
+    usage(argv);
     return -1;
   }

@@ -110,10 +180,6 @@ int main(int argc, char *argv[])
     X_train[sz++] = set_char_to_indx(&set,c);
   fclose(fp);

-  int layers = LAYERS;
-
-  params.layers = layers;
-
   model_layers = calloc(layers, sizeof(lstm_model_t*));

   if ( model_layers == NULL ) {
@@ -124,14 +190,12 @@ int main(int argc, char *argv[])
   p = 0;

   while ( p < layers ) {
+    // All layers have the same training parameters
     lstm_init_model(set_get_features(&set), NEURONS, &model_layers[p], 0, &params);
     ++p;
   }

-  if ( argc >= 4 && !strcmp(argv[2], "-r") ) {
-    lstm_read_net_layers(model_layers, argv[3], LAYERS);
-  }
-
+  parse_input_args(argc, argv, &params);

   if ( argc >= 6 && !strcmp(argv[4], "-c") ) {
     do {
@@ -149,13 +213,12 @@ int main(int argc, char *argv[])
   signal(SIGINT, store_the_net_layers);

   lstm_train(
-    model_layers[0],
     model_layers,
+    &params,
     &set,
     file_size,
     X_train,
     Y_train,
-    ITERATIONS,
     layers
   );

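Taken together with the usage text added above, a training run is invoked as the datafile followed by any number of flag/value pairs, for example ./lstm datafile -lr 0.03 -it 20000 -mb 100 -dl 10000 -st 1000 (the binary name and the values here are only illustrative; the program prints its own name via argv[0]). Each flag consumes the next argument as its value, which is why parse_input_args() steps through argv two entries at a time.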
0 commit comments