Skip to content

Commit da07da4

Browse files
committed
Fixing serious bug in storing networks during training
1 parent 0027b14 commit da07da4

File tree

3 files changed

+26
-18
lines changed

3 files changed

+26
-18
lines changed

lstm.c

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1609,7 +1609,7 @@ void lstm_model_regularization(lstm_model_t* model, lstm_model_t* gradients)
16091609
// model, number of training points, X_train, Y_train
16101610
void lstm_train(lstm_model_t** model_layers, lstm_model_parameters_t *params,
16111611
set_t* char_index_mapping, unsigned int training_points,
1612-
int* X_train, int* Y_train, unsigned int layers)
1612+
int* X_train, int* Y_train, unsigned int layers, double *loss_out)
16131613
{
16141614
int status = 0;
16151615
unsigned int p, i = 0, b = 0, q = 0, e1 = 0, e2 = 0,
@@ -1942,17 +1942,13 @@ void lstm_train(lstm_model_t** model_layers, lstm_model_parameters_t *params,
19421942
lstm_store_progress(store_progress_file_name, n, loss);
19431943

19441944
if ( store_network_every && !(n % store_network_every) ) {
1945-
FILE *fp_w = fopen(params->store_network_name_raw, "w");
1946-
if ( fp_w != NULL ) {
1947-
lstm_store_net_layers(model_layers, fp_w, layers);
1948-
fclose(fp_w);
1949-
}
1945+
lstm_store(
1946+
params->store_network_name_raw,
1947+
char_index_mapping,
1948+
model_layers,
1949+
layers);
19501950
lstm_store_net_layers_as_json(model_layers, params->store_network_name_json,
19511951
params->store_char_indx_map_name, char_index_mapping, layers);
1952-
printf("\nStored the net as: '%s'\nYou can use that file in the .html interface.\n",
1953-
params->store_network_name_json);
1954-
printf("The net in its raw format is stored as: '%s'.\nYou can use that with the -r flag \
1955-
to continue refining the weights.\n", params->store_network_name_raw);
19561952
}
19571953

19581954
if ( b + params->mini_batch_size >= training_points )
@@ -1972,6 +1968,9 @@ void lstm_train(lstm_model_t** model_layers, lstm_model_parameters_t *params,
19721968
++n;
19731969
}
19741970

1971+
// Reporting the loss value
1972+
*loss_out = loss;
1973+
19751974
p = 0;
19761975
while ( p < layers ) {
19771976
lstm_values_next_cache_free(d_next_layers[p]);

lstm.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,10 +280,13 @@ initialized with \ref lstm_init_model.
280280
if X[...] = 'hello' => Y[...] = 'ello ').
281281
* @param layers number of layers in the network, the number of models \p model \
282282
is pointing to. Internally if layers is L, then input is given to model[L-1] and \
283-
output collected at model[0].
283+
output collected at model[0].
284+
* @param loss the value of the loss function, put under a smoothing \
285+
moving average filter, after the training has been completed.
284286
*/
285287
void lstm_train(lstm_model_t** model, lstm_model_parameters_t*params,
286-
set_t* set, unsigned int training_points, int *X, int *Y, unsigned int layers);
288+
set_t* set, unsigned int training_points, int *X, int *Y, unsigned int layers,
289+
double *loss);
287290
/**
288291
* If you are training on textual data, this function can be used
289292
* to sample and output from the network directly to stdout.

main.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,13 @@ void store_the_net_layers(int signo)
3030
{
3131
if ( SIGINT == signo ) {
3232
if ( model_layers != NULL ){
33-
lstm_store(STD_LOADABLE_NET_NAME, &set,
33+
lstm_store(params.store_network_name_raw, &set,
3434
model_layers, params.layers);
35-
lstm_store_net_layers_as_json(model_layers, STD_JSON_NET_NAME, JSON_KEY_NAME_SET, &set, params.layers);
35+
lstm_store_net_layers_as_json(model_layers, params.store_network_name_json, JSON_KEY_NAME_SET, &set, params.layers);
3636
printf("\nStored the net as: '%s'\nYou can use that file in the .html interface.\n",
37-
STD_JSON_NET_NAME);
37+
params.store_network_name_json );
3838
printf("The net in its raw format is stored as: '%s'.\nYou can use that with the -r flag \
39-
to continue refining the weights.\n", STD_LOADABLE_NET_NAME);
39+
to continue refining the weights.\n", params.store_network_name_raw);
4040
} else {
4141
printf("\nFailed to store the net!\n");
4242
exit(-1);
@@ -65,6 +65,7 @@ void usage(char *argv[]) {
6565
printf(" -out: number of characters to output directly, note: a network and a datafile must be provided.\r\n");
6666
printf(" -L : Number of layers, may not exceed %d\r\n", LSTM_MAX_LAYERS);
6767
printf(" -N : Number of neurons in every layer\r\n");
68+
printf(" -vr : Verbosity level. Set to zero and only the loss function after and not during training will be printed.\n");
6869
printf("\r\n");
6970
printf("Check std_conf.h to see what default values are used, these are set during compilation.\r\n");
7071
printf("\r\n");
@@ -124,6 +125,8 @@ void parse_input_args(int argc, char** argv)
124125
if ( params.layers > LSTM_MAX_LAYERS ) {
125126
usage(argv);
126127
}
128+
} else if ( !strcmp(argv[a], "-vr") ) {
129+
params.print_progress = !!atoi(argv[a+1]);
127130
}
128131

129132
a += 2;
@@ -192,7 +195,7 @@ int main(int argc, char *argv[])
192195
params.store_progress_every_x_iterations = STORE_PROGRESS_EVERY_X_ITERATIONS;
193196
params.store_progress_file_name = PROGRESS_FILE_NAME;
194197
params.store_network_name_raw = STD_LOADABLE_NET_NAME;
195-
params.store_network_name_json = STD_LOADABLE_NET_NAME;
198+
params.store_network_name_json = STD_JSON_NET_NAME;
196199
params.store_char_indx_map_name = JSON_KEY_NAME_SET;
197200

198201
srand( time ( NULL ) );
@@ -336,6 +339,7 @@ Reallocating space in network input and output layer to accommodate this new fea
336339
lstm_output_string_from_string(model_layers, &set, argv[5], params.layers, 128);
337340

338341
} else {
342+
double loss;
339343

340344
assert(params.layers > 0);
341345

@@ -362,9 +366,11 @@ Reallocating space in network input and output layer to accommodate this new fea
362366
file_size,
363367
X_train,
364368
Y_train,
365-
params.layers
369+
params.layers,
370+
&loss
366371
);
367372

373+
printf("Loss after training: %lf\n", loss);
368374
}
369375

370376
free(model_layers);

0 commit comments

Comments
 (0)