Skip to content

Commit da07da4

Browse files
committed
Fixing serious bug in storing networks during training
1 parent 0027b14 commit da07da4

File tree

3 files changed

+26
-18
lines changed

3 files changed

+26
-18
lines changed

lstm.c

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1609,7 +1609,7 @@ void lstm_model_regularization(lstm_model_t* model, lstm_model_t* gradients)
16091609
// model, number of training points, X_train, Y_train
16101610
void lstm_train(lstm_model_t** model_layers, lstm_model_parameters_t *params,
16111611
set_t* char_index_mapping, unsigned int training_points,
1612-
int* X_train, int* Y_train, unsigned int layers)
1612+
int* X_train, int* Y_train, unsigned int layers, double *loss_out)
16131613
{
16141614
int status = 0;
16151615
unsigned int p, i = 0, b = 0, q = 0, e1 = 0, e2 = 0,
@@ -1942,17 +1942,13 @@ void lstm_train(lstm_model_t** model_layers, lstm_model_parameters_t *params,
19421942
lstm_store_progress(store_progress_file_name, n, loss);
19431943

19441944
if ( store_network_every && !(n % store_network_every) ) {
1945-
FILE *fp_w = fopen(params->store_network_name_raw, "w");
1946-
if ( fp_w != NULL ) {
1947-
lstm_store_net_layers(model_layers, fp_w, layers);
1948-
fclose(fp_w);
1949-
}
1945+
lstm_store(
1946+
params->store_network_name_raw,
1947+
char_index_mapping,
1948+
model_layers,
1949+
layers);
19501950
lstm_store_net_layers_as_json(model_layers, params->store_network_name_json,
19511951
params->store_char_indx_map_name, char_index_mapping, layers);
1952-
printf("\nStored the net as: '%s'\nYou can use that file in the .html interface.\n",
1953-
params->store_network_name_json);
1954-
printf("The net in its raw format is stored as: '%s'.\nYou can use that with the -r flag \
1955-
to continue refining the weights.\n", params->store_network_name_raw);
19561952
}
19571953

19581954
if ( b + params->mini_batch_size >= training_points )
@@ -1972,6 +1968,9 @@ void lstm_train(lstm_model_t** model_layers, lstm_model_parameters_t *params,
19721968
++n;
19731969
}
19741970

1971+
// Reporting the loss value
1972+
*loss_out = loss;
1973+
19751974
p = 0;
19761975
while ( p < layers ) {
19771976
lstm_values_next_cache_free(d_next_layers[p]);

lstm.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,10 +280,13 @@ initialized with \ref lstm_init_model.
280280
if X[...] = 'hello' => Y[...] = 'ello ').
281281
* @param layers number of layers in the network, the number of models \p model \
282282
is pointing to. Internally if layers is L, then input is given to model[L-1] and \
283-
output collected at model[0].
283+
output collected at model[0].
284+
* @param loss the value of the loss function, put under a smoothing \
285+
moving average filter, after the training has been completed.
284286
*/
285287
void lstm_train(lstm_model_t** model, lstm_model_parameters_t*params,
286-
set_t* set, unsigned int training_points, int *X, int *Y, unsigned int layers);
288+
set_t* set, unsigned int training_points, int *X, int *Y, unsigned int layers,
289+
double *loss);
287290
/**
288291
* If you are training on textual data, this function can be used
289292
* to sample and output from the network directly to stdout.

main.c

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,13 @@ void store_the_net_layers(int signo)
3030
{
3131
if ( SIGINT == signo ) {
3232
if ( model_layers != NULL ){
33-
lstm_store(STD_LOADABLE_NET_NAME, &set,
33+
lstm_store(params.store_network_name_raw, &set,
3434
model_layers, params.layers);
35-
lstm_store_net_layers_as_json(model_layers, STD_JSON_NET_NAME, JSON_KEY_NAME_SET, &set, params.layers);
35+
lstm_store_net_layers_as_json(model_layers, params.store_network_name_json, JSON_KEY_NAME_SET, &set, params.layers);
3636
printf("\nStored the net as: '%s'\nYou can use that file in the .html interface.\n",
37-
STD_JSON_NET_NAME);
37+
params.store_network_name_json );
3838
printf("The net in its raw format is stored as: '%s'.\nYou can use that with the -r flag \
39-
to continue refining the weights.\n", STD_LOADABLE_NET_NAME);
39+
to continue refining the weights.\n", params.store_network_name_raw);
4040
} else {
4141
printf("\nFailed to store the net!\n");
4242
exit(-1);
@@ -65,6 +65,7 @@ void usage(char *argv[]) {
6565
printf(" -out: number of characters to output directly, note: a network and a datafile must be provided.\r\n");
6666
printf(" -L : Number of layers, may not exceed %d\r\n", LSTM_MAX_LAYERS);
6767
printf(" -N : Number of neurons in every layer\r\n");
68+
printf(" -vr : Verbosity level. Set to zero and only the loss function after and not during training will be printed.\n");
6869
printf("\r\n");
6970
printf("Check std_conf.h to see what default values are used, these are set during compilation.\r\n");
7071
printf("\r\n");
@@ -124,6 +125,8 @@ void parse_input_args(int argc, char** argv)
124125
if ( params.layers > LSTM_MAX_LAYERS ) {
125126
usage(argv);
126127
}
128+
} else if ( !strcmp(argv[a], "-vr") ) {
129+
params.print_progress = !!atoi(argv[a+1]);
127130
}
128131

129132
a += 2;
@@ -192,7 +195,7 @@ int main(int argc, char *argv[])
192195
params.store_progress_every_x_iterations = STORE_PROGRESS_EVERY_X_ITERATIONS;
193196
params.store_progress_file_name = PROGRESS_FILE_NAME;
194197
params.store_network_name_raw = STD_LOADABLE_NET_NAME;
195-
params.store_network_name_json = STD_LOADABLE_NET_NAME;
198+
params.store_network_name_json = STD_JSON_NET_NAME;
196199
params.store_char_indx_map_name = JSON_KEY_NAME_SET;
197200

198201
srand( time ( NULL ) );
@@ -336,6 +339,7 @@ Reallocating space in network input and output layer to accommodate this new fea
336339
lstm_output_string_from_string(model_layers, &set, argv[5], params.layers, 128);
337340

338341
} else {
342+
double loss;
339343

340344
assert(params.layers > 0);
341345

@@ -362,9 +366,11 @@ Reallocating space in network input and output layer to accommodate this new fea
362366
file_size,
363367
X_train,
364368
Y_train,
365-
params.layers
369+
params.layers,
370+
&loss
366371
);
367372

373+
printf("Loss after training: %lf\n", loss);
368374
}
369375

370376
free(model_layers);

0 commit comments

Comments
 (0)