@@ -1212,37 +1212,39 @@ void lstm_model_regularization(lstm_model_t* model, lstm_model_t* gradients)
   vectors_add_scalar_multiply(gradients->bf, model->bf, model->N, lambda);
 }
 
-// model, number of training points, X_train, Y_train, number of iterations
-void lstm_train(lstm_model_t* model, lstm_model_t** model_layers, set_T* char_index_mapping, unsigned int training_points, int* X_train, int* Y_train, unsigned long iterations, int layers)
+// model, number of training points, X_train, Y_train
+void lstm_train(lstm_model_t** model_layers, lstm_model_parameters_t* params, set_T* char_index_mapping, unsigned int training_points, int* X_train, int* Y_train, int layers)
 {
   int N,F,S, status = 0, p = 0;
   unsigned int i = 0, b = 0, q = 0, e1 = 0, e2 = 0, e3, record_iteration = 0, tmp_count, trailing;
-  unsigned long n = 0, decrease_threshold = model->params->learning_rate_decrease_threshold, epoch = 0;
+  unsigned long n = 0, epoch = 0;
   double loss = -1, loss_tmp = 0.0, record_keeper = 0.0;
-  double initial_learning_rate = model->params->learning_rate;
+  double initial_learning_rate = params->learning_rate;
   time_t time_iter;
   char time_buffer[40];
-  int stateful = model->params->stateful, decrease_lr = model->params->decrease_lr;
+  unsigned long iterations = params->iterations;
+  int stateful = params->stateful, decrease_lr = params->decrease_lr;
   // configuration for output printing during training
-  int print_progress = model->params->print_progress;
-  int print_progress_iterations = model->params->print_progress_iterations;
-  int print_progress_sample_output = model->params->print_progress_sample_output;
-  int print_progress_to_file = model->params->print_progress_to_file;
-  int print_progress_number_of_chars = model->params->print_progress_number_of_chars;
-  char* print_progress_to_file_name = model->params->print_sample_output_to_file_name;
-  char* print_progress_to_file_arg = model->params->print_sample_output_to_file_arg;
-  int store_progress_evert_x_iterations = model->params->store_progress_evert_x_iterations;
-  char* store_progress_file_name = model->params->store_progress_file_name;
+  int print_progress = params->print_progress;
+  int print_progress_iterations = params->print_progress_iterations;
+  int print_progress_sample_output = params->print_progress_sample_output;
+  int print_progress_to_file = params->print_progress_to_file;
+  int print_progress_number_of_chars = params->print_progress_number_of_chars;
+  char* print_progress_to_file_name = params->print_sample_output_to_file_name;
+  char* print_progress_to_file_arg = params->print_sample_output_to_file_arg;
+  int store_progress_every_x_iterations = params->store_progress_every_x_iterations;
+  char* store_progress_file_name = params->store_progress_file_name;
+  int store_network_every = params->store_network_every;
 
   lstm_values_state_t** stateful_d_next;
   lstm_values_cache_t*** cache_layers;
   lstm_values_next_cache_t** d_next_layers;
 
   lstm_model_t **gradient_layers, **gradient_layers_entry, **M_layers, **R_layers;
 
-  N = model->N;
-  F = model->F;
-  S = model->S;
+  N = model_layers[0]->N;
+  F = model_layers[0]->F;
+  S = model_layers[0]->S;
 
   double first_layer_input[F];
 
@@ -1252,7 +1254,7 @@ void lstm_train(lstm_model_t* model, lstm_model_t** model_layers, set_T* char_in
     lstm_init_fail("Failed to allocate memory for stateful backprop through time deltas\n");
   i = 0;
   while ( i < layers ) {
-    stateful_d_next[i] = calloc( training_points/model->params->mini_batch_size + 1, sizeof(lstm_values_state_t));
+    stateful_d_next[i] = calloc( training_points/params->mini_batch_size + 1, sizeof(lstm_values_state_t));
     if ( stateful_d_next[i] == NULL )
       lstm_init_fail("Failed to allocate memory for stateful backprop through time deltas, inner in layer\n");
     lstm_values_state_init(&stateful_d_next[i], N);
@@ -1267,12 +1269,12 @@ void lstm_train(lstm_model_t* model, lstm_model_t** model_layers, set_T* char_in
     lstm_init_fail("Failed to allocate memory for the caches\n");
 
   while ( i < layers ) {
-    cache_layers[i] = calloc(model->params->mini_batch_size + 1, sizeof(lstm_values_cache_t*));
+    cache_layers[i] = calloc(params->mini_batch_size + 1, sizeof(lstm_values_cache_t*));
     if ( cache_layers[i] == NULL )
       lstm_init_fail("Failed to allocate memory for the caches\n");
 
     p = 0;
-    while ( p < model->params->mini_batch_size + 1 ){
+    while ( p < params->mini_batch_size + 1 ){
       cache_layers[i][p] = lstm_cache_container_init(N, F);
       if ( cache_layers[i][p] == NULL )
         lstm_init_fail("Failed to allocate memory for the caches\n");
@@ -1294,7 +1296,7 @@ void lstm_train(lstm_model_t* model, lstm_model_t** model_layers, set_T* char_in
   if ( d_next_layers == NULL )
     lstm_init_fail("Failed to allocate memory for backprop through time deltas\n");
 
-  if ( model->params->optimizer == OPTIMIZE_ADAM ) {
+  if ( params->optimizer == OPTIMIZE_ADAM ) {
 
     M_layers = calloc(layers, sizeof(lstm_model_t*));
     R_layers = calloc(layers, sizeof(lstm_model_t*));
@@ -1307,13 +1309,13 @@ void lstm_train(lstm_model_t* model, lstm_model_t** model_layers, set_T* char_in
 
   i = 0;
   while ( i < layers ) {
-    lstm_init_model(F, N, &gradient_layers[i], 1, model->params);
-    lstm_init_model(F, N, &gradient_layers_entry[i], 1, model->params);
+    lstm_init_model(F, N, &gradient_layers[i], 1, params);
+    lstm_init_model(F, N, &gradient_layers_entry[i], 1, params);
     lstm_values_next_cache_init(&d_next_layers[i], N, F);
 
-    if ( model->params->optimizer == OPTIMIZE_ADAM ) {
-      lstm_init_model(F, N, &M_layers[i], 1, model->params);
-      lstm_init_model(F, N, &R_layers[i], 1, model->params);
+    if ( params->optimizer == OPTIMIZE_ADAM ) {
+      lstm_init_model(F, N, &M_layers[i], 1, params);
+      lstm_init_model(F, N, &R_layers[i], 1, params);
     }
 
     ++i;
@@ -1341,9 +1343,9 @@ void lstm_train(lstm_model_t* model, lstm_model_t** model_layers, set_T* char_in
 
     unsigned int check = i % training_points;
 
-    trailing = model->params->mini_batch_size;
+    trailing = params->mini_batch_size;
 
-    if ( i + model->params->mini_batch_size >= training_points ) {
+    if ( i + params->mini_batch_size >= training_points ) {
       trailing = training_points - i;
     }
 
@@ -1385,7 +1387,7 @@ void lstm_train(lstm_model_t* model, lstm_model_t** model_layers, set_T* char_in
     if ( loss < 0 )
       loss = loss_tmp;
 
-    loss = loss_tmp * model->params->loss_moving_avg + (1 - model->params->loss_moving_avg) * loss;
+    loss = loss_tmp * params->loss_moving_avg + (1 - params->loss_moving_avg) * loss;
 
     if ( n == 0 )
       record_keeper = loss;
@@ -1450,18 +1452,18 @@ void lstm_train(lstm_model_t* model, lstm_model_t** model_layers, set_T* char_in
     p = 0;
     while ( p < layers ) {
 
-      if ( model->params->gradient_clip )
-        gradients_clip(gradient_layers[p], model->params->gradient_clip_limit);
+      if ( params->gradient_clip )
+        gradients_clip(gradient_layers[p], params->gradient_clip_limit);
 
-      if ( model->params->gradient_fit )
-        gradients_fit(gradient_layers[p], model->params->gradient_clip_limit);
+      if ( params->gradient_fit )
+        gradients_fit(gradient_layers[p], params->gradient_clip_limit);
 
       ++p;
     }
 
     p = 0;
 
-    switch ( model->params->optimizer ) {
+    switch ( params->optimizer ) {
     case OPTIMIZE_ADAM:
       while ( p < layers ) {
         gradients_adam_optimizer(model_layers[p], gradient_layers[p], M_layers[p], R_layers[p], n);
@@ -1495,7 +1497,7 @@ lstm_model_parameters_t has a field called 'optimizer'. Set this value to:\n\
       time(&time_iter);
       strftime(time_buffer, sizeof time_buffer, "%X", localtime(&time_iter));
 
-      printf("%s Iteration: %lu (epoch: %lu), Loss: %lf, record: %lf (iteration: %d), LR: %lf\n", time_buffer, n, epoch, loss, record_keeper, record_iteration, model->params->learning_rate);
+      printf("%s Iteration: %lu (epoch: %lu), Loss: %lf, record: %lf (iteration: %d), LR: %lf\n", time_buffer, n, epoch, loss, record_keeper, record_iteration, params->learning_rate);
 
       if ( print_progress_sample_output ) {
         printf("=====================================================\n");
@@ -1516,20 +1518,29 @@ lstm_model_parameters_t has a field called 'optimizer'. Set this value to:\n\
         fflush(stdout);
       }
 
-      if ( store_progress_evert_x_iterations && !(n % store_progress_evert_x_iterations) )
+      if ( store_progress_every_x_iterations && !(n % store_progress_every_x_iterations) )
         lstm_store_progress(store_progress_file_name, n, loss);
 
-      if ( b + model->params->mini_batch_size >= training_points )
+      if ( store_network_every && !(n % store_network_every) ) {
+        lstm_store_net_layers(model_layers, params->store_network_name_raw, layers);
+        lstm_store_net_layers_as_json(model_layers, params->store_network_name_json, params->store_char_indx_map_name, char_index_mapping, layers);
+        printf("\nStored the net as: '%s'\nYou can use that file in the .html interface.\n",
+          params->store_network_name_json);
+        printf("The net in its raw format is stored as: '%s'.\nYou can use that with the -r flag \
+to continue refining the weights.\n", params->store_network_name_raw);
+      }
+
+      if ( b + params->mini_batch_size >= training_points )
         epoch++;
 
-      i = (b + model->params->mini_batch_size) % training_points;
+      i = (b + params->mini_batch_size) % training_points;
 
-      if ( i < model->params->mini_batch_size ){
+      if ( i < params->mini_batch_size ){
         i = 0;
       }
 
       if ( decrease_lr ) {
-        model->params->learning_rate = initial_learning_rate / ( 1.0 + n / model->params->learning_rate_decrease );
+        params->learning_rate = initial_learning_rate / ( 1.0 + n / params->learning_rate_decrease );
 // printf("learning rate: %lf\n", model->params->learning_rate);
       }
 
@@ -1541,7 +1552,7 @@ lstm_model_parameters_t has a field called 'optimizer'. Set this value to:\n\
     lstm_values_next_cache_free(d_next_layers[p]);
 
     i = 0;
-    while ( i < model->params->mini_batch_size ) {
+    while ( i < params->mini_batch_size ) {
       lstm_cache_container_free(cache_layers[p][i]);
       lstm_cache_container_free(cache_layers[p][i]);
       ++i;
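
Taken together, these hunks replace every model->params->... lookup with the explicitly passed lstm_model_parameters_t* params and add optional periodic checkpointing driven by store_network_every. Below is a minimal caller sketch for the new lstm_train() signature; the concrete values and the surrounding setup (LAYERS, F, N, the training arrays and the character/index mapping) are illustrative assumptions, not part of this commit.

/* Hypothetical caller sketch (not part of the commit): invoking the refactored
   lstm_train() with the parameter struct passed explicitly. Field names are the
   ones visible in the diff above; everything else is assumed for illustration. */
lstm_model_parameters_t params = { 0 };
params.learning_rate = 0.001;
params.iterations = 1000000;
params.mini_batch_size = 100;
params.optimizer = OPTIMIZE_ADAM;
params.store_network_every = 0;   /* 0: never store the net mid-training */

lstm_model_t* model_layers[LAYERS];
int l = 0;
while ( l < LAYERS ) {
  /* mirrors the lstm_init_model() calls in the diff; the 0 zero-init flag is assumed */
  lstm_init_model(F, N, &model_layers[l], 0, &params);
  ++l;
}

lstm_train(model_layers, &params, char_index_mapping,
  training_points, X_train, Y_train, LAYERS);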