Commit 2287f4d

JanFSchulte, jmitrevs, and pre-commit-ci[bot] authored
Initial values for the hidden/cell state for LSTM and GRU models in Pytorch (#1120)
* allow initial values for the hidden/cell state to be passed for LSTM and GRU models
* initial state rnns for oneAPI
* fix data types in quartus
* more type updates
* update types for lstm init state oneAPI
* fix pytorch_order for GRU, recurrent bias for simpleNN, oneAPI
* fix simplernn in oneAPI
* snapshot that compiles but fails pytests
* fix order of indices for pytorch simple RNN oneAPI
* [pre-commit.ci] auto fixes from pre-commit hooks
* trigger pre-commit
* trigger pre-commit
* trigger pre-commit
* fix simple-rnn config for Keras; make test names unique
* remove unused base config, update style
* style comments from Vladimir

---------

Co-authored-by: Jovan Mitrevski <jmitrevs@fnal.gov>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 18ccc61 commit 2287f4d
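For context, the usage pattern this commit enables — an RNN whose initial hidden/cell states are passed in as extra model inputs — looks as follows in PyTorch. This is a minimal sketch with made-up layer sizes, not code from the repository; before this change the converter dropped the extra inputs and warned that the initial state was assumed to be all zeros (see the recurrent.py diff below).

import torch
import torch.nn as nn


class LSTMWithInitState(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=8, hidden_size=16, batch_first=True)

    def forward(self, x, h0, c0):
        # PyTorch takes the initial states as a single (h0, c0) tuple
        out, _ = self.lstm(x, (h0, c0))
        return out


# x: (batch, seq, features); h0/c0: (num_layers, batch, hidden_size)
model = LSTMWithInitState()
out = model(torch.randn(1, 4, 8), torch.zeros(1, 1, 16), torch.zeros(1, 1, 16))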

File tree

11 files changed: +755 −80 lines

hls4ml/backends/oneapi/passes/recurrent_templates.py

Lines changed: 54 additions & 7 deletions
@@ -92,10 +92,14 @@
     using activation_recr = nnet::activation::{recurrent_activation}<x_T, y_T, config_T>;

     static const unsigned reuse_factor = {reuse};
+    static const unsigned pytorch_order = {pytorch};
     static const bool store_weights_in_bram = false;
 }};\n'''

 gru_function_template = 'nnet::gru<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {wr}, {b}, {br});'
+gru_function_initial_state_template = (
+    'nnet::gru_init_state<{input_t}, {h_t}, {output_t}, {config}>({input}, {init_state}, {output}, {w}, {wr}, {b}, {br});'
+)
 gru_task_sequence_template = 'task_sequence<nnet::gru_stream<{input_pipe}, {output_pipe}, {config}>> {name};'
 gru_stream_function_template = '{name}.async({w}, {wr}, {b}, {br});'

@@ -120,6 +124,7 @@ def format(self, node):
         params['config_mult_h'] = f'config{node.index}_h_mult'
         params['act_t'] = '{}_config{}'.format(node.get_attr('activation'), str(node.index) + '_act')
         params['act_recurrent_t'] = '{}_config{}'.format(node.get_attr('recurrent_activation'), str(node.index) + '_rec_act')
+        params['pytorch'] = 'true' if node.get_attr('pytorch', False) else 'false'
         gru_config = self.gru_template.format(**params)

         # Activation is on candidate hidden state, dimensionality (1, n_units)
@@ -163,15 +168,23 @@ def format(self, node):
 class GRUFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
         super().__init__(GRU, include_header=recurrent_include_list)
-        self.template = gru_function_template

     def format(self, node):
         params = self._default_function_params(node)
+        if params['pass_initial_states'] == 'true':
+            params['h_t'] = node.get_input_variable(node.inputs[1]).type.name
+            params['init_state'] = node.get_input_variable(node.inputs[1]).name
         params['w'] = node.get_weights('weight').name
         params['b'] = node.get_weights('bias').name
         params['wr'] = node.get_weights('recurrent_weight').name
         params['br'] = node.get_weights('recurrent_bias').name
-        return self.template.format(**params)
+
+        if params['pass_initial_states'] == 'true':
+            template = gru_function_initial_state_template
+        else:
+            template = gru_function_template
+
+        return template.format(**params)


 class GRUTaskSequenceTemplate(TaskSequenceTemplate):
@@ -235,6 +248,10 @@ def format(self, node):
 }};\n"""

 lstm_function_template = 'nnet::lstm<{input_t}, {output_t}, {config}>({input}, {output}, {weights});'
+lstm_function_initial_state_template = (
+    'nnet::lstm_init_state<{input_t}, {h_t}, {hc_t}, {output_t}, {config}>'
+    '({input}, {init_state}, {init_cell}, {output}, {weights});'
+)


 class LSTMConfigTemplate(LayerConfigTemplate):
@@ -275,11 +292,16 @@ def format(self, node):
 class LSTMFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
         super().__init__(LSTM, include_header=recurrent_include_list)
-        self.template = lstm_function_template

     def format(self, node):
         params = self._default_function_params(node)

+        if params['pass_initial_states'] == 'true':
+            params['h_t'] = node.get_input_variable(node.inputs[1]).type.name
+            params['init_state'] = node.get_input_variable(node.inputs[1]).name
+            params['init_cell'] = node.get_input_variable(node.inputs[2]).name
+            params['hc_t'] = node.get_input_variable(node.inputs[2]).type.name
+
         types = ['i', 'f', 'c', 'o']
         params['weights'] = ''
         for t in types:
@@ -289,13 +311,18 @@ def format(self, node):
         for t in types:
             params['weights'] += 'bias_{}_{}{}'.format(t, str(node.index), ',' if t != 'o' else '')

-        return self.template.format(**params)
+        if params['pass_initial_states'] == 'true':
+            template = lstm_function_initial_state_template
+        else:
+            template = lstm_function_template
+
+        return template.format(**params)


 ################################################
 # SimpleRNN Template
 ################################################
-simple_rnn_config_template = """struct config{index} : nnet::simpleRNN_config {{
+simple_rnn_config_template = """struct config{index} : nnet::simple_rnn_config {{
     static const unsigned n_in = {n_in};
     static const unsigned n_out = {n_out};
     static const unsigned n_outputs = {n_outputs};
@@ -306,6 +333,7 @@ def format(self, node):
     typedef {weight_t.name} weight_t;
     typedef {bias_t.name} bias_t;
    typedef {recurrent_weight_t.name} recurrent_weight_t;
+    typedef {recurrent_bias_t.name} recurrent_bias_t;

     typedef {act_t} ACT_CONFIG_T;
     template<class x_T, class y_T, class config_T>
@@ -320,6 +348,10 @@ def format(self, node):
 }};\n"""

 simple_rnn_function_template = 'nnet::simple_rnn<{input_t}, {output_t}, {config}>({input}, {output}, {weights});'
+simple_rnn_pytorch_function_template = (
+    'nnet::simple_rnn_pytorch<{input_t}, {output_t}, {config}>({input}, {output}, {weights});'
+)
+simple_rnn_pytorch_function_initial_state_template = 'nnet::simple_rnn_pytorch_init_state<{input_t}, {h_t}, {output_t}, {config}>({input}, {init_state}, {output}, {weights});'  # noqa E501


 class SimpleRNNConfigTemplate(LayerConfigTemplate):
@@ -341,6 +373,9 @@ def format(self, node):
         )
         simple_rnn_params['recurrent_activation'] = 'relu'

+        # In Keras there is no recurrent bias, so put a placeholder
+        simple_rnn_params.setdefault('recurrent_bias_t', simple_rnn_params['bias_t'])
+
         simple_rnn_config = self.template.format(**simple_rnn_params)

         act_params = self._default_config_params(node)
@@ -365,5 +400,17 @@ def __init__(self):

     def format(self, node):
         params = self._default_function_params(node)
-        params['weights'] = 'w{0}, wr{0}, b{0}'.format(str(node.index))
-        return self.template.format(**params)
+        if params['pass_initial_states'] == 'true':
+            params['h_t'] = node.get_input_variable(node.inputs[1]).type.name
+            params['init_state'] = node.get_input_variable(node.inputs[1]).name
+
+        if node.get_attr('pytorch', False):
+            if params['pass_initial_states'] == 'true':
+                template = simple_rnn_pytorch_function_initial_state_template
+            else:
+                template = simple_rnn_pytorch_function_template
+            params['weights'] = 'w{0}, wr{0}, b{0}, br{0}'.format(str(node.index))
+        else:
+            template = simple_rnn_function_template
+            params['weights'] = 'w{0}, wr{0}, b{0}'.format(str(node.index))
+        return template.format(**params)
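To make the dispatch above concrete, here is a small standalone sketch of how the new initial-state template expands. The parameter values are invented for illustration and do not come from a real model.

gru_function_initial_state_template = (
    'nnet::gru_init_state<{input_t}, {h_t}, {output_t}, {config}>({input}, {init_state}, {output}, {w}, {wr}, {b}, {br});'
)

# Hypothetical variable/type names, as the backend might fill them in
params = {
    'input_t': 'input_t', 'h_t': 'h_init_t', 'output_t': 'result_t', 'config': 'config2',
    'input': 'layer1_out', 'init_state': 'h_init', 'output': 'layer2_out',
    'w': 'w2', 'wr': 'wr2', 'b': 'b2', 'br': 'br2',
}
print(gru_function_initial_state_template.format(**params))
# nnet::gru_init_state<input_t, h_init_t, result_t, config2>(layer1_out, h_init, layer2_out, w2, wr2, b2, br2);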

hls4ml/backends/quartus/passes/recurrent_templates.py

Lines changed: 42 additions & 8 deletions
@@ -71,6 +71,9 @@
 }};\n'''

 gru_function_template = 'nnet::gru<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {wr}, {b}, {br});'
+gru_function_initial_state_template = (
+    'nnet::gru<{input_t}, {input2_t}, {output_t}, {config}>({input}, {input2}, {output}, {w}, {wr}, {b}, {br});'
+)


 class GRUConfigTemplate(LayerConfigTemplate):
@@ -137,15 +140,23 @@ def format(self, node):
 class GRUFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
         super().__init__(GRU, include_header=recurrent_include_list)
-        self.template = gru_function_template

     def format(self, node):
         params = self._default_function_params(node)
+        if params['pass_initial_states'] == 'true':
+            params['input2_t'] = node.get_input_variable(node.inputs[1]).type.name
+            params['input2'] = node.get_input_variable(node.inputs[1]).name
         params['w'] = node.get_weights('weight').name
         params['b'] = node.get_weights('bias').name
         params['wr'] = node.get_weights('recurrent_weight').name
         params['br'] = node.get_weights('recurrent_bias').name
-        return self.template.format(**params)
+
+        if params['pass_initial_states'] == 'true':
+            template = gru_function_initial_state_template
+        else:
+            template = gru_function_template
+
+        return template.format(**params)


 ################################################
@@ -174,6 +185,9 @@ def format(self, node):
 }};\n"""

 lstm_function_template = 'nnet::lstm<{input_t}, {output_t}, {config}>({input}, {output}, {weights});'
+lstm_function_initial_state_template = (
+    'nnet::lstm<{input_t}, {input2_t}, {input3_t}, {output_t}, {config}>({input}, {input2}, {input3}, {output}, {weights});'
+)


 class LSTMConfigTemplate(LayerConfigTemplate):
@@ -214,11 +228,16 @@ def format(self, node):
 class LSTMFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
         super().__init__(LSTM, include_header=recurrent_include_list)
-        self.template = lstm_function_template

     def format(self, node):
         params = self._default_function_params(node)

+        if params['pass_initial_states'] == 'true':
+            params['input2_t'] = node.get_input_variable(node.inputs[1]).type.name
+            params['input2'] = node.get_input_variable(node.inputs[1]).name
+            params['input3'] = node.get_input_variable(node.inputs[2]).name
+            params['input3_t'] = node.get_input_variable(node.inputs[2]).type.name
+
         types = ['i', 'f', 'c', 'o']
         params['weights'] = ''
         for t in types:
@@ -228,13 +247,18 @@ def format(self, node):
         for t in types:
             params['weights'] += 'bias_{}_{}{}'.format(t, str(node.index), ',' if t != 'o' else '')

-        return self.template.format(**params)
+        if params['pass_initial_states'] == 'true':
+            template = lstm_function_initial_state_template
+        else:
+            template = lstm_function_template
+
+        return template.format(**params)


 ################################################
 # SimpleRNN Template
 ################################################
-simple_rnn_config_template = """struct config{index} : nnet::simpleRNN_config {{
+simple_rnn_config_template = """struct config{index} : nnet::simple_rnn_config {{
     static const unsigned n_in = {n_in};
     static const unsigned n_out = {n_out};
     static const unsigned n_outputs = {n_outputs};
@@ -261,6 +285,9 @@ def format(self, node):
 simple_rnn_pytorch_function_template = (
     'nnet::simple_rnn_pytorch<{input_t}, {output_t}, {config}>({input}, {output}, {weights});'
 )
+simple_rnn_pytorch_function_initial_state_template = (
+    'nnet::simple_rnn_pytorch<{input_t}, {input2_t}, {output_t}, {config}>({input}, {input2}, {output}, {weights});'
+)


 class SimpleRNNConfigTemplate(LayerConfigTemplate):
@@ -302,13 +329,20 @@ def format(self, node):
 class SimpleRNNFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
         super().__init__(SimpleRNN, include_header=recurrent_include_list)
-        self.template = simple_rnn_function_template

     def format(self, node):
         params = self._default_function_params(node)
+        if params['pass_initial_states'] == 'true':
+            params['input2_t'] = node.get_input_variable(node.inputs[1]).type.name
+            params['input2'] = node.get_input_variable(node.inputs[1]).name
+
         if node.get_attr('pytorch', False):
-            self.template = simple_rnn_pytorch_function_template
+            if params['pass_initial_states'] == 'true':
+                template = simple_rnn_pytorch_function_initial_state_template
+            else:
+                template = simple_rnn_pytorch_function_template
             params['weights'] = 'w{0}, wr{0}, b{0}, br{0}'.format(str(node.index))
         else:
+            template = simple_rnn_function_template
             params['weights'] = 'w{0}, wr{0}, b{0}'.format(str(node.index))
-        return self.template.format(**params)
+        return template.format(**params)
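Note the design difference from the oneAPI backend: the Quartus templates keep the plain nnet::gru / nnet::lstm names and select an overload with extra template parameters, rather than a dedicated *_init_state entry point. The pass_initial_states value arrives as the string 'true'/'false' (hence the string comparison in the diff), so the selection reduces to a sketch like the following; the helper itself is illustrative, not repository code.

gru_function_template = 'nnet::gru<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {wr}, {b}, {br});'
gru_function_initial_state_template = (
    'nnet::gru<{input_t}, {input2_t}, {output_t}, {config}>({input}, {input2}, {output}, {w}, {wr}, {b}, {br});'
)


def select_gru_template(pass_initial_states: str) -> str:
    # pass_initial_states is already rendered as 'true' or 'false'
    if pass_initial_states == 'true':
        return gru_function_initial_state_template
    return gru_function_template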

hls4ml/backends/vivado/passes/recurrent_templates.py

Lines changed: 18 additions & 2 deletions
@@ -87,6 +87,8 @@
 }};\n"""

 recr_function_template = 'nnet::{recr_type}_stack<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {wr}, {b}, {br});'
+recr_function_template_initial_states_lstm = 'nnet::{recr_type}_stack<{input_t}, {input2_t}, {input3_t}, {output_t}, {config}>({input}, {input2}, {input3}, {output}, {w}, {wr}, {b}, {br});'  # noqa: E501
+recr_function_template_initial_states_gru = 'nnet::{recr_type}_stack<{input_t}, {input2_t}, {output_t}, {config}>({input}, {input2}, {output}, {w}, {wr}, {b}, {br});'  # noqa: E501

 recr_include_list = ['nnet_utils/nnet_recurrent.h']

@@ -208,10 +210,16 @@ def format(self, node):
 class RecurrentFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
         super().__init__((LSTM, GRU), include_header=recr_include_list)
-        self.template = recr_function_template

     def format(self, node):
         params = self._default_function_params(node)
+        if params['pass_initial_states'] == 'true':
+            params['input2_t'] = node.get_input_variable(node.inputs[1]).type.name
+            params['input2'] = node.get_input_variable(node.inputs[1]).name
+            if node.class_name == 'LSTM':
+                params['input3'] = node.get_input_variable(node.inputs[2]).name
+                params['input3_t'] = node.get_input_variable(node.inputs[2]).type.name
+
         params['w'] = node.get_weights('weight').name
         params['b'] = node.get_weights('bias').name
         params['wr'] = node.get_weights('recurrent_weight').name
@@ -220,4 +228,12 @@ def format(self, node):
         params['recurrent_activation'] = node.get_attr('recurrent_activation')
         params['recr_type'] = node.class_name.lower()

-        return self.template.format(**params)
+        if params['pass_initial_states'] == 'true':
+            if node.class_name == 'LSTM':
+                template = recr_function_template_initial_states_lstm
+            else:
+                template = recr_function_template_initial_states_gru
+        else:
+            template = recr_function_template
+
+        return template.format(**params)
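Because the Vivado backend serves LSTM and GRU from a single RecurrentFunctionTemplate, the choice also keys on node.class_name: an LSTM carries both an initial hidden state (inputs[1]) and an initial cell state (inputs[2]), while a GRU carries only the hidden state. A condensed, illustrative sketch of that selection, using the three module-level templates defined in the diff above:

def select_recr_template(class_name: str, pass_initial_states: str) -> str:
    # Default path: no initial states were passed to the layer
    if pass_initial_states != 'true':
        return recr_function_template
    # LSTM needs the wider signature with both h0 and c0
    if class_name == 'LSTM':
        return recr_function_template_initial_states_lstm
    return recr_function_template_initial_states_gru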

hls4ml/converters/pytorch/recurrent.py

Lines changed: 11 additions & 11 deletions
@@ -1,5 +1,3 @@
-import warnings
-
 import numpy as np

 from hls4ml.converters.pytorch_to_hls import pytorch_handler
@@ -15,14 +13,13 @@ def parse_rnn_layer(operation, layer_name, input_names, input_shapes, node, clas

     layer["name"] = layer_name

-    layer['inputs'] = [input_names[0]]
-    if len(input_names) > 1:
-        warnings.warn(
-            'hls4ml disregards the initial value of the hidden state passed to the model, assuming that it is all zeros',
-            stacklevel=2,
-        )
+    layer['inputs'] = input_names
+    if 'IOType' in config.keys():
+        if len(input_names) > 1 and config['IOType'] == 'io_stream':
+            raise Exception('Passing initial values for the hidden state is not support for io_stream input type.')
+
     layer['class_name'] = operation
-    if operation == "RNN":
+    if operation == 'RNN':
         layer['class_name'] = 'SimpleRNN'

     layer['return_sequences'] = False  # parameter does not exist in pytorch
@@ -31,7 +28,7 @@ def parse_rnn_layer(operation, layer_name, input_names, input_shapes, node, clas
     if layer['class_name'] == 'SimpleRNN':
         layer['activation'] = class_object.nonlinearity  # Default is tanh, can also be ReLU in pytorch
     else:
-        layer['activation'] = "tanh"  # GRU and LSTM are hard-coded to use tanh in pytorch
+        layer['activation'] = 'tanh'  # GRU and LSTM are hard-coded to use tanh in pytorch

     if layer['class_name'] == 'GRU' or layer['class_name'] == 'LSTM':
         layer['recurrent_activation'] = 'sigmoid'  # GRU and LSTM are hard-coded to use sigmoid in pytorch
@@ -51,7 +48,6 @@ def parse_rnn_layer(operation, layer_name, input_names, input_shapes, node, clas

     if class_object.bidirectional:
         raise Exception('hls4ml does not support birectional RNNs')
-
     if class_object.dropout > 0:
         raise Exception('hls4ml does not support RNNs with dropout')

@@ -70,5 +66,9 @@ def parse_rnn_layer(operation, layer_name, input_names, input_shapes, node, clas
     output_shape = [input_shapes[0][0], layer['n_out']]

     layer['pytorch'] = True  # need to switch some behaviors to match pytorch implementations
+    if len(input_names) == 1:
+        layer['pass_initial_states'] = False
+    else:
+        layer['pass_initial_states'] = True

     return layer, output_shape
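Condensed, the converter-side behavior added here: all traced inputs are now kept on the layer, initial states are rejected for io_stream, and a pass_initial_states flag tells the backend templates which function signature to emit. As a standalone sketch (hypothetical helper, not repository code; config is the hls4ml configuration dict):

def rnn_input_flags(input_names, config):
    # Initial states are only supported with io_parallel; io_stream is rejected up front
    if len(input_names) > 1 and config.get('IOType') == 'io_stream':
        raise Exception('Initial hidden-state values are not supported for io_stream')
    return {
        'inputs': list(input_names),
        # True whenever the call site passed h0 (and c0 for LSTM)
        'pass_initial_states': len(input_names) > 1,
    }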

hls4ml/converters/pytorch_to_hls.py

Lines changed: 11 additions & 3 deletions
@@ -225,9 +225,17 @@ def parse_pytorch_model(config, verbose=True):
             # parse info from class object
             input_names = [inputs_map.get(str(i), str(i)) for i in node.args]
             if pytorch_class in ["RNN", "GRU", "LSTM"]:
-                # we currently don't support the passing of the initial value of the hidden state to RNN models
-                input_names = [inputs_map.get(str(node.args[0]), str(node.args[0]))]
-                input_shapes = [output_shapes[str(node.args[0])]]
+                input_shapes = []
+                input_names = []
+                for arg in node.args:
+                    if isinstance(arg, tuple):
+                        for input in arg:
+                            input_shapes.append(output_shapes[str(input)])
+                            input_names.append(inputs_map.get(str(input), str(input)))
+                    else:
+                        input_shapes.append(output_shapes[str(arg)])
+                        input_names.append(inputs_map.get(str(arg), str(arg)))
+
             # if a 'getitem' is the input to a node, step back in the graph to find the real source of the input
             elif "getitem" in node.args[0].name:
                 for tmp_node in traced_model.graph.nodes:
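The crux of this change: torch.fx traces a call like lstm(x, (h0, c0)) with the state tuple as a single entry in node.args, so the RNN branch now flattens tuples before looking up names and shapes. The same logic as a standalone sketch (hypothetical helper; output_shapes and inputs_map are the dicts used by parse_pytorch_model):

def flatten_rnn_args(node_args, output_shapes, inputs_map):
    input_names, input_shapes = [], []
    for arg in node_args:
        # unpack (h0, c0)-style tuples into individual tensors
        for item in (arg if isinstance(arg, tuple) else (arg,)):
            input_names.append(inputs_map.get(str(item), str(item)))
            input_shapes.append(output_shapes[str(item)])
    return input_names, input_shapes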

0 commit comments