
Commit 454d556

Merge branch 'main' into oneapi_backend/experiment
2 parents c307715 + c2a75fd

36 files changed (+1702, -253 lines)

.pre-commit-config.yaml (2 additions, 2 deletions)

@@ -10,7 +10,7 @@ repos:
           '--skip-string-normalization']
 
   - repo: https://github.com/tox-dev/pyproject-fmt
-    rev: v2.5.0
+    rev: v2.5.1
    hooks:
      - id: pyproject-fmt
 
@@ -30,7 +30,7 @@ repos:
      - id: trailing-whitespace
 
  - repo: https://github.com/PyCQA/isort
-    rev: 6.0.0
+    rev: 6.0.1
    hooks:
      - id: isort

docs/advanced/fifo_depth.rst (23 additions, 8 deletions)

@@ -5,28 +5,29 @@ FIFO Buffer Depth Optimization
 With the ``io_stream`` IO type, each layer is connected with the subsequent layer through first-in first-out (FIFO) buffers.
 The implementation of the FIFO buffers contributes to the overall resource utilization of the design, impacting in particular the BRAM or LUT utilization.
 Because neural networks can in general have complex architectures, it is hard to know a priori the correct depth of each FIFO buffer.
-By default ``hls4ml`` choses the most conservative possible depth for each FIFO buffer, which can result in a an unnecessary overutilization of resources.
+By default ``hls4ml`` chooses the most conservative possible depth for each FIFO buffer, which can result in an unnecessary over-utilization of resources.
 
-In order to reduce the impact on the resources used for FIFO buffer implementation, an optimization has been developed in `#509 <https://github.com/fastmachinelearning/hls4ml/pull/509>`_ that correctly sizes the depth of the FIFO buffers by analyzing the RTL cosimulation.
-We implemented this FIFO buffer resizing as a :py:class:`~hls4ml.backends.vivado.passes.fifo_depth_optimization` optimizer pass.
+In order to reduce the impact on the resources used for FIFO buffer implementation, an optimization flow has been developed that correctly sizes the depth
+of the FIFO buffers by analyzing the RTL co-simulation. This feature is currently available in the ``Vitis`` and ``Vivado`` backends.
+
+In the ``Vivado`` backend, FIFO buffer resizing is implemented as a :py:class:`~hls4ml.backends.vivado.passes.fifo_depth_optimization` optimizer pass.
 Through RTL simulation with large FIFO buffers (by default set to a depth of 100,000), we estimate the maximum occupation of each FIFO.
 Once the maximum depth is determined, the optimizer pass sets the FIFO buffer depth to that value plus 1.
 
-As an example, we show below how to use the optimizer pass, inspired by this `GitHub Gist <https://gist.github.com/nicologhielmetti/3a268be32755448920e9f7d5c78a76d8>`_.
-First, we can define a simple neural network in Keras
+Below we show an example of using the FIFO depth optimization. First, we can define a simple neural network in Keras:
 
 .. code-block:: Python
 
     from tensorflow.keras.layers import Dense
    from tensorflow.keras.models import Sequential
 
    model = Sequential()
-    model.add(Dense(64, input_shape=(16,), name='fc1', activation='relu')
+    model.add(Dense(64, input_shape=(16,), name='fc1', activation='relu'))
    model.add(Dense(32, name='fc2', activation='relu'))
    model.add(Dense(32, name='fc3', activation='relu'))
-    model.add(Dense(5, name='fc3', activation='softmax'))
+    model.add(Dense(5, name='fc4', activation='softmax'))
 
-Then, we can convert the model, including the flow
+Then, we can convert the model, including the flow:
 
 .. code-block:: Python
 
@@ -47,3 +48,17 @@ Then, we can convert the model, including the flow
     hls_model.build(reset=False, csim=True, synth=True, cosim=True)
 
 For more details and results, see `H. Borras et al., "Open-source FPGA-ML codesign for the MLPerf Tiny Benchmark" (2022) <https://arxiv.org/abs/2206.11791>`_.
+
+Similarly, the FIFO buffers can be optimized while using the ``Vitis`` backend with the following changes:
+
+.. code-block:: Python
+
+    config['Flows'] = ['vitis:fifo_depth_optimization']
+    hls4ml.model.optimizer.get_optimizer('vitis:fifo_depth_optimization').configure(profiling_fifo_depth=100_000)
+
+    hls_model = hls4ml.converters.convert_from_keras_model(model,
+                                                           io_type='io_stream',
+                                                           hls_config=config,
+                                                           output_dir='hls4mlprj_fifo_depth_opt',
+                                                           part='xc7z020clg400-1',
+                                                           backend='Vitis')
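
For completeness, a minimal sketch of the corresponding Vivado-backend invocation follows. It assumes the pass is registered as 'vivado:fifo_depth_optimization', mirroring the Vitis flow name in the diff above; `model` is the Keras network defined there, and the part number and output directory simply reuse the example values.

# Sketch only: Vivado-backend counterpart of the documented Vitis snippet.
# Assumes the flow name 'vivado:fifo_depth_optimization'; 'model' is the
# Keras Sequential defined in the documentation example above.
import hls4ml

config = hls4ml.utils.config_from_keras_model(model, granularity='model')
config['Flows'] = ['vivado:fifo_depth_optimization']
hls4ml.model.optimizer.get_optimizer('vivado:fifo_depth_optimization').configure(
    profiling_fifo_depth=100_000
)

hls_model = hls4ml.converters.convert_from_keras_model(
    model,
    io_type='io_stream',
    hls_config=config,
    output_dir='hls4mlprj_fifo_depth_opt',
    part='xc7z020clg400-1',
    backend='Vivado',
)

# The RTL co-simulation (cosim=True) produces the FIFO occupancy profile
# from which each buffer is then resized to its observed maximum plus 1.
hls_model.build(reset=False, csim=True, synth=True, cosim=True)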

hls4ml/backends/oneapi/passes/recurrent_templates.py (54 additions, 7 deletions)

@@ -92,10 +92,14 @@
     using activation_recr = nnet::activation::{recurrent_activation}<x_T, y_T, config_T>;
 
     static const unsigned reuse_factor = {reuse};
+    static const unsigned pytorch_order = {pytorch};
     static const bool store_weights_in_bram = false;
 }};\n'''
 
 gru_function_template = 'nnet::gru<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {wr}, {b}, {br});'
+gru_function_initial_state_template = (
+    'nnet::gru_init_state<{input_t}, {h_t}, {output_t}, {config}>({input}, {init_state}, {output}, {w}, {wr}, {b}, {br});'
+)
 gru_task_sequence_template = 'task_sequence<nnet::gru_stream<{input_pipe}, {output_pipe}, {config}>> {name};'
 gru_stream_function_template = '{name}.async({w}, {wr}, {b}, {br});'
 
@@ -120,6 +124,7 @@ def format(self, node):
         params['config_mult_h'] = f'config{node.index}_h_mult'
         params['act_t'] = '{}_config{}'.format(node.get_attr('activation'), str(node.index) + '_act')
         params['act_recurrent_t'] = '{}_config{}'.format(node.get_attr('recurrent_activation'), str(node.index) + '_rec_act')
+        params['pytorch'] = 'true' if node.get_attr('pytorch', False) else 'false'
         gru_config = self.gru_template.format(**params)
 
         # Activation is on candidate hidden state, dimensionality (1, n_units)
@@ -163,15 +168,23 @@ def format(self, node):
 class GRUFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
         super().__init__(GRU, include_header=recurrent_include_list)
-        self.template = gru_function_template
 
     def format(self, node):
         params = self._default_function_params(node)
+        if params['pass_initial_states'] == 'true':
+            params['h_t'] = node.get_input_variable(node.inputs[1]).type.name
+            params['init_state'] = node.get_input_variable(node.inputs[1]).name
         params['w'] = node.get_weights('weight').name
         params['b'] = node.get_weights('bias').name
         params['wr'] = node.get_weights('recurrent_weight').name
         params['br'] = node.get_weights('recurrent_bias').name
-        return self.template.format(**params)
+
+        if params['pass_initial_states'] == 'true':
+            template = gru_function_initial_state_template
+        else:
+            template = gru_function_template
+
+        return template.format(**params)
 
 
 class GRUTaskSequenceTemplate(TaskSequenceTemplate):
@@ -235,6 +248,10 @@ def format(self, node):
 }};\n"""
 
 lstm_function_template = 'nnet::lstm<{input_t}, {output_t}, {config}>({input}, {output}, {weights});'
+lstm_function_initial_state_template = (
+    'nnet::lstm_init_state<{input_t}, {h_t}, {hc_t}, {output_t}, {config}>'
+    '({input}, {init_state}, {init_cell}, {output}, {weights});'
+)
 
 
 class LSTMConfigTemplate(LayerConfigTemplate):
@@ -275,11 +292,16 @@ def format(self, node):
 class LSTMFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
         super().__init__(LSTM, include_header=recurrent_include_list)
-        self.template = lstm_function_template
 
     def format(self, node):
         params = self._default_function_params(node)
 
+        if params['pass_initial_states'] == 'true':
+            params['h_t'] = node.get_input_variable(node.inputs[1]).type.name
+            params['init_state'] = node.get_input_variable(node.inputs[1]).name
+            params['init_cell'] = node.get_input_variable(node.inputs[2]).name
+            params['hc_t'] = node.get_input_variable(node.inputs[2]).type.name
+
         types = ['i', 'f', 'c', 'o']
         params['weights'] = ''
         for t in types:
@@ -289,13 +311,18 @@ def format(self, node):
         for t in types:
             params['weights'] += 'bias_{}_{}{}'.format(t, str(node.index), ',' if t != 'o' else '')
 
-        return self.template.format(**params)
+        if params['pass_initial_states'] == 'true':
+            template = lstm_function_initial_state_template
+        else:
+            template = lstm_function_template
+
+        return template.format(**params)
 
 
 ################################################
 # SimpleRNN Template
 ################################################
-simple_rnn_config_template = """struct config{index} : nnet::simpleRNN_config {{
+simple_rnn_config_template = """struct config{index} : nnet::simple_rnn_config {{
     static const unsigned n_in = {n_in};
     static const unsigned n_out = {n_out};
     static const unsigned n_outputs = {n_outputs};
@@ -306,6 +333,7 @@ def format(self, node):
     typedef {weight_t.name} weight_t;
     typedef {bias_t.name} bias_t;
     typedef {recurrent_weight_t.name} recurrent_weight_t;
+    typedef {recurrent_bias_t.name} recurrent_bias_t;
 
     typedef {act_t} ACT_CONFIG_T;
     template<class x_T, class y_T, class config_T>
@@ -320,6 +348,10 @@ def format(self, node):
 }};\n"""
 
 simple_rnn_function_template = 'nnet::simple_rnn<{input_t}, {output_t}, {config}>({input}, {output}, {weights});'
+simple_rnn_pytorch_function_template = (
+    'nnet::simple_rnn_pytorch<{input_t}, {output_t}, {config}>({input}, {output}, {weights});'
+)
+simple_rnn_pytorch_function_initial_state_template = 'nnet::simple_rnn_pytorch_init_state<{input_t}, {h_t}, {output_t}, {config}>({input}, {init_state}, {output}, {weights});'  # noqa E501
 
 
 class SimpleRNNConfigTemplate(LayerConfigTemplate):
@@ -341,6 +373,9 @@ def format(self, node):
         )
         simple_rnn_params['recurrent_activation'] = 'relu'
 
+        # In Keras there is no recurrent bias, so put a placeholder
+        simple_rnn_params.setdefault('recurrent_bias_t', simple_rnn_params['bias_t'])
+
         simple_rnn_config = self.template.format(**simple_rnn_params)
 
         act_params = self._default_config_params(node)
@@ -365,5 +400,17 @@ def __init__(self):
 
     def format(self, node):
         params = self._default_function_params(node)
-        params['weights'] = 'w{0}, wr{0}, b{0}'.format(str(node.index))
-        return self.template.format(**params)
+        if params['pass_initial_states'] == 'true':
+            params['h_t'] = node.get_input_variable(node.inputs[1]).type.name
+            params['init_state'] = node.get_input_variable(node.inputs[1]).name
+
+        if node.get_attr('pytorch', False):
+            if params['pass_initial_states'] == 'true':
+                template = simple_rnn_pytorch_function_initial_state_template
+            else:
+                template = simple_rnn_pytorch_function_template
+            params['weights'] = 'w{0}, wr{0}, b{0}, br{0}'.format(str(node.index))
+        else:
+            template = simple_rnn_function_template
+            params['weights'] = 'w{0}, wr{0}, b{0}'.format(str(node.index))
+        return template.format(**params)
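
The pattern is the same for GRU, LSTM and SimpleRNN: format() now picks the template per node, depending on whether an initial state is passed in, instead of fixing self.template in __init__. The sketch below is not hls4ml code; it replays that selection with hypothetical parameter values standing in for what _default_function_params() derives from the graph.

# Illustrative sketch (not hls4ml code): how the per-node template selection
# above renders a GRU call. All values in 'params' are hypothetical stand-ins
# for what GRUFunctionTemplate.format() extracts from the model graph.
gru_function_template = 'nnet::gru<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {wr}, {b}, {br});'
gru_function_initial_state_template = (
    'nnet::gru_init_state<{input_t}, {h_t}, {output_t}, {config}>'
    '({input}, {init_state}, {output}, {w}, {wr}, {b}, {br});'
)


def render_gru_call(params):
    # Per-call selection, as in the updated pass: no shared state is mutated.
    if params['pass_initial_states'] == 'true':
        template = gru_function_initial_state_template
    else:
        template = gru_function_template
    return template.format(**params)


params = {
    'input_t': 'input_t', 'output_t': 'result_t', 'config': 'config2',
    'input': 'fc1_out', 'output': 'gru_out',
    'w': 'w2', 'wr': 'wr2', 'b': 'b2', 'br': 'br2',
    'pass_initial_states': 'false',
}
print(render_gru_call(params))
# nnet::gru<input_t, result_t, config2>(fc1_out, gru_out, w2, wr2, b2, br2);

params.update(pass_initial_states='true', h_t='h_init_t', init_state='h_init')
print(render_gru_call(params))
# nnet::gru_init_state<input_t, h_init_t, result_t, config2>(fc1_out, h_init, gru_out, w2, wr2, b2, br2);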

hls4ml/backends/quartus/passes/recurrent_templates.py (42 additions, 8 deletions)

@@ -71,6 +71,9 @@
 }};\n'''
 
 gru_function_template = 'nnet::gru<{input_t}, {output_t}, {config}>({input}, {output}, {w}, {wr}, {b}, {br});'
+gru_function_initial_state_template = (
+    'nnet::gru<{input_t}, {input2_t}, {output_t}, {config}>({input}, {input2}, {output}, {w}, {wr}, {b}, {br});'
+)
 
 
 class GRUConfigTemplate(LayerConfigTemplate):
@@ -137,15 +140,23 @@ def format(self, node):
 class GRUFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
         super().__init__(GRU, include_header=recurrent_include_list)
-        self.template = gru_function_template
 
     def format(self, node):
         params = self._default_function_params(node)
+        if params['pass_initial_states'] == 'true':
+            params['input2_t'] = node.get_input_variable(node.inputs[1]).type.name
+            params['input2'] = node.get_input_variable(node.inputs[1]).name
         params['w'] = node.get_weights('weight').name
         params['b'] = node.get_weights('bias').name
         params['wr'] = node.get_weights('recurrent_weight').name
         params['br'] = node.get_weights('recurrent_bias').name
-        return self.template.format(**params)
+
+        if params['pass_initial_states'] == 'true':
+            template = gru_function_initial_state_template
+        else:
+            template = gru_function_template
+
+        return template.format(**params)
 
 
 ################################################
@@ -174,6 +185,9 @@ def format(self, node):
 }};\n"""
 
 lstm_function_template = 'nnet::lstm<{input_t}, {output_t}, {config}>({input}, {output}, {weights});'
+lstm_function_initial_state_template = (
+    'nnet::lstm<{input_t}, {input2_t}, {input3_t}, {output_t}, {config}>({input}, {input2}, {input3}, {output}, {weights});'
+)
 
 
 class LSTMConfigTemplate(LayerConfigTemplate):
@@ -214,11 +228,16 @@ def format(self, node):
 class LSTMFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
         super().__init__(LSTM, include_header=recurrent_include_list)
-        self.template = lstm_function_template
 
     def format(self, node):
         params = self._default_function_params(node)
 
+        if params['pass_initial_states'] == 'true':
+            params['input2_t'] = node.get_input_variable(node.inputs[1]).type.name
+            params['input2'] = node.get_input_variable(node.inputs[1]).name
+            params['input3'] = node.get_input_variable(node.inputs[2]).name
+            params['input3_t'] = node.get_input_variable(node.inputs[2]).type.name
+
         types = ['i', 'f', 'c', 'o']
         params['weights'] = ''
         for t in types:
@@ -228,13 +247,18 @@ def format(self, node):
         for t in types:
             params['weights'] += 'bias_{}_{}{}'.format(t, str(node.index), ',' if t != 'o' else '')
 
-        return self.template.format(**params)
+        if params['pass_initial_states'] == 'true':
+            template = lstm_function_initial_state_template
+        else:
+            template = lstm_function_template
+
+        return template.format(**params)
 
 
 ################################################
 # SimpleRNN Template
 ################################################
-simple_rnn_config_template = """struct config{index} : nnet::simpleRNN_config {{
+simple_rnn_config_template = """struct config{index} : nnet::simple_rnn_config {{
     static const unsigned n_in = {n_in};
     static const unsigned n_out = {n_out};
     static const unsigned n_outputs = {n_outputs};
@@ -261,6 +285,9 @@ def format(self, node):
 simple_rnn_pytorch_function_template = (
     'nnet::simple_rnn_pytorch<{input_t}, {output_t}, {config}>({input}, {output}, {weights});'
 )
+simple_rnn_pytorch_function_initial_state_template = (
+    'nnet::simple_rnn_pytorch<{input_t}, {input2_t}, {output_t}, {config}>({input}, {input2}, {output}, {weights});'
+)
 
 
 class SimpleRNNConfigTemplate(LayerConfigTemplate):
@@ -302,13 +329,20 @@ def format(self, node):
 class SimpleRNNFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
         super().__init__(SimpleRNN, include_header=recurrent_include_list)
-        self.template = simple_rnn_function_template
 
     def format(self, node):
         params = self._default_function_params(node)
+        if params['pass_initial_states'] == 'true':
+            params['input2_t'] = node.get_input_variable(node.inputs[1]).type.name
+            params['input2'] = node.get_input_variable(node.inputs[1]).name
+
         if node.get_attr('pytorch', False):
-            self.template = simple_rnn_pytorch_function_template
+            if params['pass_initial_states'] == 'true':
+                template = simple_rnn_pytorch_function_initial_state_template
+            else:
+                template = simple_rnn_pytorch_function_template
             params['weights'] = 'w{0}, wr{0}, b{0}, br{0}'.format(str(node.index))
         else:
+            template = simple_rnn_function_template
             params['weights'] = 'w{0}, wr{0}, b{0}'.format(str(node.index))
-        return self.template.format(**params)
+        return template.format(**params)
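
One behavioral detail worth noting: the old SimpleRNNFunctionTemplate assigned self.template inside format(), so after formatting one PyTorch node the PyTorch template stuck to the instance and could leak into later Keras nodes. The new code binds a local variable instead. A minimal sketch of the difference (illustrative class, not hls4ml code):

# Sketch of the state-leak the local-variable form avoids. 'Formatter' is a
# hypothetical stand-in for a FunctionCallTemplate-style class.
class Formatter:
    def __init__(self):
        self.template = 'keras: {weights}'

    def format_buggy(self, is_pytorch):
        # Old pattern: mutates instance state, so the choice persists
        # across every later call on the same instance.
        if is_pytorch:
            self.template = 'pytorch: {weights}'
        return self.template.format(weights='w, wr, b')

    def format_fixed(self, is_pytorch):
        # New pattern: per-call selection, nothing leaks between nodes.
        template = 'pytorch: {weights}' if is_pytorch else 'keras: {weights}'
        return template.format(weights='w, wr, b')


f = Formatter()
f.format_buggy(True)
assert f.format_buggy(False).startswith('pytorch')  # stale template reused
assert Formatter().format_fixed(False).startswith('keras')  # correct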

hls4ml/backends/symbolic/passes/expr_templates.py (1 addition, 0 deletions)

@@ -33,6 +33,7 @@ def __init__(self, layer, lut_functions, use_built_in_luts=False, settings=None)
         user_functions = settings.get('user_functions', {})
         user_functions.update(lut_functions)
         settings['user_functions'] = user_functions
+        settings['strict'] = False
 
         super().__init__(settings)
         self.layer = layer
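
No rationale is given in the commit, but settings['strict'] = False most likely keeps the SymPy-based expression printer lenient: recent SymPy releases added a strict code-printer setting under which expressions with no target-language translation raise an error instead of being emitted with a "not supported" comment. A hedged illustration, assuming a SymPy version that recognizes the setting:

# Hedged sketch: SymPy's 'strict' code-printer setting. Whether your SymPy
# release accepts it (and its default value) is version-dependent.
import sympy

x = sympy.Symbol('x')
expr = sympy.DiracDelta(x)  # has no direct C translation

# Lenient mode, as the pass above configures: the printer emits a
# "Not supported in C" comment instead of raising an exception.
print(sympy.ccode(expr, strict=False))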
