Skip to content

Commit 34f0d82

Browse files
committed
Added streaming beat control signal.
1 parent 4162599 commit 34f0d82

File tree

2 files changed

+51
-43
lines changed

2 files changed

+51
-43
lines changed

hls4ml/backends/oneapi/oneapi_types.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,16 @@ def definition_cpp(self, name_suffix='', as_reference=False):
171171
return f'{self.type.name} {self.name}{name_suffix}'
172172

173173
def declare_cpp(self, pipe_min_size=0, indent=''):
174-
lines = indent + f'class {self.pipe_id};\n'
175-
lines += indent + (
176-
f'using {self.pipe_name} = sycl::ext::intel::experimental::pipe<{self.pipe_id}, '
177-
+ f'{self.type.name}, {pipe_min_size}, PipeProps>;\n'
174+
# Updated to use streaming beat for restartable streaming kernel.
175+
# Streaming beat is a wrapper type of the actual type with sideband control signals.
176+
# Syntax: using BeatT = sycl::ext::intel::experimental::StreamingBeat<DataT, eop, empty>;
177+
streaming_beat_t = f"{self.type.name}BeatT"
178+
lines = (
179+
f"{indent}class {self.pipe_id};\n"
180+
f"{indent}using {streaming_beat_t} = "
181+
f"sycl::ext::intel::experimental::StreamingBeat<{self.type.name}, true, true>;\n"
182+
f"{indent}using {self.pipe_name} = sycl::ext::intel::experimental::pipe<"
183+
f"{self.pipe_id}, {streaming_beat_t}, {pipe_min_size}, HostPipePropertiesT>;\n"
178184
)
179185
return lines
180186

@@ -193,10 +199,13 @@ def definition_cpp(self, name_suffix='', as_reference=True):
193199
return f'{self.name}{name_suffix}'
194200

195201
def declare_cpp(self, indent=''):
196-
lines = indent + f'class {self.pipe_id};\n'
197-
lines += indent + (
198-
f'using {self.pipe_name} = sycl::ext::intel::experimental::pipe<{self.pipe_id}, '
199-
+ f'{self.type.name}, {self.pragma[-1]}>;\n'
202+
streaming_beat_t = f"{self.pipe_name}BeatT";
203+
lines = (
204+
f"{indent}class {self.pipe_id};\n"
205+
f"{indent}using {streaming_beat_t} = "
206+
f"sycl::ext::intel::experimental::StreamingBeat<{self.type.name}, true, true>;\n"
207+
f"{indent}using {self.pipe_name} = "
208+
f"sycl::ext::intel::experimental::pipe<{self.pipe_id}, {streaming_beat_t}, {self.pragma[-1]}>;\n"
200209
)
201210
return lines
202211

hls4ml/templates/oneapi/firmware/myproject.h

Lines changed: 34 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,6 @@ using HostPipePropertiesT = decltype(sycl::ext::oneapi::experimental::properties
1818
sycl::ext::intel::experimental::protocol_avalon_streaming_uses_ready
1919
));
2020

21-
// Data wrapper type used in the host pipes.
22-
// first argument: datatype carried over this Avalon streaming interface's data signal.
23-
// second argument: enable startofpacket and endofpacket signals for synchronization.
24-
// third argument: to enable the empty signal.
25-
using InputBeatT = sycl::ext::intel::experimental::StreamingBeat<
26-
input_t, // input_t should match the input type of the first layer.
27-
true,
28-
true>;
29-
using OutputBeatT = sycl::ext::intel::experimental::StreamingBeat<
30-
result_t, // result_t should match the output type of the last layer.
31-
true,
32-
true>;
3321

3422
namespace nnet {
3523

@@ -45,12 +33,12 @@ class IDOutputDMA;
4533

4634
// Implementation of a direct memory access kernel. Move data from source, convert,
4735
// and send to the sink. Adaptive to SYCL HLS and hardware acceleration flow.
48-
template <class srcType, class dest_pipe, size_t SIZE>
36+
template <class src_T, class dest_pipe, size_t num_iteration>
4937
struct DMA_convert_data {
5038
#if !defined(IS_BSP)
5139
// When targeting a device family, we instantiate an Avalon Memory Mapped Host for
5240
// data transaction between host and the DMA kernel during emulation and simulation.
53-
sycl::ext::oneapi::experimental::annotated_arg<srcType *,
41+
sycl::ext::oneapi::experimental::annotated_arg<src_T *,
5442
decltype(sycl::ext::oneapi::experimental::properties{
5543
sycl::ext::intel::experimental::latency<0>,
5644
sycl::ext::intel::experimental::dwidth<8>,
@@ -59,7 +47,7 @@ struct DMA_convert_data {
5947
sycl::ext::intel::experimental::wait_request_requested})>
6048
#else
6149
// When targeting oneAPI BSP, we can use USM pointer to access host memory.
62-
srcType *const
50+
src_T *const
6351
#endif
6452
src;
6553

@@ -68,10 +56,10 @@ struct DMA_convert_data {
6856

6957
#if defined(IS_BSP)
7058
// Access data using host pointer.
71-
sycl::ext::intel::host_ptr<srcType> src_ptr(src);
59+
sycl::ext::intel::host_ptr<src_T> src_ptr(src);
7260
#else
7361
// Host allocation is not supported when targeting an FPGA family or part number.
74-
srcType *src_ptr(src);
62+
src_T *src_ptr(src);
7563
#endif
7664
// First, extract the PipeDataT from the pipe
7765
using PipeDataType = typename nnet::ExtractPipeType<dest_pipe>::value_type;
@@ -80,53 +68,64 @@ struct DMA_convert_data {
8068
constexpr auto dstTypeSize = std::tuple_size<DstDataType>{};
8169

8270
[[intel::fpga_register]]
83-
typename nnet::ExtractPipeType<dest_pipe>::value_type ctype;
71+
typename nnet::ExtractPipeType<dest_pipe>::value_type packet;
8472

85-
for (size_t i = 0; i < SIZE / dstTypeSize; i++) {
73+
// Keep sending data to the input layer and keep the kernels running.
74+
for (size_t i = 0; i < num_iteration; i++) {
8675
#pragma unroll
8776
for (size_t j = 0; j < dstTypeSize; j++) {
88-
ctype.data[j] = src_ptr[i * dstTypeSize + j];
77+
packet.data[j] = src_ptr[i * dstTypeSize + j];
8978
}
90-
ctype.sop = (i == 0);
91-
ctype.eop = (i == (SIZE / dstTypeSize - 1));
92-
dest_pipe::write(ctype);
79+
packet.sop = (i == 0);
80+
// Assert end-of-packet signal after the last iteration.
81+
// All down-stream kernels will stop seeing eop.
82+
packet.eop = (i == (num_iteration - 1));
83+
dest_pipe::write(packet);
9384
}
9485
}
9586
};
9687

97-
// Symmetrical to the DMA_convert_data above.
98-
template <class src_pipe, class dstType, size_t SIZE>
88+
// Symmetrical to the DMA_convert_data above, this DMA drains the output pipe and
89+
// writes result to memory.
90+
template <class src_pipe, class dst_T, size_t num_iteration>
9991
struct DMA_convert_data_back {
10092
#if !defined(IS_BSP)
101-
sycl::ext::oneapi::experimental::annotated_arg<dstType *,
93+
// Without BSP, instantiate an Avalon Memory Mapped Host to write to host.
94+
sycl::ext::oneapi::experimental::annotated_arg<dst_T *,
10295
decltype(sycl::ext::oneapi::experimental::properties{
10396
sycl::ext::intel::experimental::latency<0>,
10497
sycl::ext::intel::experimental::dwidth<8>,
10598
sycl::ext::intel::experimental::buffer_location<kOutputBufferLocation>,
10699
sycl::ext::intel::experimental::read_write_mode_write,
107100
sycl::ext::intel::experimental::wait_request_requested})>
108101
#else
109-
dstType *const
102+
// USM pointer, otherwise.
103+
dst_T *const
110104
#endif
111105
dst;
112106

113107
[[intel::kernel_args_restrict]]
114108
void operator()() const {
115109
#if defined(IS_BSP)
116-
sycl::ext::intel::host_ptr<dstType> dst_ptr(dst);
110+
sycl::ext::intel::host_ptr<dst_T> dst_ptr(dst);
117111
#else
118-
dstType *dst_ptr(dst);
112+
dst_T *dst_ptr(dst);
119113
#endif
120-
constexpr auto srcTypeSize = std::tuple_size<typename nnet::ExtractPipeType<src_pipe>::value_type>{};
114+
// First, extract the PipeDataT from the pipe
115+
using PipeDataType = typename nnet::ExtractPipeType<src_pipe>::value_type;
116+
// Then, extract the DataT from StreamingBeat
117+
using SrcDataType = typename nnet::ExtractDataType<PipeDataType>::value_type;
118+
constexpr auto srcTypeSize = std::tuple_size<SrcDataType>{};
121119

122120
[[intel::fpga_register]]
123-
typename nnet::ExtractPipeType<src_pipe>::value_type ctype;
124-
125-
for (size_t i = 0; i < SIZE / srcTypeSize; i++) {
126-
ctype = src_pipe::read();
121+
typename nnet::ExtractPipeType<src_pipe>::value_type packet;
122+
123+
// Drain the output pipe and write result to memory.
124+
for (size_t i = 0; i < num_iteration; i++) {
125+
packet = src_pipe::read();
127126
#pragma unroll
128127
for (size_t j = 0; j < srcTypeSize; j++) {
129-
dst_ptr[i * srcTypeSize + j] = ctype[j].to_double();
128+
dst_ptr[i * srcTypeSize + j] = static_cast<dst_T>(packet.data[j].to_double());
130129
}
131130
}
132131
}

0 commit comments

Comments
 (0)