@@ -18,18 +18,6 @@ using HostPipePropertiesT = decltype(sycl::ext::oneapi::experimental::properties
1818 sycl::ext::intel::experimental::protocol_avalon_streaming_uses_ready
1919));
2020
21- // Data wrapper type used in the host pipes.
22- // first argument: datatype carried over this Avalon streaming interface's data signal.
23- // second argument: enable startofpacket and endofpacket signals for synchronization.
24- // third argument: to enable the empty signal.
25- using InputBeatT = sycl::ext::intel::experimental::StreamingBeat<
26- input_t , // input_t should match the input type of the first layer.
27- true ,
28- true >;
29- using OutputBeatT = sycl::ext::intel::experimental::StreamingBeat<
30- result_t , // result_t should match the output type of the last layer.
31- true ,
32- true >;
3321
3422namespace nnet {
3523
@@ -45,12 +33,12 @@ class IDOutputDMA;
4533
4634// Implementation of a direct memory access kernel. Move data from source, convert,
4735// and send to the sink. Adaptive to SYCL HLS and hardware acceleration flow.
48- template <class srcType , class dest_pipe , size_t SIZE >
36+ template <class src_T , class dest_pipe , size_t num_iteration >
4937struct DMA_convert_data {
5038#if !defined(IS_BSP)
5139 // When targeting a device family, we instantiate an Avalon Memory Mapped Host for
5240 // data transaction between host and the DMA kernel during emulation and simulation.
53- sycl::ext::oneapi::experimental::annotated_arg<srcType *,
41+ sycl::ext::oneapi::experimental::annotated_arg<src_T *,
5442 decltype (sycl::ext::oneapi::experimental::properties{
5543 sycl::ext::intel::experimental::latency<0 >,
5644 sycl::ext::intel::experimental::dwidth<8 >,
@@ -59,7 +47,7 @@ struct DMA_convert_data {
5947 sycl::ext::intel::experimental::wait_request_requested})>
6048#else
6149 // When targeting oneAPI BSP, we can use USM pointer to access host memory.
62- srcType *const
50+ src_T *const
6351#endif
6452 src;
6553
@@ -68,10 +56,10 @@ struct DMA_convert_data {
6856
6957#if defined(IS_BSP)
7058 // Access data using host pointer.
71- sycl::ext::intel::host_ptr<srcType > src_ptr (src);
59+ sycl::ext::intel::host_ptr<src_T > src_ptr (src);
7260#else
7361 // Host allocation is not supported when targeting an FPGA family or part number.
74- srcType *src_ptr (src);
62+ src_T *src_ptr (src);
7563#endif
7664 // First, extract the PipeDataT from the pipe
7765 using PipeDataType = typename nnet::ExtractPipeType<dest_pipe>::value_type;
@@ -80,53 +68,64 @@ struct DMA_convert_data {
8068 constexpr auto dstTypeSize = std::tuple_size<DstDataType>{};
8169
8270 [[intel::fpga_register]]
83- typename nnet::ExtractPipeType<dest_pipe>::value_type ctype ;
71+ typename nnet::ExtractPipeType<dest_pipe>::value_type packet ;
8472
85- for (size_t i = 0 ; i < SIZE / dstTypeSize; i++) {
73+ // Keep sending data to the input layer and keep the kernels running.
74+ for (size_t i = 0 ; i < num_iteration; i++) {
8675 #pragma unroll
8776 for (size_t j = 0 ; j < dstTypeSize; j++) {
88- ctype .data [j] = src_ptr[i * dstTypeSize + j];
77+ packet .data [j] = src_ptr[i * dstTypeSize + j];
8978 }
90- ctype.sop = (i == 0 );
91- ctype.eop = (i == (SIZE / dstTypeSize - 1 ));
92- dest_pipe::write (ctype);
79+ packet.sop = (i == 0 );
80+ // Assert end-of-packet signal after the last iteration.
81+ // All down-stream kernels will stop seeing eop.
82+ packet.eop = (i == (num_iteration - 1 ));
83+ dest_pipe::write (packet);
9384 }
9485 }
9586};
9687
97- // Symmetrical to the DMA_convert_data above.
98- template <class src_pipe , class dstType , size_t SIZE>
88+ // Symmetrical to the DMA_convert_data above, this DMA drains the output pipe and
89+ // writes result to memory.
90+ template <class src_pipe , class dst_T , size_t num_iteration>
9991struct DMA_convert_data_back {
10092#if !defined(IS_BSP)
101- sycl::ext::oneapi::experimental::annotated_arg<dstType *,
93+ // Without BSP, instantiate an Avalon Memory Mapped Host to write to host.
94+ sycl::ext::oneapi::experimental::annotated_arg<dst_T *,
10295 decltype (sycl::ext::oneapi::experimental::properties{
10396 sycl::ext::intel::experimental::latency<0 >,
10497 sycl::ext::intel::experimental::dwidth<8 >,
10598 sycl::ext::intel::experimental::buffer_location<kOutputBufferLocation >,
10699 sycl::ext::intel::experimental::read_write_mode_write,
107100 sycl::ext::intel::experimental::wait_request_requested})>
108101#else
109- dstType *const
102+ // USM pointer, otherwise.
103+ dst_T *const
110104#endif
111105 dst;
112106
113107 [[intel::kernel_args_restrict]]
114108 void operator ()() const {
115109#if defined(IS_BSP)
116- sycl::ext::intel::host_ptr<dstType > dst_ptr (dst);
110+ sycl::ext::intel::host_ptr<dst_T > dst_ptr (dst);
117111#else
118- dstType *dst_ptr (dst);
112+ dst_T *dst_ptr (dst);
119113#endif
120- constexpr auto srcTypeSize = std::tuple_size<typename nnet::ExtractPipeType<src_pipe>::value_type>{};
114+ // First, extract the PipeDataT from the pipe
115+ using PipeDataType = typename nnet::ExtractPipeType<src_pipe>::value_type;
116+ // Then, extract the DataT from StreamingBeat
117+ using SrcDataType = typename nnet::ExtractDataType<PipeDataType>::value_type;
118+ constexpr auto srcTypeSize = std::tuple_size<SrcDataType>{};
121119
122120 [[intel::fpga_register]]
123- typename nnet::ExtractPipeType<src_pipe>::value_type ctype;
124-
125- for (size_t i = 0 ; i < SIZE / srcTypeSize; i++) {
126- ctype = src_pipe::read ();
121+ typename nnet::ExtractPipeType<src_pipe>::value_type packet;
122+
123+ // Drain the output pipe and write result to memory.
124+ for (size_t i = 0 ; i < num_iteration; i++) {
125+ packet = src_pipe::read ();
127126 #pragma unroll
128127 for (size_t j = 0 ; j < srcTypeSize; j++) {
129- dst_ptr[i * srcTypeSize + j] = ctype [j].to_double ();
128+ dst_ptr[i * srcTypeSize + j] = static_cast <dst_T>(packet. data [j].to_double () );
130129 }
131130 }
132131 }
0 commit comments