|
8 | 8 | // currently this is fixed |
9 | 9 | using PipeProps = decltype(sycl::ext::oneapi::experimental::properties(sycl::ext::intel::experimental::ready_latency<0>)); |
10 | 10 |
|
| 11 | +// Pipe properties for host pipes. Host pipes connect to the data source DMA and sink DMA. |
| 12 | +// They are connected to the first and the last layer to stream data into and out from the kernel. |
| 13 | +using HostPipePropertiesT = decltype(sycl::ext::oneapi::experimental::properties( |
| 14 | + sycl::ext::intel::experimental::ready_latency<0>, |
| 15 | + sycl::ext::intel::experimental::bits_per_symbol<8>, |
| 16 | + sycl::ext::intel::experimental::uses_valid<true>, |
| 17 | + sycl::ext::intel::experimental::first_symbol_in_high_order_bits<true>, |
| 18 | + sycl::ext::intel::experimental::protocol_avalon_streaming_uses_ready |
| 19 | +)); |
| 20 | + |
| 21 | +// Data wrapper type used in the host pipes. |
| 22 | +// first argument: datatype carried over this Avalon streaming interface's data signal. |
| 23 | +// second argument: enable startofpacket and endofpacket signals for synchronization. |
| 24 | +// third argument: to enable the empty signal. |
| 25 | +using InputBeatT = sycl::ext::intel::experimental::StreamingBeat< |
| 26 | + input_t, // input_t should match the input type of the first layer. |
| 27 | + true, |
| 28 | + true>; |
| 29 | +using OutputBeatT = sycl::ext::intel::experimental::StreamingBeat< |
| 30 | + result_t, // result_t should match the output type of the last layer. |
| 31 | + true, |
| 32 | + true>; |
| 33 | + |
| 34 | +namespace nnet { |
| 35 | + |
| 36 | +#if !defined(IS_BSP) |
| 37 | +// Definition for buffer locations for Avalon MM host. |
| 38 | +inline constexpr unsigned kInputBufferLocation = 0; |
| 39 | +inline constexpr unsigned kOutputBufferLocation = 1; |
| 40 | +#endif |
| 41 | + |
| 42 | +// Name for DMAs. |
| 43 | +class IDInputDMA; |
| 44 | +class IDOutputDMA; |
| 45 | + |
| 46 | +// Implementation of a direct memory access kernel. Move data from source, convert, |
| 47 | +// and send to the sink. Adaptive to SYCL HLS and hardware acceleration flow. |
| 48 | +template <class srcType, class dest_pipe, size_t SIZE> |
| 49 | +struct DMA_convert_data { |
| 50 | +#if !defined(IS_BSP) |
| 51 | + // When targeting a device family, we instantiate an Avalon Memory Mapped Host for |
| 52 | + // data transaction between host and the DMA kernel during emulation and simulation. |
| 53 | + sycl::ext::oneapi::experimental::annotated_arg<srcType *, |
| 54 | + decltype(sycl::ext::oneapi::experimental::properties{ |
| 55 | + sycl::ext::intel::experimental::latency<0>, |
| 56 | + sycl::ext::intel::experimental::dwidth<8>, |
| 57 | + sycl::ext::intel::experimental::buffer_location<kInputBufferLocation>, |
| 58 | + sycl::ext::intel::experimental::read_write_mode_read, |
| 59 | + sycl::ext::intel::experimental::wait_request_requested})> |
| 60 | +#else |
| 61 | + // When targeting oneAPI BSP, we can use USM pointer to access host memory. |
| 62 | + srcType *const |
| 63 | +#endif |
| 64 | + src; |
| 65 | + |
| 66 | + [[intel::kernel_args_restrict]] |
| 67 | + void operator()() const { |
| 68 | + |
| 69 | +#if defined(IS_BSP) |
| 70 | + // Access data using host pointer. |
| 71 | + sycl::ext::intel::host_ptr<srcType> src_ptr(src); |
| 72 | +#else |
| 73 | + // Host allocation is not supported when targeting an FPGA family or part number. |
| 74 | + srcType *src_ptr(src); |
| 75 | +#endif |
| 76 | + // First, extract the PipeDataT from the pipe |
| 77 | + using PipeDataType = typename nnet::ExtractPipeType<dest_pipe>::value_type; |
| 78 | + // Then, extract the DataT from StreamingBeat |
| 79 | + using DstDataType = typename nnet::ExtractDataType<PipeDataType>::value_type; |
| 80 | + constexpr auto dstTypeSize = std::tuple_size<DstDataType>{}; |
| 81 | + |
| 82 | + [[intel::fpga_register]] |
| 83 | + typename nnet::ExtractPipeType<dest_pipe>::value_type ctype; |
| 84 | + |
| 85 | + for (size_t i = 0; i < SIZE / dstTypeSize; i++) { |
| 86 | + #pragma unroll |
| 87 | + for (size_t j = 0; j < dstTypeSize; j++) { |
| 88 | + ctype.data[j] = src_ptr[i * dstTypeSize + j]; |
| 89 | + } |
| 90 | + ctype.sop = (i == 0); |
| 91 | + ctype.eop = (i == (SIZE / dstTypeSize - 1)); |
| 92 | + dest_pipe::write(ctype); |
| 93 | + } |
| 94 | + } |
| 95 | +}; |
| 96 | + |
| 97 | +// Symmetrical to the DMA_convert_data above. |
| 98 | +template <class src_pipe, class dstType, size_t SIZE> |
| 99 | +struct DMA_convert_data_back { |
| 100 | +#if !defined(IS_BSP) |
| 101 | + sycl::ext::oneapi::experimental::annotated_arg<dstType *, |
| 102 | + decltype(sycl::ext::oneapi::experimental::properties{ |
| 103 | + sycl::ext::intel::experimental::latency<0>, |
| 104 | + sycl::ext::intel::experimental::dwidth<8>, |
| 105 | + sycl::ext::intel::experimental::buffer_location<kOutputBufferLocation>, |
| 106 | + sycl::ext::intel::experimental::read_write_mode_write, |
| 107 | + sycl::ext::intel::experimental::wait_request_requested})> |
| 108 | +#else |
| 109 | + dstType *const |
| 110 | +#endif |
| 111 | + dst; |
| 112 | + |
| 113 | + [[intel::kernel_args_restrict]] |
| 114 | + void operator()() const { |
| 115 | +#if defined(IS_BSP) |
| 116 | + sycl::ext::intel::host_ptr<dstType> dst_ptr(dst); |
| 117 | +#else |
| 118 | + dstType *dst_ptr(dst); |
| 119 | +#endif |
| 120 | + constexpr auto srcTypeSize = std::tuple_size<typename nnet::ExtractPipeType<src_pipe>::value_type>{}; |
| 121 | + |
| 122 | + [[intel::fpga_register]] |
| 123 | + typename nnet::ExtractPipeType<src_pipe>::value_type ctype; |
| 124 | + |
| 125 | + for (size_t i = 0; i < SIZE / srcTypeSize; i++) { |
| 126 | + ctype = src_pipe::read(); |
| 127 | + #pragma unroll |
| 128 | + for (size_t j = 0; j < srcTypeSize; j++) { |
| 129 | + dst_ptr[i * srcTypeSize + j] = ctype[j].to_double(); |
| 130 | + } |
| 131 | + } |
| 132 | + } |
| 133 | +}; |
| 134 | + |
| 135 | +} // namespace nnet |
| 136 | + |
11 | 137 | // Need to declare the input and output pipes |
12 | 138 |
|
13 | 139 | // hls-fpga-machine-learning insert inputs |
|
0 commit comments