Skip to content

Commit 4162599

Browse files
committed
Input and output DMA.
1 parent 70323c9 commit 4162599

File tree

4 files changed

+141
-0
lines changed

4 files changed

+141
-0
lines changed

hls4ml/backends/oneapi/oneapi_backend.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,9 @@ def create_initial_config(self, part='Arria10', clock_period=5, io_type='io_para
153153
# TODO: add namespace
154154
'WriteTar': write_tar,
155155
}
156+
157+
if 'use_bsp' in _:
158+
config['IS_BSP'] = True
156159

157160
return config
158161

hls4ml/templates/oneapi/firmware/defines.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
// hls-fpga-machine-learning insert layer-precision
1515

16+
1617
#define DIV_ROUNDUP(n, d) ((n + d - 1) / d)
1718
#define MIN(n, d) (n > d ? d : n)
1819
#define MAX(n, d) (n < d ? d : n)

hls4ml/templates/oneapi/firmware/myproject.h

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,132 @@
88
// currently this is fixed
99
using PipeProps = decltype(sycl::ext::oneapi::experimental::properties(sycl::ext::intel::experimental::ready_latency<0>));
1010

11+
// Pipe properties for host pipes. Host pipes connect to the data source DMA and sink DMA.
12+
// They are connected to the first and the last layer to stream data into and out from the kernel.
13+
using HostPipePropertiesT = decltype(sycl::ext::oneapi::experimental::properties(
14+
sycl::ext::intel::experimental::ready_latency<0>,
15+
sycl::ext::intel::experimental::bits_per_symbol<8>,
16+
sycl::ext::intel::experimental::uses_valid<true>,
17+
sycl::ext::intel::experimental::first_symbol_in_high_order_bits<true>,
18+
sycl::ext::intel::experimental::protocol_avalon_streaming_uses_ready
19+
));
20+
21+
// Data wrapper type used in the host pipes.
22+
// first argument: datatype carried over this Avalon streaming interface's data signal.
23+
// second argument: enable startofpacket and endofpacket signals for synchronization.
24+
// third argument: to enable the empty signal.
25+
using InputBeatT = sycl::ext::intel::experimental::StreamingBeat<
26+
input_t, // input_t should match the input type of the first layer.
27+
true,
28+
true>;
29+
using OutputBeatT = sycl::ext::intel::experimental::StreamingBeat<
30+
result_t, // result_t should match the output type of the last layer.
31+
true,
32+
true>;
33+
34+
namespace nnet {
35+
36+
#if !defined(IS_BSP)
37+
// Definition for buffer locations for Avalon MM host.
38+
inline constexpr unsigned kInputBufferLocation = 0;
39+
inline constexpr unsigned kOutputBufferLocation = 1;
40+
#endif
41+
42+
// Name for DMAs.
43+
class IDInputDMA;
44+
class IDOutputDMA;
45+
46+
// Implementation of a direct memory access kernel. Move data from source, convert,
47+
// and send to the sink. Adaptive to SYCL HLS and hardware acceleration flow.
48+
template <class srcType, class dest_pipe, size_t SIZE>
49+
struct DMA_convert_data {
50+
#if !defined(IS_BSP)
51+
// When targeting a device family, we instantiate an Avalon Memory Mapped Host for
52+
// data transaction between host and the DMA kernel during emulation and simulation.
53+
sycl::ext::oneapi::experimental::annotated_arg<srcType *,
54+
decltype(sycl::ext::oneapi::experimental::properties{
55+
sycl::ext::intel::experimental::latency<0>,
56+
sycl::ext::intel::experimental::dwidth<8>,
57+
sycl::ext::intel::experimental::buffer_location<kInputBufferLocation>,
58+
sycl::ext::intel::experimental::read_write_mode_read,
59+
sycl::ext::intel::experimental::wait_request_requested})>
60+
#else
61+
// When targeting oneAPI BSP, we can use USM pointer to access host memory.
62+
srcType *const
63+
#endif
64+
src;
65+
66+
[[intel::kernel_args_restrict]]
67+
void operator()() const {
68+
69+
#if defined(IS_BSP)
70+
// Access data using host pointer.
71+
sycl::ext::intel::host_ptr<srcType> src_ptr(src);
72+
#else
73+
// Host allocation is not supported when targeting an FPGA family or part number.
74+
srcType *src_ptr(src);
75+
#endif
76+
// First, extract the PipeDataT from the pipe
77+
using PipeDataType = typename nnet::ExtractPipeType<dest_pipe>::value_type;
78+
// Then, extract the DataT from StreamingBeat
79+
using DstDataType = typename nnet::ExtractDataType<PipeDataType>::value_type;
80+
constexpr auto dstTypeSize = std::tuple_size<DstDataType>{};
81+
82+
[[intel::fpga_register]]
83+
typename nnet::ExtractPipeType<dest_pipe>::value_type ctype;
84+
85+
for (size_t i = 0; i < SIZE / dstTypeSize; i++) {
86+
#pragma unroll
87+
for (size_t j = 0; j < dstTypeSize; j++) {
88+
ctype.data[j] = src_ptr[i * dstTypeSize + j];
89+
}
90+
ctype.sop = (i == 0);
91+
ctype.eop = (i == (SIZE / dstTypeSize - 1));
92+
dest_pipe::write(ctype);
93+
}
94+
}
95+
};
96+
97+
// Symmetrical to the DMA_convert_data above.
98+
template <class src_pipe, class dstType, size_t SIZE>
99+
struct DMA_convert_data_back {
100+
#if !defined(IS_BSP)
101+
sycl::ext::oneapi::experimental::annotated_arg<dstType *,
102+
decltype(sycl::ext::oneapi::experimental::properties{
103+
sycl::ext::intel::experimental::latency<0>,
104+
sycl::ext::intel::experimental::dwidth<8>,
105+
sycl::ext::intel::experimental::buffer_location<kOutputBufferLocation>,
106+
sycl::ext::intel::experimental::read_write_mode_write,
107+
sycl::ext::intel::experimental::wait_request_requested})>
108+
#else
109+
dstType *const
110+
#endif
111+
dst;
112+
113+
[[intel::kernel_args_restrict]]
114+
void operator()() const {
115+
#if defined(IS_BSP)
116+
sycl::ext::intel::host_ptr<dstType> dst_ptr(dst);
117+
#else
118+
dstType *dst_ptr(dst);
119+
#endif
120+
constexpr auto srcTypeSize = std::tuple_size<typename nnet::ExtractPipeType<src_pipe>::value_type>{};
121+
122+
[[intel::fpga_register]]
123+
typename nnet::ExtractPipeType<src_pipe>::value_type ctype;
124+
125+
for (size_t i = 0; i < SIZE / srcTypeSize; i++) {
126+
ctype = src_pipe::read();
127+
#pragma unroll
128+
for (size_t j = 0; j < srcTypeSize; j++) {
129+
dst_ptr[i * srcTypeSize + j] = ctype[j].to_double();
130+
}
131+
}
132+
}
133+
};
134+
135+
} // namespace nnet
136+
11137
// Need to declare the input and output pipes
12138

13139
// hls-fpga-machine-learning insert inputs

hls4ml/templates/oneapi/firmware/nnet_utils/nnet_types.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
#include <tuple>
99
#include <utility>
1010

11+
#include <sycl/ext/intel/prototype/pipes_ext.hpp> // Streaming Beat and pipe properties.
12+
1113
namespace nnet {
1214

1315
// Define the pipe type that we use
@@ -34,6 +36,15 @@ struct ExtractPipeType<PipeClass<PipeName, PipeDataT, kPipeMinCapacity, PipeProp
3436
typedef PipeDataT value_type;
3537
};
3638

39+
// Helper template for extracting datatype from oneAPI StreamingBeat type.
40+
template <typename T> struct ExtractDataType { typedef T value_type; };
41+
42+
// Specialization on oneAPI StreamingBeat type.
43+
template <typename DataT, bool EnableSOP, bool EnableEmpty>
44+
struct ExtractDataType<sycl::ext::intel::experimental::StreamingBeat<DataT, EnableSOP, EnableEmpty>> {
45+
typedef DataT value_type;
46+
};
47+
3748
/*
3849
* HLS Shift Register Implementation
3950
* To verify a shift register is used in hardware, go to report.html > Area Analysis of System

0 commit comments

Comments
 (0)