Skip to content

Commit c307715

Browse files
committed
oneAPI backend simulation support.
1 parent 70054aa commit c307715

File tree

5 files changed

+80
-69
lines changed

5 files changed

+80
-69
lines changed

hls4ml/backends/oneapi/oneapi_types.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,11 +170,12 @@ def definition_cpp(self, name_suffix='', as_reference=False):
170170
else:
171171
return f'{self.type.name} {self.name}{name_suffix}'
172172

173-
def declare_cpp(self, pipe_min_size=0, indent=''):
173+
# Updated pipe min size to be 32 for simulation.
174+
def declare_cpp(self, pipe_min_size=32, indent=''):
174175
# Updated to use streaming beat for restartable streaming kernel.
175176
# Streaming beat is a wrapper type of the actual type with sideband control signals.
176177
# Syntax: using BeatT = sycl::ext::intel::experimental::StreamingBeat<DataT, eop, empty>;
177-
streaming_beat_t = f"{self.type.name}BeatT"
178+
streaming_beat_t = f"{self.pipe_name}BeatT"
178179
lines = (
179180
f"{indent}class {self.pipe_id};\n"
180181
f"{indent}using {streaming_beat_t} = "

hls4ml/templates/oneapi/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,13 @@ set(LIBRARY_NAME myproject-${LIB_STAMP})
3838
# You can also specify a device family (E.g. "Arria10" or "Stratix10") or a
3939
# specific part number (E.g. "10AS066N3F40E2SG") to generate a standalone IP.
4040
if(NOT DEFINED FPGA_DEVICE)
41-
set(FPGA_DEVICE "Arria10")
41+
set(FPGA_DEVICE "Agilex7")
4242
endif()
4343

4444
# Use cmake -DUSER_FPGA_FLAGS=<flags> to set extra flags for FPGA backend
4545
# compilation.
46-
set(USER_FPGA_FLAGS -Wno-unused-label ${USER_FPGA_FLAGS})
46+
# -Xsoptimize=latency Turns off the hyper-optimized handshake
47+
set(USER_FPGA_FLAGS -Wno-unused-label;${USER_FPGA_FLAGS};-Xsoptimize=latency)
4748

4849
# Use cmake -DUSER_FLAGS=<flags> to set extra flags for general compilation.
4950
set(USER_FLAGS -Wno-unused-label -fconstexpr-steps=134217728 ${USER_FLAGS})

hls4ml/templates/oneapi/firmware/myproject.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ struct DMA_convert_data {
3737
sycl::ext::oneapi::experimental::annotated_arg<src_T *,
3838
decltype(sycl::ext::oneapi::experimental::properties{
3939
sycl::ext::intel::experimental::latency<0>,
40-
sycl::ext::intel::experimental::dwidth<8>,
40+
sycl::ext::intel::experimental::dwidth<16>,
4141
sycl::ext::intel::experimental::buffer_location<kInputBufferLocation>,
4242
sycl::ext::intel::experimental::read_write_mode_read,
4343
sycl::ext::intel::experimental::wait_request_requested})>
@@ -91,7 +91,7 @@ struct DMA_convert_data_back {
9191
sycl::ext::oneapi::experimental::annotated_arg<dst_T *,
9292
decltype(sycl::ext::oneapi::experimental::properties{
9393
sycl::ext::intel::experimental::latency<0>,
94-
sycl::ext::intel::experimental::dwidth<8>,
94+
sycl::ext::intel::experimental::dwidth<16>,
9595
sycl::ext::intel::experimental::buffer_location<kOutputBufferLocation>,
9696
sycl::ext::intel::experimental::read_write_mode_write,
9797
sycl::ext::intel::experimental::wait_request_requested})>

hls4ml/templates/oneapi/myproject_test.cpp

Lines changed: 70 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ int main(int argc, char **argv) {
8888
#define NUM_ITERATIONS 100
8989
auto selector = sycl::ext::intel::fpga_selector_v;
9090
#else // #if FPGA_EMULATOR
91-
#define NUM_ITERATIONS 100
91+
#define NUM_ITERATIONS 10
9292
auto selector = sycl::ext::intel::fpga_emulator_selector_v;
9393
#endif
9494

@@ -124,83 +124,90 @@ int main(int argc, char **argv) {
124124

125125
// hls-fpga-machine-learning insert runtime contant
126126

127+
try {
127128
#if defined(IS_BSP)
128-
// Allocate host memory if BSP is in use.
129-
float *vals = sycl::malloc_host<float>(kInputSz, q);
130-
if (vals == nullptr) {
131-
std::cerr << "ERROR: host allocation failed for input\n";
132-
fout.close();
133-
return 1;
134-
}
135-
float *outputs = sycl::malloc_host<float>(kOutputSz, q);
136-
if (output == nullptr) {
137-
std::cerr << "ERROR: host allocation failed for output\n";
138-
fout.close();
139-
return 1;
140-
}
129+
// Allocate host memory if BSP is in use.
130+
float *vals = sycl::malloc_host<float>(kInputSz, q);
131+
if (vals == nullptr) {
132+
std::cerr << "ERROR: host allocation failed for input\n";
133+
fout.close();
134+
return 1;
135+
}
136+
float *outputs = sycl::malloc_host<float>(kOutputSz, q);
137+
if (outputs == nullptr) {
138+
std::cerr << "ERROR: host allocation failed for output\n";
139+
fout.close();
140+
return 1;
141+
}
141142
#else
142-
float *vals = new float[kInputSz];
143-
float *outputs = new float[kOutputSz];
143+
float *vals = sycl::malloc_shared<float>(kInputSz, q, sycl::property_list{buffer_location(nnet::kInputBufferLocation)});
144+
float *outputs = sycl::malloc_shared<float>(kOutputSz, q, sycl::property_list{buffer_location(nnet::kOutputBufferLocation)});
144145
#endif
145146

146-
if (file_valid) {
147-
// Start always-run streaming kernel here, instead of inside a loop.
148-
q.single_task(MyProject{});
147+
if (file_valid) {
148+
// Start always-run streaming kernel here, instead of inside a loop.
149+
q.single_task(MyProject{});
149150

150-
// hls-fpga-machine-learning insert data
151+
// hls-fpga-machine-learning insert data
151152

152-
// hls-fpga-machine-learning convert output
153+
// hls-fpga-machine-learning convert output
153154

154-
// Print output from kernel and from prediction file.
155-
for (int i = 0; i < num_iterations; i++) {
156-
for (int j = 0; j < kOutLayerSize; j++) {
157-
fout << outputs[i * kOutLayerSize + j] << " ";
158-
}
159-
fout << std::endl;
160-
if (i % CHECKPOINT == 0) {
161-
std::cout << "Predictions" << std::endl;
162-
// hls-fpga-machine-learning insert predictions
163-
for (auto predval : predictions[i]) {
164-
std::cout << predval << " ";
155+
// Print output from kernel and from prediction file.
156+
for (int i = 0; i < num_iterations; i++) {
157+
for (int j = 0; j < kOutLayerSize; j++) {
158+
fout << outputs[i * kOutLayerSize + j] << " ";
165159
}
166-
std::cout << std::endl;
167-
std::cout << "Quantized predictions" << std::endl;
168-
// hls-fpga-machine-learning insert quantized
160+
fout << std::endl;
161+
if (i % CHECKPOINT == 0) {
162+
std::cout << "Predictions" << std::endl;
163+
// hls-fpga-machine-learning insert predictions
164+
for (auto predval : predictions[i]) {
165+
std::cout << predval << " ";
166+
}
167+
std::cout << std::endl;
168+
std::cout << "Quantized predictions" << std::endl;
169+
// hls-fpga-machine-learning insert quantized
170+
for (int j = 0; j < kOutLayerSize; j++) {
171+
std::cout << outputs[i * kOutLayerSize + j] << " ";
172+
}
173+
std::cout << std::endl;
174+
}
175+
}
176+
} else {
177+
std::cout << "INFO: Unable to open input/predictions file, using default input with " << num_iterations
178+
<< " invocations." << std::endl;
179+
q.single_task(MyProject{});
180+
// hls-fpga-machine-learning insert top-level-function
181+
// hls-fpga-machine-learning insert zero
182+
// hls-fpga-machine-learning convert output
183+
for (int i = 0; i < num_iterations; i++) {
169184
for (int j = 0; j < kOutLayerSize; j++) {
170185
std::cout << outputs[i * kOutLayerSize + j] << " ";
186+
fout << outputs[i * kOutLayerSize + j] << " ";
171187
}
172188
std::cout << std::endl;
189+
fout << std::endl;
173190
}
174191
}
175-
} else {
176-
std::cout << "INFO: Unable to open input/predictions file, using default input with " << num_iterations
177-
<< " invocations." << std::endl;
178-
179-
// hls-fpga-machine-learning insert top-level-function
180-
181-
// hls-fpga-machine-learning insert zero
182-
q.single_task(MyProject{});
183-
// hls-fpga-machine-learning convert output
184-
for (int i = 0; i < num_iterations; i++) {
185-
for (int j = 0; j < kOutLayerSize; j++) {
186-
std::cout << outputs[i * kOutLayerSize + j] << " ";
187-
fout << outputs[i * kOutLayerSize + j] << " ";
188-
}
189-
std::cout << std::endl;
190-
fout << std::endl;
192+
sycl::free(vals, q);
193+
sycl::free(outputs, q);
194+
fout.close();
195+
std::cout << "INFO: Saved inference results to file: " << RESULTS_LOG << std::endl;
196+
} catch (sycl::exception const &e) {
197+
// Catches exceptions in the host code.
198+
std::cerr << "Caught a SYCL host exception:\n"
199+
<< e.what() << "\n";
200+
201+
// Most likely the runtime couldn't find FPGA hardware!
202+
if (e.code().value() == CL_DEVICE_NOT_FOUND)
203+
{
204+
std::cerr << "If you are targeting an FPGA, please ensure that your "
205+
"system has a correctly configured FPGA board.\n";
206+
std::cerr << "Run sys_check in the oneAPI root directory to verify.\n";
207+
std::cerr << "If you are targeting the FPGA emulator, compile with "
208+
"-DFPGA_EMULATOR.\n";
191209
}
210+
std::terminate();
192211
}
193-
194-
// Free up resources.
195-
#if defined(IS_BSP)
196-
free(vals);
197-
free(outputs);
198-
#else
199-
delete[] vals;
200-
delete[] outputs;
201-
#endif
202-
fout.close();
203-
std::cout << "INFO: Saved inference results to file: " << RESULTS_LOG << std::endl;
204-
205212
return 0;
206213
}

hls4ml/writer/oneapi_writer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,8 @@ def write_project_cpp(self, model):
205205
newline += indent * 2 + f'typename nnet::ExtractPipeType<{out.pipe_name}>::value_type {out_beat};\n'
206206
newline += indent * 2 + f'{out_beat}.data = {out.name};\n'
207207
newline += indent * 2 + f'{out.pipe_name}::write({out_beat});\n'
208+
newline += indent * 2 + '// stops the kernel when the last input seen.\n'
209+
newline += indent * 2 + f'keep_going = !{model_inputs[0].name}_beat.eop;\n'
208210
newline += f"{indent}}}\n"
209211
# don't need to add anything in io_stream
210212

0 commit comments

Comments
 (0)