Skip to content

Commit 0b8ef13

Browse files
committed
Refactoring oneAPI backend myproject_test.
1 parent 257385a commit 0b8ef13

File tree

2 files changed

+112
-48
lines changed

2 files changed

+112
-48
lines changed

hls4ml/templates/oneapi/myproject_test.cpp

Lines changed: 101 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include <fstream>
55
#include <iostream>
66
#include <string>
7+
#include <string_view>
78
#include <vector>
89

910
#include "firmware/myproject.h"
@@ -20,45 +21,29 @@
2021

2122
#define CHECKPOINT 5000
2223

23-
int main(int argc, char **argv) {
24-
25-
#if FPGA_SIMULATOR
26-
auto selector = sycl::ext::intel::fpga_simulator_selector_v;
27-
#elif FPGA_HARDWARE
28-
auto selector = sycl::ext::intel::fpga_selector_v;
29-
#else // #if FPGA_EMULATOR
30-
auto selector = sycl::ext::intel::fpga_emulator_selector_v;
24+
#if not defined(IS_BSP)
25+
using sycl::ext::intel::experimental::property::usm::buffer_location;
3126
#endif
3227

33-
sycl::queue q(selector, fpga_tools::exception_handler, sycl::property::queue::enable_profiling{});
34-
35-
auto device = q.get_device();
36-
37-
// make sure the device supports USM host allocations
38-
if (!device.has(sycl::aspect::usm_host_allocations)) {
39-
std::cerr << "This design must either target a board that supports USM "
40-
"Host/Shared allocations, or IP Component Authoring. "
41-
<< std::endl;
42-
std::terminate();
43-
}
44-
45-
std::cout << "Running on device: " << device.get_info<sycl::info::device::name>().c_str() << std::endl;
46-
28+
// Function that reads input and prediction data from files.
29+
// Returns `true` if files are read successfully and not empty.
30+
// Returns `false` otherwise.
31+
bool prepare_data_from_file(
32+
std::string &fin_path,
33+
std::string &fpr_path,
34+
std::vector<std::vector<float>> &inputs,
35+
std::vector<std::vector<float>> &predictions
36+
) {
4737
// load input data from text file
48-
std::ifstream fin("tb_data/tb_input_features.dat");
38+
std::ifstream fin(fin_path.c_str());
4939
// load predictions from text file
50-
std::ifstream fpr("tb_data/tb_output_predictions.dat");
51-
52-
std::string RESULTS_LOG = "tb_data/results.log";
53-
std::ofstream fout(RESULTS_LOG);
54-
40+
std::ifstream fpr(fpr_path.c_str());
41+
5542
std::string iline;
5643
std::string pline;
5744

5845
if (fin.is_open() && fpr.is_open()) {
59-
std::vector<std::vector<float>> inputs;
60-
std::vector<std::vector<float>> predictions;
61-
unsigned int num_iterations = 0;
46+
size_t num_iterations = 0;
6247

6348
// Prepare input data from file. Load predictions from file.
6449
for (; std::getline(fin, iline) && std::getline(fpr, pline); num_iterations++) {
@@ -83,7 +68,82 @@ int main(int argc, char **argv) {
8368
std::copy(pr.cbegin(), pr.cend(), predictions.back().begin());
8469
std::copy(in.cbegin(), in.cend(), inputs.back().begin());
8570
}
71+
fin.close();
72+
fpr.close();
73+
if (inputs.empty())
74+
return false;
75+
else
76+
return true;
77+
} else {
78+
return false;
79+
}
80+
}
8681

82+
int main(int argc, char **argv) {
83+
84+
#if FPGA_SIMULATOR
85+
#define NUM_ITERATIONS 5
86+
auto selector = sycl::ext::intel::fpga_simulator_selector_v;
87+
#elif FPGA_HARDWARE
88+
#define NUM_ITERATIONS 100
89+
auto selector = sycl::ext::intel::fpga_selector_v;
90+
#else // #if FPGA_EMULATOR
91+
#define NUM_ITERATIONS 100
92+
auto selector = sycl::ext::intel::fpga_emulator_selector_v;
93+
#endif
94+
95+
sycl::queue q(selector, fpga_tools::exception_handler, sycl::property::queue::enable_profiling{});
96+
97+
auto device = q.get_device();
98+
99+
// make sure the device supports USM host allocations
100+
if (!device.has(sycl::aspect::usm_host_allocations)) {
101+
std::cerr << "This design must either target a board that supports USM "
102+
"Host/Shared allocations, or IP Component Authoring. "
103+
<< std::endl;
104+
std::terminate();
105+
}
106+
107+
std::cout << "Running on device: " << device.get_info<sycl::info::device::name>().c_str() << std::endl;
108+
109+
std::string INPUT_FILE = "tb_data/tb_input_features.dat";
110+
std::string PRED_FILE = "tb_data/tb_output_predictions.dat";
111+
std::string RESULTS_LOG = "tb_data/results.log";
112+
std::ofstream fout(RESULTS_LOG);
113+
114+
// Declare vectors to temporarily hold the data read from the files.
115+
std::vector<std::vector<float>> inputs;
116+
std::vector<std::vector<float>> predictions;
117+
bool file_valid = prepare_data_from_file(INPUT_FILE, PRED_FILE, inputs, predictions);
118+
unsigned int num_iterations;
119+
if (file_valid) {
120+
num_iterations = inputs.size();
121+
} else {
122+
num_iterations = NUM_ITERATIONS;
123+
}
124+
125+
// hls-fpga-machine-learning insert runtime contant
126+
127+
#if defined(IS_BSP)
128+
// Allocate host memory if BSP is in use.
129+
float *vals = sycl::malloc_host<float>(kInputSz, q);
130+
if (vals == nullptr) {
131+
std::cerr << "ERROR: host allocation failed for input\n";
132+
fout.close();
133+
return 1;
134+
}
135+
float *outputs = sycl::malloc_host<float>(kOutputSz, q);
136+
if (output == nullptr) {
137+
std::cerr << "ERROR: host allocation failed for output\n";
138+
fout.close();
139+
return 1;
140+
}
141+
#else
142+
float *vals = new float[kInputSz];
143+
float *outputs = new float[kOutputSz];
144+
#endif
145+
146+
if (file_valid) {
87147
// Start always-run streaming kernel here, instead of inside a loop.
88148
q.single_task(MyProject{});
89149

@@ -106,18 +166,13 @@ int main(int argc, char **argv) {
106166
std::cout << std::endl;
107167
std::cout << "Quantized predictions" << std::endl;
108168
// hls-fpga-machine-learning insert quantized
109-
for (int j = 0; j < kOutLayerSize /* defined in convert output */; j++) {
169+
for (int j = 0; j < kOutLayerSize; j++) {
110170
std::cout << outputs[i * kOutLayerSize + j] << " ";
111171
}
112172
std::cout << std::endl;
113173
}
114174
}
115-
delete[] vals;
116-
delete[] outputs;
117-
fin.close();
118-
fpr.close();
119175
} else {
120-
constexpr unsigned int num_iterations = 10;
121176
std::cout << "INFO: Unable to open input/predictions file, using default input with " << num_iterations
122177
<< " invocations." << std::endl;
123178

@@ -127,16 +182,23 @@ int main(int argc, char **argv) {
127182
q.single_task(MyProject{});
128183
// hls-fpga-machine-learning convert output
129184
for (int i = 0; i < num_iterations; i++) {
130-
for (int j = 0; j < kOutLayerSize /* defined in convert output */; j++) {
185+
for (int j = 0; j < kOutLayerSize; j++) {
131186
std::cout << outputs[i * kOutLayerSize + j] << " ";
132187
fout << outputs[i * kOutLayerSize + j] << " ";
133188
}
134189
std::cout << std::endl;
135190
fout << std::endl;
136191
}
137-
delete[] outputs;
138192
}
139193

194+
// Free up resources.
195+
#if defined(IS_BSP)
196+
free(vals);
197+
free(outputs);
198+
#else
199+
delete[] vals;
200+
delete[] outputs;
201+
#endif
140202
fout.close();
141203
std::cout << "INFO: Saved inference results to file: " << RESULTS_LOG << std::endl;
142204

hls4ml/writer/oneapi_writer.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -412,12 +412,20 @@ def write_test_bench(self, model):
412412
newline = line
413413
for bram in model_brams:
414414
newline += f'#include \"firmware/weights/{bram.name}.h\"\n'
415+
elif '// hls-fpga-machine-learning insert runtime contant' in line:
416+
newline = line
417+
insert_constant_lines = (
418+
f'{indent}const size_t kInputSz = {model_inputs[0].size_cpp()} * num_iterations;\n'
419+
f'{indent}const size_t kOutputSz = {model_outputs[0].size_cpp()} * num_iterations;\n'
420+
f'{indent}const size_t kInputLayerSize = {model_inputs[0].size_cpp()};\n'
421+
f'{indent}const size_t kOutLayerSize = {model_outputs[0].size_cpp()};\n'
422+
)
423+
newline += insert_constant_lines;
415424
elif '// hls-fpga-machine-learning insert zero' in line:
416425
newline = line
417426
inp = model_inputs[0]
418427
insert_zero_lines = (
419-
f'{indent}float vals[{inp.size_cpp()} * num_iterations]; \n'
420-
f'{indent}for (int j = 0 ; j < {inp.size_cpp()} * num_iterations; j++)\n'
428+
f'{indent}for (int j = 0 ; j < kInputSz; j++)\n'
421429
f'{indent} vals[j] = 0.0;\n'
422430
f'{indent}q.single_task(nnet::DMA_convert_data<float, {inp.pipe_name}>{{vals, num_iterations}});\n'
423431
)
@@ -426,8 +434,6 @@ def write_test_bench(self, model):
426434
newline = line
427435
inp = model_inputs[0]
428436
insert_data_lines = (
429-
f'{indent}constexpr size_t kInputLayerSize = {inp.size_cpp()};\n'
430-
f'{indent}float *vals = new float[kInputLayerSize * num_iterations];\n'
431437
f'{indent}for (int i = 0; i < num_iterations; i++)\n'
432438
f'{indent} for (int j = 0 ; j < kInputLayerSize; j++)\n'
433439
f'{indent} vals[i * kInputLayerSize + j] = inputs[i][j]; \n'
@@ -437,12 +443,8 @@ def write_test_bench(self, model):
437443
elif '// hls-fpga-machine-learning convert output' in line:
438444
newline = line
439445
out = model_outputs[0]
440-
output_lines = (
441-
f'{indent}float *outputs = new float[{out.size_cpp()} * num_iterations];\n'
446+
newline += \
442447
f'{indent}q.single_task(nnet::DMA_convert_data_back<{out.pipe_name}, float>{{outputs, num_iterations}}).wait();\n'
443-
f'{indent}constexpr size_t kOutLayerSize = {out.size_cpp()};\n'
444-
)
445-
newline += output_lines
446448
else:
447449
newline = line
448450

0 commit comments

Comments
 (0)