Skip to content

Commit e6bbed9

Browse files
committed
Multiple minor fixes and updates: updated the APR compress method so that it runs across all particle levels, and removed a bug. Removed the dependency on BLOSC when writing ParaView/MATLAB-readable files. Added more file-size output to the file I/O.
1 parent edf092f commit e6bbed9

File tree

8 files changed

+219
-132
lines changed

8 files changed

+219
-132
lines changed

examples/Example_compress_apr.cpp

Lines changed: 69 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Example_compress_apr -i input_image_tiff -d input_directory
1616
1717
-compress_type number (1 or 2) (1 - WNL compression (Default), 2 - prediction step with lossless, potential rounding error)
1818
-quantization_level (Default 1: higher increasing the loss nature of the WNL compression aproach)
19+
-compress_level (the IO uses BLOSC for lossless compression of the APR, this can be set from 1-9, where higher increases the compression level. Note, this can come at a significant time increase.)
1920
2021
e.g. Example_compress_apr -i nuc_apr.h5 -d /Test/Input_examples/ -compress_type 2
2122
@@ -59,17 +60,47 @@ int main(int argc, char **argv) {
5960
APRCompress<uint16_t> comp;
6061
ExtraParticleData<uint16_t> symbols;
6162

63+
//feel free to change
64+
unsigned int blosc_comp_type = BLOSC_ZSTD;
65+
unsigned int blosc_comp_level = options.compress_level;
66+
unsigned int blosc_shuffle = 1;
67+
6268
comp.set_quantization_factor(options.quantization_level); //set this to adjust the compression factor for WNL
6369
comp.set_compression_type(options.compress_type);
6470

65-
timer.start_timer("compress");
66-
apr.write_apr(options.directory ,name + "_compress",comp,BLOSC_ZSTD,1,2);
71+
//compress the APR and write to disk
72+
timer.start_timer("compress and write");
73+
FileSizeInfo fileSizeInfo = apr.write_apr(options.directory ,name + "_compress",comp,blosc_comp_type,blosc_comp_level,blosc_shuffle);
6774
timer.stop_timer();
6875

69-
timer.start_timer("decompress");
76+
float time_write = (float) timer.timings.back();
77+
78+
//read the APR and decompress
79+
timer.start_timer("read and decompress");
7080
apr.read_apr(options.directory + name + "_compress_apr.h5");
7181
timer.stop_timer();
7282

83+
float time_read = (float) timer.timings.back();
84+
85+
float original_pixel_image_size = (2.0f*apr.orginal_dimensions(0)*apr.orginal_dimensions(1)*apr.orginal_dimensions(2))/(1000000.0);
86+
std::cout << std::endl;
87+
std::cout << std::endl;
88+
std::cout << "Original image size: " << original_pixel_image_size << " MB" << std::endl;
89+
90+
float apr_compressed_file_size = fileSizeInfo.total_file_size;
91+
92+
std::cout << "Compressed (Lossy - WNL) APR: " << apr_compressed_file_size << " MB" << std::endl;
93+
std::cout << "Compression Ratio: " << original_pixel_image_size/apr_compressed_file_size << std::endl;
94+
std::cout << std::endl;
95+
std::cout << std::endl;
96+
97+
std::cout << "Effective Datarate Write (by original image size): " << original_pixel_image_size/time_write << " MB*/s" << std::endl;
98+
std::cout << "Effective Datarate Read (by original image size): " << original_pixel_image_size/time_read << " MB*/s" << std::endl;
99+
100+
std::cout << std::endl;
101+
std::cout << std::endl;
102+
103+
//writes the piece-wise constant reconstruction of the APR to file for comparison
73104
PixelData<uint16_t> img;
74105
apr.interp_img(img,apr.particles_intensities);
75106
std::string output = options.directory + name + "_compress.tif";
@@ -93,49 +124,53 @@ char* get_command_option(char **begin, char **end, const std::string &option)
93124
}
94125

95126

96-
/**
 * Parses the command line of Example_compress_apr into a cmdLineOptions struct.
 *
 * Recognised flags: -i (required), -d, -compress_type, -quantization_level and
 * -compress_level. Options that are not supplied keep the defaults declared in
 * cmdLineOptions.
 *
 * The process exits with status 1 when no arguments are given, when a flag has
 * no value or a non-numeric value, or when a value is out of range; it exits
 * with status 2 when the required -i flag is missing.
 *
 * @param argc  argument count as passed to main
 * @param argv  argument vector as passed to main
 * @return the populated options
 */
cmdLineOptions read_command_line_options(int argc, char **argv){

    cmdLineOptions result;

    if(argc == 1) {
        std::cerr << usage << std::endl;
        exit(1);
    }

    char **args_begin = argv;
    char **args_end = argv + argc;

    // Fetches the value that follows a flag. A flag given as the very last argument has no
    // value; guard against a null result instead of constructing a std::string from it.
    auto value_of = [&](const char *flag) -> std::string {
        const char *value = get_command_option(args_begin, args_end, flag);
        if (value == nullptr) {
            std::cerr << "Missing value for option " << flag << std::endl;
            exit(1);
        }
        return std::string(value);
    };

    // std::stoi / std::stof signal malformed input only by throwing, so turn that into a
    // readable message and a clean exit rather than an uncaught exception.
    auto invalid_value = [](const char *flag, const std::string &text) {
        std::cerr << "Invalid value '" << text << "' for option " << flag << std::endl;
        exit(1);
    };

    auto unsigned_value_of = [&](const char *flag) -> unsigned int {
        const std::string text = value_of(flag);
        int parsed = -1;
        try {
            parsed = std::stoi(text);
        } catch (...) {
            parsed = -1;
        }
        if (parsed < 0) {
            invalid_value(flag, text);
        }
        return (unsigned int) parsed;
    };

    if(command_option_exists(args_begin, args_end, "-i"))
    {
        result.input = value_of("-i");
    } else {
        std::cout << "Input file required" << std::endl;
        exit(2);
    }

    if(command_option_exists(args_begin, args_end, "-d"))
    {
        result.directory = value_of("-d");
    }

    if(command_option_exists(args_begin, args_end, "-compress_type"))
    {
        result.compress_type = unsigned_value_of("-compress_type");
    }

    if(result.compress_type > 2 || result.compress_type == 0){

        std::cerr << "Invalid Compression setting (1 or 2)" << std::endl;
        exit(1);
    }

    if(command_option_exists(args_begin, args_end, "-quantization_level"))
    {
        const std::string text = value_of("-quantization_level");
        try {
            result.quantization_level = std::stof(text);
        } catch (...) {
            invalid_value("-quantization_level", text);
        }
    }

    if(command_option_exists(args_begin, args_end, "-compress_level"))
    {
        result.compress_level = unsigned_value_of("-compress_level");
    }

    // The usage text documents compression levels up to 9; reject anything larger here
    // instead of handing it on to the writer.
    if(result.compress_level > 9){
        std::cerr << "Invalid compress_level setting (maximum 9)" << std::endl;
        exit(1);
    }

    return result;

}
140175

141176

examples/Example_compress_apr.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ struct cmdLineOptions{
1515
unsigned int compress_type=1;
1616
float quantization_level=1;
1717
bool stats_file = false;
18+
unsigned int compress_level = 2;
1819
};
1920

2021
cmdLineOptions read_command_line_options(int argc, char **argv);

examples/Example_get_apr.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@ Advanced (Direct) Settings:
3333
#include "ConfigAPR.h"
3434
#include "Example_get_apr.h"
3535

36-
37-
3836
int main(int argc, char **argv) {
3937

4038
//input parsing

src/data_structures/APR/APR.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,15 @@ class APR {
5555
apr_writer.read_apr(*this,file_name);
5656
}
5757

58-
float write_apr(std::string save_loc,std::string file_name){
58+
FileSizeInfo write_apr(std::string save_loc,std::string file_name){
5959
return apr_writer.write_apr(*this, save_loc,file_name);
6060
}
6161

62-
float write_apr(std::string save_loc,std::string file_name,APRCompress<ImageType>& apr_compressor,unsigned int blosc_comp_type,unsigned int blosc_comp_level,unsigned int blosc_shuffle){
62+
FileSizeInfo write_apr(std::string save_loc,std::string file_name,APRCompress<ImageType>& apr_compressor,unsigned int blosc_comp_type,unsigned int blosc_comp_level,unsigned int blosc_shuffle){
6363
return apr_writer.write_apr((*this),save_loc, file_name, apr_compressor,blosc_comp_type ,blosc_comp_level,blosc_shuffle);
6464
}
6565

66-
float write_apr(std::string save_loc,std::string file_name,unsigned int blosc_comp_type,unsigned int blosc_comp_level,unsigned int blosc_shuffle){
66+
FileSizeInfo write_apr(std::string save_loc,std::string file_name,unsigned int blosc_comp_type,unsigned int blosc_comp_level,unsigned int blosc_shuffle){
6767
APRCompress<ImageType> apr_compressor;
6868
apr_compressor.set_compression_type(0);
6969
return apr_writer.write_apr((*this),save_loc, file_name, apr_compressor,blosc_comp_type ,blosc_comp_level,blosc_shuffle);
@@ -151,9 +151,9 @@ class APR {
151151
APRIterator<ImageType> apr_iterator(*this); //this is required for parallel access
152152
parts.data.resize(apr_iterator.total_number_particles());
153153

154-
#ifdef HAVE_OPENMP
155-
#pragma omp parallel for schedule(static) firstprivate(apr_iterator)
156-
#endif
154+
#ifdef HAVE_OPENMP
155+
#pragma omp parallel for schedule(static) firstprivate(apr_iterator)
156+
#endif
157157
for (uint64_t particle_number = 0; particle_number < apr_iterator.total_number_particles(); ++particle_number) {
158158
//needed step for any parallel loop (update to the next part)
159159
apr_iterator.set_iterator_to_particle_by_number(particle_number);

src/io/APRWriter.hpp

Lines changed: 52 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@
1313
#include <memory>
1414

1515

16+
// File-size breakdown returned by APRWriter::write_apr.
// All values are in MB, computed as bytes / 1e6.
struct FileSizeInfo {
17+
// Size of the HDF5 file after the particle intensities have been written.
float total_file_size=0;
18+
// total_file_size minus access_data, i.e. the growth caused by writing the intensities.
float intensity_data=0;
19+
// Size of the HDF5 file measured before the particle intensities are written.
float access_data=0;
20+
};
21+
22+
1623
struct AprType {hid_t hdf5type; const char * const typeName;};
1724
namespace AprTypes {
1825

@@ -174,7 +181,7 @@ class APRWriter {
174181
}
175182

176183
template<typename ImageType>
177-
float write_apr(APR<ImageType>& apr, const std::string &save_loc, const std::string &file_name) {
184+
FileSizeInfo write_apr(APR<ImageType>& apr, const std::string &save_loc, const std::string &file_name) {
178185
APRCompress<ImageType> apr_compressor;
179186
apr_compressor.set_compression_type(0);
180187
return write_apr(apr, save_loc, file_name, apr_compressor);
@@ -184,13 +191,14 @@ class APRWriter {
184191
* Writes the APR to the particle cell structure sparse format, using the p_map for reconstruction
185192
*/
186193
template<typename ImageType>
187-
float write_apr(APR<ImageType> &apr, const std::string &save_loc, const std::string &file_name, APRCompress<ImageType> &apr_compressor, unsigned int blosc_comp_type = BLOSC_ZSTD, unsigned int blosc_comp_level = 2, unsigned int blosc_shuffle=1) {
194+
FileSizeInfo write_apr(APR<ImageType> &apr, const std::string &save_loc, const std::string &file_name, APRCompress<ImageType> &apr_compressor, unsigned int blosc_comp_type = BLOSC_ZSTD, unsigned int blosc_comp_level = 2, unsigned int blosc_shuffle=1) {
188195
APRTimer write_timer;
189196
write_timer.verbose_flag = false;
190197

191198
std::string hdf5_file_name = save_loc + file_name + "_apr.h5";
192199
AprFile f{hdf5_file_name, AprFile::Operation::WRITE};
193-
if (!f.isOpened()) return 0;
200+
FileSizeInfo fileSizeInfo1;
201+
if (!f.isOpened()) return fileSizeInfo1;
194202

195203
// ------------- write metadata -------------------------
196204
writeAttr(AprTypes::NumberOfXType, f.groupId, &apr.apr_access.org_dims[1]);
@@ -225,14 +233,7 @@ class APRWriter {
225233
writeAttr(AprTypes::NoiseSdEstimateType, f.groupId, &apr.parameters.noise_sd_estimate);
226234
writeAttr(AprTypes::BackgroundIntensityEstimateType, f.groupId, &apr.parameters.background_intensity_estimate);
227235

228-
// ------------- write data ----------------------------
229-
write_timer.start_timer("intensities");
230-
if (compress_type_num > 0){
231-
apr_compressor.compress(apr,apr.particles_intensities);
232-
}
233-
hid_t type = Hdf5Type<ImageType>::type();
234-
writeData({type, AprTypes::ParticleIntensitiesType}, f.objectId, apr.particles_intensities.data, blosc_comp_type, blosc_comp_level, blosc_shuffle);
235-
write_timer.stop_timer();
236+
236237

237238
write_timer.start_timer("access_data");
238239
MapStorageData map_data;
@@ -262,13 +263,35 @@ class APRWriter {
262263
writeAttr(AprTypes::NumberOfLevelZType, i, f.groupId, &z_num);
263264
}
264265

266+
265267
// ------------- output the file size -------------------
266268
hsize_t file_size = f.getFileSize();
269+
double sizeMB_access = file_size / 1e6;
270+
271+
FileSizeInfo fileSizeInfo;
272+
fileSizeInfo.access_data = sizeMB_access;
273+
274+
// ------------- write data ----------------------------
275+
write_timer.start_timer("intensities");
276+
if (compress_type_num > 0){
277+
apr_compressor.compress(apr,apr.particles_intensities);
278+
}
279+
hid_t type = Hdf5Type<ImageType>::type();
280+
writeData({type, AprTypes::ParticleIntensitiesType}, f.objectId, apr.particles_intensities.data, blosc_comp_type, blosc_comp_level, blosc_shuffle);
281+
write_timer.stop_timer();
282+
283+
// ------------- output the file size -------------------
284+
file_size = f.getFileSize();
267285
double sizeMB = file_size / 1e6;
268-
std::cout << "HDF5 Filesize: " << sizeMB << " MB\n" << "Writing Complete" << std::endl;
269-
return sizeMB;
286+
287+
fileSizeInfo.total_file_size = sizeMB;
288+
fileSizeInfo.intensity_data = fileSizeInfo.total_file_size - fileSizeInfo.access_data;
289+
290+
std::cout << "HDF5 Total Filesize: " << sizeMB << " MB\n" << "Writing Complete" << std::endl;
291+
return fileSizeInfo;
270292
}
271293

294+
272295
template<typename ImageType,typename T>
273296
void write_apr_paraview(APR<ImageType> &apr, const std::string &save_loc, const std::string &file_name, const ExtraParticleData<T> &parts) {
274297
std::string hdf5_file_name = save_loc + file_name + "_paraview.h5";
@@ -284,10 +307,7 @@ class APRWriter {
284307
writeAttr(AprTypes::TotalNumberOfParticlesType, f.groupId, &apr.apr_access.total_number_particles);
285308

286309
// ------------- write data ----------------------------
287-
unsigned int blosc_comp_level = 1;
288-
unsigned int blosc_shuffle = 2;
289-
unsigned int blosc_comp_type = BLOSC_ZSTD;
290-
writeData({(Hdf5Type<T>::type()), AprTypes::ParticlePropertyType}, f.objectId, parts.data, blosc_comp_type, blosc_comp_level, blosc_shuffle);
310+
writeDataStandard({(Hdf5Type<T>::type()), AprTypes::ParticlePropertyType}, f.objectId, parts.data);
291311

292312
APRIterator<ImageType> apr_iterator(apr);
293313
std::vector<uint16_t> xv(apr_iterator.total_number_particles());
@@ -296,9 +316,9 @@ class APRWriter {
296316
std::vector<uint8_t> levelv(apr_iterator.total_number_particles());
297317
std::vector<uint8_t> typev(apr_iterator.total_number_particles());
298318

299-
#ifdef HAVE_OPENMP
300-
#pragma omp parallel for schedule(static) firstprivate(apr_iterator)
301-
#endif
319+
#ifdef HAVE_OPENMP
320+
#pragma omp parallel for schedule(static) firstprivate(apr_iterator)
321+
#endif
302322
for (uint64_t particle_number= 0; particle_number < apr_iterator.total_number_particles(); ++particle_number) {
303323
apr_iterator.set_iterator_to_particle_by_number(particle_number);
304324
xv[particle_number] = apr_iterator.x_global();
@@ -307,11 +327,11 @@ class APRWriter {
307327
levelv[particle_number] = apr_iterator.level();
308328
typev[particle_number] = apr_iterator.type();
309329
}
310-
writeData(AprTypes::ParaviewXType, f.objectId, xv, blosc_comp_type, blosc_comp_level, blosc_shuffle);
311-
writeData(AprTypes::ParaviewYType, f.objectId, yv, blosc_comp_type, blosc_comp_level, blosc_shuffle);
312-
writeData(AprTypes::ParaviewZType, f.objectId, zv, blosc_comp_type, blosc_comp_level, blosc_shuffle);
313-
writeData(AprTypes::ParaviewLevelType, f.objectId, levelv, blosc_comp_type, blosc_comp_level, blosc_shuffle);
314-
writeData(AprTypes::ParaviewTypeType, f.objectId, typev, blosc_comp_type, blosc_comp_level, blosc_shuffle);
330+
writeDataStandard(AprTypes::ParaviewXType, f.objectId, xv);
331+
writeDataStandard(AprTypes::ParaviewYType, f.objectId, yv);
332+
writeDataStandard(AprTypes::ParaviewZType, f.objectId, zv);
333+
writeDataStandard(AprTypes::ParaviewLevelType, f.objectId, levelv);
334+
writeDataStandard(AprTypes::ParaviewTypeType, f.objectId, typev);
315335

316336
// TODO: This needs to be able extended to handle more general type, currently it is assuming uint16
317337
write_main_paraview_xdmf_xml(save_loc,hdf5_file_name, file_name,apr_iterator.total_number_particles());
@@ -487,6 +507,13 @@ class APRWriter {
487507
hdf5_write_data_blosc(aObjectId, aType.hdf5type, aType.typeName, rank, dims, aContainer.data(), blosc_comp_type, blosc_comp_level, blosc_shuffle);
488508
}
489509

510+
template<typename T>
511+
void writeDataStandard(const AprType &aType, hid_t aObjectId, T aContainer) {
512+
hsize_t dims[] = {aContainer.size()};
513+
const hsize_t rank = 1;
514+
hdf5_write_data_standard(aObjectId, aType.hdf5type, aType.typeName, rank, dims, aContainer.data());
515+
}
516+
490517
void writeString(AprType aTypeName, hid_t aGroupId, const std::string &aValue) {
491518
if (aValue.size() > 0){
492519
hid_t aid = H5Screate(H5S_SCALAR);

src/io/hdf5functions_blosc.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,37 @@ void hdf5_write_data_blosc(hid_t obj_id, hid_t type_id, const char *ds_name, hsi
6969
H5Pclose(plist_id);
7070
}
7171

72+
/**
73+
* writes data to the hdf5 file or group identified by obj_id of hdf5 datatype data_type without using blosc
74+
*/
75+
void hdf5_write_data_standard(hid_t obj_id, hid_t type_id, const char *ds_name, hsize_t rank, hsize_t *dims, void *data) {
76+
hid_t plist_id = H5Pcreate(H5P_DATASET_CREATE);
77+
78+
// Dataset must be chunked for compression
79+
const uint64_t max_size = 100000;
80+
hsize_t cdims = (dims[0] < max_size) ? dims[0] : max_size;
81+
rank = 1;
82+
H5Pset_chunk(plist_id, rank, &cdims);
83+
84+
//compression parameters
85+
int deflate_level = 9;
86+
87+
/////SET COMPRESSION TYPE /////
88+
89+
//DEFLATE ENCODING (GZIP)
90+
H5Pset_deflate (plist_id, deflate_level);
91+
92+
//create write and close
93+
hid_t space_id = H5Screate_simple(rank, dims, NULL);
94+
hid_t dset_id = H5Dcreate2(obj_id, ds_name, type_id, space_id, H5P_DEFAULT, plist_id, H5P_DEFAULT);
95+
H5Dwrite(dset_id,type_id,H5S_ALL,H5S_ALL,H5P_DEFAULT,data);
96+
H5Dclose(dset_id);
97+
98+
H5Pclose(plist_id);
99+
100+
}
101+
102+
72103
/**
73104
* writes data to the hdf5 file or group identified by obj_id of hdf5 datatype data_type
74105
*/

src/io/hdf5functions_blosc.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ void hdf5_load_data_blosc(hid_t obj_id, void* buff, const char* data_name);
2828
void hdf5_load_data_blosc(hid_t obj_id, hid_t dataType, void* buff, const char* data_name);
2929
void hdf5_write_attribute_blosc(hid_t obj_id,hid_t type_id,const char* attr_name,hsize_t rank,hsize_t* dims, const void * const data );
3030
void hdf5_write_data_blosc(hid_t obj_id,hid_t type_id,const char* ds_name,hsize_t rank,hsize_t* dims, void* data ,unsigned int comp_type,unsigned int comp_level,unsigned int shuffle);
31+
void hdf5_write_data_standard(hid_t obj_id,hid_t type_id,const char* ds_name,hsize_t rank,hsize_t* dims, void* data );
3132
void write_main_paraview_xdmf_xml(const std::string &aDestinationDir,const std::string &aHdf5FileName, const std::string &aParaviewFileName, uint64_t aNumOfParticles);
3233

3334

0 commit comments

Comments
 (0)