Skip to content

Commit 5a4e66f

Browse files
authored
Merge pull request #134 from AdaptiveParticles/develop_joel
Merge develop_joel into master
2 parents c2da517 + 4dcddb7 commit 5a4e66f

File tree

85 files changed

+12640
-7426
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+12640
-7426
lines changed

.gitmodules

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,3 @@
77
[submodule "external/c-blosc"]
88
path = external/c-blosc
99
url = https://github.com/Blosc/c-blosc
10-
[submodule "external/pybind11"]
11-
path = external/pybind11
12-
url = https://github.com/pybind/pybind11.git

CMakeLists.txt

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -153,21 +153,28 @@ add_library(aprObjLib OBJECT ${SOURCE_FILES} ${SOURCE_FILES_RAYCAST})
153153
if(APR_USE_CUDA)
154154
message(STATUS "APR: Building CUDA for APR")
155155
set(CMAKE_CUDA_STANDARD 14)
156-
set(CMAKE_CUDA_FLAGS "--default-stream per-thread --cudart shared -g -lineinfo -Xptxas -O3,-v -use_fast_math -DAPR_USE_CUDA")
156+
set(CMAKE_CUDA_RUNTIME_LIBRARY "Static")
157+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --default-stream per-thread -Xptxas -v -DAPR_USE_CUDA")
158+
set(CMAKE_CUDA_FLAGS_RELEASE "-O3 --use_fast_math") # -lineinfo for profiling
159+
set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -g -G")
157160
if(APR_BENCHMARK)
158161
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DAPR_BENCHMARK")
159162
endif()
160163
enable_language(CUDA)
161164
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DAPR_USE_CUDA")
162165
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DAPR_USE_CUDA")
163-
set(APR_CUDA_SROUCE_FILES
166+
set(APR_CUDA_SOURCE_FILES
164167
src/algorithm/ComputeGradientCuda.cu
165168
src/data_structures/Mesh/PixelData.cu
166169
src/algorithm/LocalIntensityScale.cu
167170
src/algorithm/OVPC.cu
168171
src/data_structures/APR/access/GPUAccess.cu
172+
src/numerics/miscCuda.cu
169173
src/numerics/APRDownsampleGPU.cu
170-
src/numerics/APRIsoConvGPU.cu
174+
src/numerics/PixelNumericsGPU.cu
175+
src/numerics/APRIsoConvGPU333.cu
176+
src/numerics/APRIsoConvGPU555.cu
177+
src/numerics/APRNumericsGPU.cu
171178
)
172179
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
173180

@@ -176,7 +183,7 @@ endif()
176183
if(APR_BUILD_STATIC_LIB)
177184
# generate static library used as an intermediate step in generating fat lib
178185
set(STATIC_TARGET_NAME staticLib)
179-
add_library(${STATIC_TARGET_NAME} STATIC $<TARGET_OBJECTS:aprObjLib> ${APR_CUDA_SROUCE_FILES})
186+
add_library(${STATIC_TARGET_NAME} STATIC $<TARGET_OBJECTS:aprObjLib> ${APR_CUDA_SOURCE_FILES})
180187
target_compile_features(${STATIC_TARGET_NAME} PUBLIC cxx_std_14)
181188
set_target_properties(${STATIC_TARGET_NAME} PROPERTIES OUTPUT_NAME ${LIBRARY_NAME})
182189
set_target_properties(${STATIC_TARGET_NAME} PROPERTIES CUDA_SEPARABLE_COMPILATION OFF)
@@ -192,7 +199,7 @@ endif()
192199
if(APR_BUILD_SHARED_LIB)
193200
# generate fat shared library
194201
set(SHARED_TARGET_NAME sharedLib)
195-
add_library(${SHARED_TARGET_NAME} SHARED $<TARGET_OBJECTS:aprObjLib> ${APR_CUDA_SROUCE_FILES})
202+
add_library(${SHARED_TARGET_NAME} SHARED $<TARGET_OBJECTS:aprObjLib> ${APR_CUDA_SOURCE_FILES})
196203

197204
target_include_directories(${SHARED_TARGET_NAME} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src> $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}>)
198205
set_target_properties(${SHARED_TARGET_NAME} PROPERTIES OUTPUT_NAME ${LIBRARY_NAME})
@@ -309,11 +316,6 @@ endif(APR_TESTS)
309316
###############################################################################
310317

311318
if(APR_BENCHMARK)
312-
message(STATUS "APR: Benchmarking performance")
319+
message(STATUS "APR: Building performance benchmarks")
313320
add_subdirectory(benchmarks)
314321
endif(APR_BENCHMARK)
315-
316-
###############################################################################
317-
# PYTHON wrappers These are now external please ask us for access
318-
###############################################################################
319-

README.md

Lines changed: 73 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -9,30 +9,69 @@ Labeled Zebrafish nuclei: Gopi Shah, Huisken Lab ([MPI-CBG](https://www.mpi-cbg.
99
[![Build Status](https://travis-ci.org/AdaptiveParticles/LibAPR.svg?branch=master)](https://travis-ci.org/AdaptiveParticles/LibAPR)
1010
[![DOI](https://zenodo.org/badge/70479293.svg)](https://zenodo.org/badge/latestdoi/70479293)
1111

12+
13+
## Python support
14+
15+
We now provide python wrappers in a separate repository [PyLibAPR](https://github.com/AdaptiveParticles/PyLibAPR)
16+
17+
In addition to providing wrappers for most of the LibAPR functionality, the Python library contains a number of new features that simplify the generation and handling of the APR. For example:
18+
19+
* Interactive APR conversion
20+
* Interactive APR z-slice viewer
21+
* Interactive APR raycast (maximum intensity projection) viewer
22+
* Interactive lossy compression of particle intensities
23+
24+
25+
## Version 2.0 release notes
26+
27+
The library has changed significantly since release 1.1. __There are changes to IO and iteration that are not compatible
28+
with the older version__.
29+
30+
* New (additional) linear access data structure, explicitly storing coordinates in the sparse dimension,
31+
similar to Compressed Sparse Row.
32+
* Block-based decomposition of the APR generation pipeline, allowing conversion of very large images.
33+
* Expanded and improved functionality for image processing directly on the APR:
34+
* APR filtering (spatial convolutions).
35+
* [APRNumerics](./src/numerics/APRNumerics.hpp) module, including e.g. gradient computations and Richardson-Lucy deconvolution.
36+
* CUDA GPU-accelerated convolutions and RL deconvolution (currently only supports dense 3x3x3 and 5x5x5 stencils)
37+
38+
1239
## Dependencies
1340

1441
* HDF5 1.8.20 or higher
15-
* OpenMP > 3.0 (optional, but suggested)
42+
* OpenMP > 3.0 (optional, but recommended)
1643
* CMake 3.6 or higher
1744
* LibTIFF 4.0 or higher
1845

19-
NB: This update to 2.0 introduces changes to IO and iteration that are not compatable with old versions.
20-
2146
## Building
2247

23-
The repository requires sub-modules, so the repository needs to be cloned recursively:
48+
The repository requires submodules, and needs to be cloned recursively:
2449

2550
```
26-
git clone --recursive https://github.com/cheesema/LibAPR
51+
git clone --recursive https://github.com/AdaptiveParticles/LibAPR.git
2752
```
2853

29-
If you need to update your clone at any point later, run
54+
### CMake build options
3055

56+
Several CMake options can be given to control the build. Use the `-D` argument to set each
57+
desired option. For example, to disable OpenMP, change the cmake calls below to
3158
```
32-
git pull
33-
git submodule update
59+
cmake -DAPR_USE_OPENMP=OFF ..
3460
```
3561

62+
| Option | Description | Default value |
63+
|:--|:--|:--|
64+
| APR_BUILD_SHARED_LIB | Build shared library | ON |
65+
| APR_BUILD_STATIC_LIB | Build static library | OFF |
66+
| APR_BUILD_EXAMPLES | Build executable examples | OFF |
67+
| APR_TESTS | Build unit tests | OFF |
68+
| APR_BENCHMARK | Build executable performance benchmarks | OFF |
69+
| APR_USE_LIBTIFF | Enable LibTIFF (Required for tests and examples) | ON |
70+
| APR_PREFER_EXTERNAL_GTEST | Use installed gtest instead of included sources | OFF |
71+
| APR_PREFER_EXTERNAL_BLOSC | Use installed blosc instead of included sources | OFF |
72+
| APR_USE_OPENMP | Enable multithreading via OpenMP | ON |
73+
| APR_USE_CUDA | Enable CUDA (Under development - APR conversion pipeline is currently not working with CUDA enabled) | OFF |
74+
3675
### Building on Linux
3776

3877
On Ubuntu, install the `cmake`, `build-essential`, `libhdf5-dev` and `libtiff5-dev` packages (on other distributions, refer to the documentation there, the package names will be similar). OpenMP support is provided by the GCC compiler installed as part of the `build-essential` package.
@@ -46,11 +85,7 @@ cmake ..
4685
make
4786
```
4887

49-
This will create the `libapr.so` library in the `build` directory, as well as all of the examples.
50-
51-
### Docker build
52-
53-
We provide a working Dockerfile that install the library within the image on a separate [repo](https://github.com/MSusik/libaprdocker).
88+
This will create the `libapr.so` library in the `build` directory.
5489

5590
### Building on OSX
5691

@@ -67,7 +102,8 @@ cmake ..
67102
make
68103
```
69104

70-
This will create the `libapr.dylib` library in the `build` directory, as well as all of the examples.
105+
This will create the `libapr.dylib` library in the `build` directory.
106+
71107

72108
In case you want to use the homebrew-installed clang (OpenMP support), modify the call to `cmake` above to
73109

@@ -96,56 +132,60 @@ cmake -G "Visual Studio 15 2017 Win64" -DTIFF_INCLUDE_DIR="C:/Program Files/tiff
96132
cmake --build . --config Debug
97133
```
98134

99-
This will set the appropriate hints for Visual Studio to find both LibTIFF and HDF5. This will create the `apr.dll` library in the `build/Debug` directory, as well as all of the examples. If you need a `Release` build, run `cmake --build . --config Release` from the `build` directory.
135+
This will set the appropriate hints for Visual Studio to find both LibTIFF and HDF5. This will create the `apr.dll` library in the `build/Debug` directory. If you need a `Release` build, run `cmake --build . --config Release` from the `build` directory.
136+
137+
### Docker build
138+
139+
We provide a working Dockerfile that installs the library within the image in a separate [repository](https://github.com/MSusik/libaprdocker).
140+
141+
Note: not recently tested.
100142

101143
## Examples and Documentation
102-
These examples can be turned on by adding -DAPR_BUILD_EXAMPLES=ON to the cmake command.
103144

104-
There are nine basic examples, that show how to generate and compute with the APR:
145+
There are 12 basic examples that show how to generate and compute with the APR. These can be built by adding
146+
-DAPR_BUILD_EXAMPLES=ON to the cmake command.
105147

106148
| Example | How to ... |
107149
|:--|:--|
108150
| [Example_get_apr](./examples/Example_get_apr.cpp) | create an APR from a TIFF and store as hdf5. |
109-
| [Example_apr_iterate](./examples/Example_apr_iterate.cpp) | iterate through a given APR. |
151+
| [Example_get_apr_by_block](./examples/Example_get_apr_by_block.cpp) | create an APR from a (potentially large) TIFF, by decomposing it into smaller blocks, and store as hdf5. |
152+
| [Example_apr_iterate](./examples/Example_apr_iterate.cpp) | iterate over APR particles and their spatial properties. |
153+
| [Example_apr_tree](./examples/Example_apr_tree.cpp) | iterate over interior APR tree particles and their spatial properties. |
110154
| [Example_neighbour_access](./examples/Example_neighbour_access.cpp) | access particle and face neighbours. |
111155
| [Example_compress_apr](./examples/Example_compress_apr.cpp) | additionally compress the intensities stored in an APR. |
112156
| [Example_random_access](./examples/Example_random_access.cpp) | perform random access operations on particles. |
113-
| [Example_ray_cast](./examples/Example_ray_cast.cpp) | perform a maximum intensity projection ray cast directly on the APR data structures read from an APR. |
157+
| [Example_ray_cast](./examples/Example_ray_cast.cpp) | perform a maximum intensity projection ray cast directly on the APR. |
114158
| [Example_reconstruct_image](./examples/Example_reconstruct_image.cpp) | reconstruct a pixel image from an APR. |
159+
| [Example_compute_gradient](./examples/Example_compute_gradient.cpp) | compute the gradient magnitude of an APR. |
160+
| [Example_apr_filter](./examples/Example_apr_filter.cpp) | apply a filter (convolution) to an APR. |
161+
| [Example_apr_deconvolution](./examples/Example_apr_deconvolution.cpp) | perform Richardson-Lucy deconvolution on an APR. |
115162

116-
All examples except Example_get_apr require an already produced APR, such as those created by Example_get_apr.
163+
All examples except `Example_get_apr` and `Example_get_apr_by_block` require an already produced APR, such as those created by `Example_get_apr*`.
117164

118165
For a tutorial on how to use the examples, and an explanation of the data structures, see [the library guide](./docs/lib_guide.pdf).
119166

120167
## LibAPR Tests
121168

122-
The testing framework can be turned on by adding -DAPR_TESTS=ON to the cmake command. All tests can then be run by executing on the command line your build folder.
169+
The testing framework can be turned on by adding -DAPR_TESTS=ON to the cmake command. All tests can then be run by executing
123170
```
124171
ctest
125172
```
126-
Please let us know by creating an issue, if any of these tests are failing on your machine.
127-
128-
## Python support
129-
130-
Note: These have been updated and externalised, and will be released shortly.
173+
on the command line in your build folder. Please let us know by creating an issue if any of these tests fail on your machine.
131174

132175
## Java wrappers
133176

134177
Basic Java wrappers can be found at [LibAPR-java-wrapper](https://github.com/krzysg/LibAPR-java-wrapper)
135178

136179
## Coming soon
137180

138-
* more examples for APR-based filtering and segmentation
139-
* deployment of the Java wrappers to Maven Central so they can be used in your project directly
140-
* support for loading the APR in [Fiji](https://fiji.sc), including [scenery-based](https://github.com/scenerygraphics/scenery) 3D rendering
141-
* improved java wrapper support
142-
* CUDA GPU-accelerated APR generation and processing
143-
* Block based decomposition for extremely large images.
181+
* Improved documentation and updated library guide.
182+
* More examples of APR-based image processing and segmentation.
183+
* CUDA GPU-accelerated APR generation and additional processing options.
144184
* Time series support.
145185

146186
## Contact us
147187

148-
If anything is not working as you think it should, or would like it to, please get in touch with us!! Further, if you have a project, or algorithm, you would like to try using the APR for also please get in contact we would be glad to help!
188+
If anything is not working as you think it should, or would like it to, please get in touch with us!! Further, don't hesitate to contact us if you have a project or algorithm you would like to try using the APR for. We would be glad to help!
149189

150190
[![Join the chat at https://gitter.im/LibAPR](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/LibAPR/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
151191

benchmarks/APRBenchHelper.hpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ struct cmdLineBenchOptions{
2626
int number_reps = 1;
2727
int dimension = 3;
2828
bool no_pixel = false;
29+
bool bench_lr = false;
2930

3031
std::string analysis_file_name = "analysis";
3132
std::string output_dir = "";
@@ -86,6 +87,11 @@ cmdLineBenchOptions read_bench_command_line_options(int argc, char **argv){
8687
result.no_pixel = true;
8788
}
8889

90+
if(command_option_exists(argv, argv + argc, "-bench_lr"))
91+
{
92+
result.bench_lr = true;
93+
}
94+
8995
return result;
9096

9197
}
@@ -259,9 +265,9 @@ class APRBenchHelper {
259265

260266
auto it = apr_input.iterator();
261267

262-
float check_y = log2(1.0f * it.org_dims(0));
263-
float check_x = log2(1.0f * it.org_dims(1));
264-
float check_z = log2(1.0f * it.org_dims(2));
268+
float check_y = log2f(1.0f * it.org_dims(0));
269+
float check_x = log2f(1.0f * it.org_dims(1));
270+
float check_z = log2f(1.0f * it.org_dims(2));
265271

266272
//this function only works for datasets that are powers of 2.
267273
bool pow_2y = (check_y - std::floor(check_y)) == 0;
@@ -314,14 +320,14 @@ class APRBenchHelper {
314320
timer.start_timer("first loop");
315321

316322
//first do the y extension.
317-
for (unsigned int level = lin_it.level_min(); level <= lin_it.level_max(); ++level) {
323+
for (int level = lin_it.level_min(); level <= lin_it.level_max(); ++level) {
318324
int z = 0;
319325
int x = 0;
320326

321327
int new_level = level_offset + level;
322328

323329
#ifdef HAVE_OPENMP
324-
#pragma omp parallel for schedule(dynamic) private(z,x) firstprivate(lin_it,lin_it_tiled)
330+
#pragma omp parallel for schedule(dynamic) default(shared) private(z,x) firstprivate(lin_it,lin_it_tiled)
325331
#endif
326332
for (z = 0; z < lin_it.z_num(level); z++) {
327333
for (x = 0; x < lin_it.x_num(level); ++x) {
@@ -358,14 +364,14 @@ class APRBenchHelper {
358364
timer.start_timer("second loop");
359365

360366
//first do the y extension.
361-
for (unsigned int level = lin_it.level_min(); level <= lin_it.level_max(); ++level) {
367+
for (int level = lin_it.level_min(); level <= lin_it.level_max(); ++level) {
362368
int z = 0;
363369
int x = 0;
364370

365371
int new_level = level_offset + level;
366372

367373
#ifdef HAVE_OPENMP
368-
#pragma omp parallel for schedule(dynamic) private(z,x) firstprivate(lin_it,lin_it_tiled)
374+
#pragma omp parallel for schedule(dynamic) default(shared) private(z,x) firstprivate(lin_it,lin_it_tiled)
369375
#endif
370376
for (z = 0; z < lin_it.z_num(level); z++) {
371377
for (x = 0; x < lin_it.x_num(level); ++x) {

benchmarks/BenchCudaAccessInit.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ inline void bench_access_partial(APR& apr,ParticleData<partsType>& parts,int num
120120
error_check( cudaGetLastError() )
121121

122122
auto access = apr.gpuAPRHelper();
123-
access.init_gpu(access.total_number_particles(tree_access.level_max()), tree_access);
123+
access.init_gpu(tree_access);
124124
error_check ( cudaDeviceSynchronize() )
125125
error_check( cudaGetLastError() )
126126
}
@@ -137,7 +137,7 @@ inline void bench_access_partial(APR& apr,ParticleData<partsType>& parts,int num
137137

138138
timer2.start_timer("apr access");
139139
auto access = apr.gpuAPRHelper();
140-
access.init_gpu(access.total_number_particles(tree_access.level_max()), tree_access);
140+
access.init_gpu(tree_access);
141141
error_check ( cudaDeviceSynchronize() )
142142
timer2.stop_timer();
143143
apr_time += timer2.timings.back();

benchmarks/BenchFilter.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,24 +37,28 @@ int main(int argc, char **argv) {
3737

3838
benchAPRHelper.generate_dataset(i,apr,parts);
3939

40+
ParticleData<float> floatparts;
41+
floatparts.copy(parts);
42+
4043
//put benchmark functions here..
4144

4245
//bench_apr_convolve(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,1);
43-
bench_apr_convolve(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,3);
44-
bench_apr_convolve(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,5);
46+
bench_apr_convolve(apr,floatparts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,3, false);
47+
bench_apr_convolve(apr,floatparts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,5, false);
48+
//bench_apr_convolve(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,9, true);
4549

4650
//bench_apr_convolve_pencil(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,1);
47-
bench_apr_convolve_pencil(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,3);
48-
bench_apr_convolve_pencil(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,5);
51+
bench_apr_convolve_pencil(apr,floatparts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,3, false);
52+
bench_apr_convolve_pencil(apr,floatparts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,5, false);
4953

5054
if((i==0) && !options.no_pixel){
5155
/*
5256
* Pixel benchmarks (These are content independent)
5357
*/
5458

5559
//bench_pixel_convolve(apr,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,1);
56-
bench_pixel_convolve(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,3);
57-
bench_pixel_convolve(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,5);
60+
bench_pixel_convolve(apr,floatparts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,3);
61+
bench_pixel_convolve(apr,floatparts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,5);
5862

5963
}
6064
}

0 commit comments

Comments
 (0)