AdaptiveParticles
diff --git a/‎.gitmodules‎
Lines changed: 0 additions & 3 deletions b/‎.gitmodules‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 13 additions & 11 deletions b/‎CMakeLists.txt‎
Lines changed: 13 additions & 11 deletions
diff --git a/‎README.md‎
Lines changed: 73 additions & 33 deletions b/‎README.md‎
Lines changed: 73 additions & 33 deletions
diff --git a/‎benchmarks/APRBenchHelper.hpp‎
Lines changed: 13 additions & 7 deletions b/‎benchmarks/APRBenchHelper.hpp‎
Lines changed: 13 additions & 7 deletions
diff --git a/‎benchmarks/BenchCudaAccessInit.cpp‎
Lines changed: 2 additions & 2 deletions b/‎benchmarks/BenchCudaAccessInit.cpp‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎benchmarks/BenchFilter.cpp‎
Lines changed: 10 additions & 6 deletions b/‎benchmarks/BenchFilter.cpp‎
Lines changed: 10 additions & 6 deletions
@@ -7,6 +7,3 @@
 [submodule "external/c-blosc"]
 	path = external/c-blosc
 	url = https://github.com/Blosc/c-blosc
-[submodule "external/pybind11"]
-	path = external/pybind11
-	url = https://github.com/pybind/pybind11.git
@@ -153,21 +153,28 @@ add_library(aprObjLib OBJECT ${SOURCE_FILES} ${SOURCE_FILES_RAYCAST})
 if(APR_USE_CUDA)
     message(STATUS "APR: Building CUDA for APR")
     set(CMAKE_CUDA_STANDARD 14)
-    set(CMAKE_CUDA_FLAGS "--default-stream per-thread --cudart shared -g -lineinfo -Xptxas -O3,-v -use_fast_math -DAPR_USE_CUDA")
+    set(CMAKE_CUDA_RUNTIME_LIBRARY "Static")
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --default-stream per-thread -Xptxas -v -DAPR_USE_CUDA")
+    set(CMAKE_CUDA_FLAGS_RELEASE "-O3 --use_fast_math") # -lineinfo for profiling
+    set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -g -G")
     if(APR_BENCHMARK)
         set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DAPR_BENCHMARK")
     endif()
     enable_language(CUDA)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DAPR_USE_CUDA")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DAPR_USE_CUDA")
-    set(APR_CUDA_SROUCE_FILES
+    set(APR_CUDA_SOURCE_FILES
 	        src/algorithm/ComputeGradientCuda.cu
             src/data_structures/Mesh/PixelData.cu
             src/algorithm/LocalIntensityScale.cu
             src/algorithm/OVPC.cu
             src/data_structures/APR/access/GPUAccess.cu
+            src/numerics/miscCuda.cu
             src/numerics/APRDownsampleGPU.cu
-            src/numerics/APRIsoConvGPU.cu
+            src/numerics/PixelNumericsGPU.cu
+            src/numerics/APRIsoConvGPU333.cu
+            src/numerics/APRIsoConvGPU555.cu
+            src/numerics/APRNumericsGPU.cu
             )
     include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
 
@@ -176,7 +183,7 @@ endif()
 if(APR_BUILD_STATIC_LIB)
     # generate static library used as a intermediate step in generating fat lib
     set(STATIC_TARGET_NAME staticLib)
-    add_library(${STATIC_TARGET_NAME} STATIC $<TARGET_OBJECTS:aprObjLib> ${APR_CUDA_SROUCE_FILES})
+    add_library(${STATIC_TARGET_NAME} STATIC $<TARGET_OBJECTS:aprObjLib> ${APR_CUDA_SOURCE_FILES})
     target_compile_features(${STATIC_TARGET_NAME} PUBLIC cxx_std_14)
     set_target_properties(${STATIC_TARGET_NAME} PROPERTIES OUTPUT_NAME ${LIBRARY_NAME})
     set_target_properties(${STATIC_TARGET_NAME} PROPERTIES CUDA_SEPARABLE_COMPILATION OFF)
@@ -192,7 +199,7 @@ endif()
 if(APR_BUILD_SHARED_LIB)
 # generate fat shared library
     set(SHARED_TARGET_NAME sharedLib)
-    add_library(${SHARED_TARGET_NAME} SHARED $<TARGET_OBJECTS:aprObjLib> ${APR_CUDA_SROUCE_FILES})
+    add_library(${SHARED_TARGET_NAME} SHARED $<TARGET_OBJECTS:aprObjLib> ${APR_CUDA_SOURCE_FILES})
 
     target_include_directories(${SHARED_TARGET_NAME} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src> $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}>)
     set_target_properties(${SHARED_TARGET_NAME} PROPERTIES OUTPUT_NAME ${LIBRARY_NAME})
@@ -309,11 +316,6 @@ endif(APR_TESTS)
 ###############################################################################
 
 if(APR_BENCHMARK)
-    message(STATUS "APR: Benchmarking performance")
+    message(STATUS "APR: Building performance benchmarks")
     add_subdirectory(benchmarks)
 endif(APR_BENCHMARK)
-
-###############################################################################
-# PYTHON wrappers These are now external please ask us for access
-###############################################################################
-
 
@@ -9,30 +9,69 @@ Labeled Zebrafish nuclei: Gopi Shah, Huisken Lab ([MPI-CBG](https://www.mpi-cbg.
 [![Build Status](https://travis-ci.org/AdaptiveParticles/LibAPR.svg?branch=master)](https://travis-ci.org/AdaptiveParticles/LibAPR)
 [![DOI](https://zenodo.org/badge/70479293.svg)](https://zenodo.org/badge/latestdoi/70479293)
 
+
+## Python support
+
+We now provide Python wrappers in a separate repository, [PyLibAPR](https://github.com/AdaptiveParticles/PyLibAPR).
+
+In addition to providing wrappers for most of the LibAPR functionality, the Python library contains a number of new features that simplify the generation and handling of the APR. For example:
+
+* Interactive APR conversion
+* Interactive APR z-slice viewer
+* Interactive APR raycast (maximum intensity projection) viewer
+* Interactive lossy compression of particle intensities
+
+
+## Version 2.0 release notes
+
+The library has changed significantly since release 1.1. __There are changes to IO and iteration that are not compatible 
+with the older version__. 
+
+* New (additional) linear access data structure, explicitly storing coordinates in the sparse dimension, 
+  similar to Compressed Sparse Row.
+* Block-based decomposition of the APR generation pipeline, allowing conversion of very large images.
+* Expanded and improved functionality for image processing directly on the APR:
+  * APR filtering (spatial convolutions).
+  * [APRNumerics](./src/numerics/APRNumerics.hpp) module, including e.g. gradient computations and Richardson-Lucy deconvolution.
+  * CUDA GPU-accelerated convolutions and RL deconvolution (currently only supports dense 3x3x3 and 5x5x5 stencils).
+
+
 ## Dependencies
 
 * HDF5 1.8.20 or higher
-* OpenMP > 3.0 (optional, but suggested)
+* OpenMP > 3.0 (optional, but recommended)
 * CMake 3.6 or higher
 * LibTIFF 4.0 or higher
 
-NB: This update to 2.0 introduces changes to IO and iteration that are not compatable with old versions.
-
 ## Building
 
-The repository requires sub-modules, so the repository needs to be cloned recursively:
+The repository requires submodules, and needs to be cloned recursively:
 
 ```
-git clone --recursive https://github.com/cheesema/LibAPR
+git clone --recursive https://github.com/AdaptiveParticles/LibAPR.git
 ```
 
-If you need to update your clone at any point later, run
+### CMake build options
 
+Several CMake options can be given to control the build. Use the `-D` argument to set each
+desired option. For example, to disable OpenMP, change the cmake calls below to
 ```
-git pull
-git submodule update
+cmake -DAPR_USE_OPENMP=OFF ..
 ```
 
+| Option | Description | Default value |
+|:--|:--|:--|
+| APR_BUILD_SHARED_LIB | Build shared library | ON |
+| APR_BUILD_STATIC_LIB | Build static library | OFF |
+| APR_BUILD_EXAMPLES | Build executable examples | OFF |
+| APR_TESTS | Build unit tests | OFF |
+| APR_BENCHMARK | Build executable performance benchmarks | OFF |
+| APR_USE_LIBTIFF | Enable LibTIFF (Required for tests and examples) | ON |
+| APR_PREFER_EXTERNAL_GTEST | Use installed gtest instead of included sources | OFF |
+| APR_PREFER_EXTERNAL_BLOSC | Use installed blosc instead of included sources | OFF |
+| APR_USE_OPENMP | Enable multithreading via OpenMP | ON |
+| APR_USE_CUDA | Enable CUDA (Under development - APR conversion pipeline is currently not working with CUDA enabled) | OFF |
+
 ### Building on Linux
 
 On Ubuntu, install the `cmake`, `build-essential`, `libhdf5-dev` and `libtiff5-dev` packages (on other distributions, refer to the documentation there, the package names will be similar). OpenMP support is provided by the GCC compiler installed as part of the `build-essential` package.
@@ -46,11 +85,7 @@ cmake ..
 make
 ```
 
-This will create the `libapr.so` library in the `build` directory, as well as all of the examples.
-
-### Docker build
-
-We provide a working Dockerfile that install the library within the image on a separate [repo](https://github.com/MSusik/libaprdocker).
+This will create the `libapr.so` library in the `build` directory.
 
 ### Building on OSX
 
@@ -67,7 +102,8 @@ cmake ..
 make
 ```
 
-This will create the `libapr.dylib` library in the `build` directory, as well as all of the examples.
+This will create the `libapr.dylib` library in the `build` directory.
+
 
 In case you want to use the homebrew-installed clang (OpenMP support), modify the call to `cmake` above to
 
@@ -96,56 +132,60 @@ cmake -G "Visual Studio 15 2017 Win64" -DTIFF_INCLUDE_DIR="C:/Program Files/tiff
 cmake --build . --config Debug
 ```
 
-This will set the appropriate hints for Visual Studio to find both LibTIFF and HDF5. This will create the `apr.dll` library in the `build/Debug` directory, as well as all of the examples. If you need a `Release` build, run `cmake --build . --config Release` from the `build` directory.
+This will set the appropriate hints for Visual Studio to find both LibTIFF and HDF5. This will create the `apr.dll` library in the `build/Debug` directory. If you need a `Release` build, run `cmake --build . --config Release` from the `build` directory.
+
+### Docker build
+
+We provide a working Dockerfile that installs the library within the image in a separate [repository](https://github.com/MSusik/libaprdocker).
+
+Note: not recently tested.
 
 ## Examples and Documentation
-These examples can be turned on by adding -DAPR_BUILD_EXAMPLES=ON to the cmake command.
 
-There are nine basic examples, that show how to generate and compute with the APR:
+There are 12 basic examples that show how to generate and compute with the APR. These can be built by adding
+`-DAPR_BUILD_EXAMPLES=ON` to the cmake command.
 
 | Example | How to ... |
 |:--|:--|
 | [Example_get_apr](./examples/Example_get_apr.cpp) | create an APR from a TIFF and store as hdf5. |
-| [Example_apr_iterate](./examples/Example_apr_iterate.cpp) | iterate through a given APR. |
+| [Example_get_apr_by_block](./examples/Example_get_apr_by_block.cpp) | create an APR from a (potentially large) TIFF, by decomposing it into smaller blocks, and store as hdf5. |
+| [Example_apr_iterate](./examples/Example_apr_iterate.cpp) | iterate over APR particles and their spatial properties. |
+| [Example_apr_tree](./examples/Example_apr_tree.cpp) | iterate over interior APR tree particles and their spatial properties. |
 | [Example_neighbour_access](./examples/Example_neighbour_access.cpp) | access particle and face neighbours. |
 | [Example_compress_apr](./examples/Example_compress_apr.cpp) |  additionally compress the intensities stored in an APR. |
 | [Example_random_access](./examples/Example_random_access.cpp) | perform random access operations on particles. |
-| [Example_ray_cast](./examples/Example_ray_cast.cpp) | perform a maximum intensity projection ray cast directly on the APR data structures read from an APR. |
+| [Example_ray_cast](./examples/Example_ray_cast.cpp) | perform a maximum intensity projection ray cast directly on the APR. |
 | [Example_reconstruct_image](./examples/Example_reconstruct_image.cpp) | reconstruct a pixel image from an APR. |
+| [Example_compute_gradient](./examples/Example_compute_gradient.cpp) | compute the gradient magnitude of an APR. |
+| [Example_apr_filter](./examples/Example_apr_filter.cpp) | apply a filter (convolution) to an APR. |
+| [Example_apr_deconvolution](./examples/Example_apr_deconvolution.cpp) | perform Richardson-Lucy deconvolution on an APR. |
 
-All examples except Example_get_apr require an already produced APR, such as those created by Example_get_apr.
+All examples except `Example_get_apr` and `Example_get_apr_by_block` require an already produced APR, such as those created by `Example_get_apr*`.
 
 For tutorial on how to use the examples, and explanation of data-structures see [the library guide](./docs/lib_guide.pdf).
 
 ## LibAPR Tests
 
-The testing framework can be turned on by adding -DAPR_TESTS=ON to the cmake command. All tests can then be run by executing on the command line your build folder.
+The testing framework can be turned on by adding -DAPR_TESTS=ON to the cmake command. All tests can then be run by executing
 ```
 ctest
 ```
-Please let us know by creating an issue, if any of these tests are failing on your machine.
-
-## Python support
-
-Note: These have been updated and externalised, and will be released shortly.
+on the command line in your build folder. Please let us know by creating an issue if any of these tests fail on your machine.
 
 ## Java wrappers
 
 Basic Java wrappers can be found at [LibAPR-java-wrapper](https://github.com/krzysg/LibAPR-java-wrapper)
 
 ## Coming soon
 
-* more examples for APR-based filtering and segmentation
-* deployment of the Java wrappers to Maven Central so they can be used in your project directly
-* support for loading the APR in [Fiji](https://fiji.sc), including [scenery-based](https://github.com/scenerygraphics/scenery) 3D rendering
-* improved java wrapper support
-* CUDA GPU-accelerated APR generation and processing
-* Block based decomposition for extremely large images.
+* Improved documentation and updated library guide.
+* More examples of APR-based image processing and segmentation.
+* CUDA GPU-accelerated APR generation and additional processing options.
 * Time series support.
 
 ## Contact us
 
-If anything is not working as you think it should, or would like it to, please get in touch with us!! Further, if you have a project, or algorithm, you would like to try using the APR for also please get in contact we would be glad to help!
+If anything is not working as you think it should, or would like it to, please get in touch with us!! Further, don't hesitate to contact us if you have a project or algorithm you would like to try using the APR for. We would be glad to help!
 
 [![Join the chat at https://gitter.im/LibAPR](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/LibAPR/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 
 
@@ -26,6 +26,7 @@ struct cmdLineBenchOptions{
     int number_reps = 1;
     int dimension = 3;
     bool no_pixel = false;
+    bool bench_lr = false;
 
     std::string analysis_file_name = "analysis";
     std::string output_dir = "";
@@ -86,6 +87,11 @@ cmdLineBenchOptions read_bench_command_line_options(int argc, char **argv){
         result.no_pixel = true;
     }
 
+    if(command_option_exists(argv, argv + argc, "-bench_lr"))
+    {
+        result.bench_lr = true;
+    }
+
     return result;
 
 }
@@ -259,9 +265,9 @@ class APRBenchHelper {
 
         auto it = apr_input.iterator();
 
-        float check_y = log2(1.0f * it.org_dims(0));
-        float check_x = log2(1.0f * it.org_dims(1));
-        float check_z = log2(1.0f * it.org_dims(2));
+        float check_y = log2f(1.0f * it.org_dims(0));
+        float check_x = log2f(1.0f * it.org_dims(1));
+        float check_z = log2f(1.0f * it.org_dims(2));
 
         //this function only works for datasets that are powers of 2.
         bool pow_2y = (check_y - std::floor(check_y)) == 0;
@@ -314,14 +320,14 @@ class APRBenchHelper {
         timer.start_timer("first loop");
 
         //first do the y extension.
-        for (unsigned int level = lin_it.level_min(); level <= lin_it.level_max(); ++level) {
+        for (int level = lin_it.level_min(); level <= lin_it.level_max(); ++level) {
             int z = 0;
             int x = 0;
 
             int new_level = level_offset + level;
 
 #ifdef HAVE_OPENMP
-#pragma omp parallel for schedule(dynamic) private(z,x) firstprivate(lin_it,lin_it_tiled)
+#pragma omp parallel for schedule(dynamic) default(shared) private(z,x) firstprivate(lin_it,lin_it_tiled)
 #endif
             for (z = 0; z < lin_it.z_num(level); z++) {
                 for (x = 0; x < lin_it.x_num(level); ++x) {
@@ -358,14 +364,14 @@ class APRBenchHelper {
         timer.start_timer("second loop");
 
         //first do the y extension.
-        for (unsigned int level = lin_it.level_min(); level <= lin_it.level_max(); ++level) {
+        for (int level = lin_it.level_min(); level <= lin_it.level_max(); ++level) {
             int z = 0;
             int x = 0;
 
             int new_level = level_offset + level;
 
 #ifdef HAVE_OPENMP
-#pragma omp parallel for schedule(dynamic) private(z,x) firstprivate(lin_it,lin_it_tiled)
+#pragma omp parallel for schedule(dynamic) default(shared) private(z,x) firstprivate(lin_it,lin_it_tiled)
 #endif
             for (z = 0; z < lin_it.z_num(level); z++) {
                 for (x = 0; x < lin_it.x_num(level); ++x) {
 
@@ -120,7 +120,7 @@ inline void bench_access_partial(APR& apr,ParticleData<partsType>& parts,int num
         error_check( cudaGetLastError() )
 
         auto access = apr.gpuAPRHelper();
-        access.init_gpu(access.total_number_particles(tree_access.level_max()), tree_access);
+        access.init_gpu(tree_access);
         error_check ( cudaDeviceSynchronize() )
         error_check( cudaGetLastError() )
     }
@@ -137,7 +137,7 @@ inline void bench_access_partial(APR& apr,ParticleData<partsType>& parts,int num
 
         timer2.start_timer("apr access");
         auto access = apr.gpuAPRHelper();
-        access.init_gpu(access.total_number_particles(tree_access.level_max()), tree_access);
+        access.init_gpu(tree_access);
         error_check ( cudaDeviceSynchronize() )
         timer2.stop_timer();
         apr_time += timer2.timings.back();
 
@@ -37,24 +37,28 @@ int main(int argc, char **argv) {
 
         benchAPRHelper.generate_dataset(i,apr,parts);
 
+        ParticleData<float> floatparts;
+        floatparts.copy(parts);
+
         //put benchmark funtions here..
 
         //bench_apr_convolve(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,1);
-        bench_apr_convolve(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,3);
-        bench_apr_convolve(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,5);
+        bench_apr_convolve(apr,floatparts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,3, false);
+        bench_apr_convolve(apr,floatparts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,5, false);
+        //bench_apr_convolve(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,9, true);
 
         //bench_apr_convolve_pencil(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,1);
-        bench_apr_convolve_pencil(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,3);
-        bench_apr_convolve_pencil(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,5);
+        bench_apr_convolve_pencil(apr,floatparts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,3, false);
+        bench_apr_convolve_pencil(apr,floatparts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,5, false);
 
         if((i==0) && !options.no_pixel){
             /*
             * Pixel benchmarks (These are content independent)
             */
 
             //bench_pixel_convolve(apr,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,1);
-            bench_pixel_convolve(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,3);
-            bench_pixel_convolve(apr,parts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,5);
+            bench_pixel_convolve(apr,floatparts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,3);
+            bench_pixel_convolve(apr,floatparts,benchAPRHelper.get_number_reps(),benchAPRHelper.analysisData,5);
 
         }
     }