Improve BY adjustment performance by approximating harmonic numbers

juliangehring · juliangehring · commit dbea112003e9 · 2017-02-17T00:56:43.000Z
The Benjamini-Yekutieli adjustment uses the harmonic number in its step
function. The previous implementation, which summed the individual 1/x terms
for x = 1, ..., n, was very inefficient for large n. It has been replaced by an
approximation of the harmonic number with constant run time. In comparison with
the previous implementation,
- run time is equivalent for small n
- run time is reduced by several orders of magnitude for large n
- the approximated values agree with the exact numbers to within one order of
  magnitude of floating point precision, similar to the precision obtained
  through summation.
diff --git a/src/pval-adjustment.jl b/src/pval-adjustment.jl
@@ -87,7 +87,7 @@ function benjamini_yekutieli(pValues::PValues, n::Integer)
     return min(sortedPValues[originalOrder], 1)
 end
 
-benjamini_yekutieli_step(p::AbstractFloat, i::Int, k::Int, n::Int) = p * sum(1./(1:n))*n/(k-i)
+benjamini_yekutieli_step(p::AbstractFloat, i::Int, k::Int, n::Int) = p * harmonic_number(n) * n/(k-i)
 
 
 # Benjamini-Liu
@@ -261,3 +261,6 @@ function check_number_tests(k::Integer, n::Integer)
         throw(ArgumentError(msg))
     end
 end
+
+
+harmonic_number(n::Integer) =  digamma(n+1) + γ
diff --git a/test/test-utils.jl b/test/test-utils.jl
@@ -103,6 +103,24 @@ using Base.Test
     end
 
 
+    @testset "harmonic_number" begin
+
+        # Exact computation as reference
+        harm_n_exact(n::Integer) = sum([Rational(1, i) for i in 1:BigInt(n)])
+
+        n = [1:100; 200:200:1000; 10000]
+
+        max_d = 0.0
+        for i in n
+            hn1 = MultipleTesting.harmonic_number(i)
+            hn2 = harm_n_exact(i)
+            max_d = max(abs(hn1 - hn2), max_d)
+        end
+        # approximation error in the range of floating point inaccuracy
+        @test max_d < (10*eps())
+
+    end
+
 end
 
 end