Merged
Changes from 6 commits
6 changes: 6 additions & 0 deletions docs/src/complexity_measures.md
@@ -4,4 +4,10 @@

## Approximate entropy

## Reverse dispersion entropy

```@docs
reverse_dispersion
```

## Disequilibrium
6 changes: 6 additions & 0 deletions docs/src/probabilities.md
@@ -24,6 +24,12 @@ SymbolicPermutation
SpatialSymbolicPermutation
```

## Dispersion (symbolic)

```@docs
Dispersion
```

## Visitation frequency (binning)

```@docs
2 changes: 2 additions & 0 deletions src/Entropies.jl
@@ -16,6 +16,8 @@ include("symbolization/symbolize.jl")
include("probabilities.jl")
include("probabilities_estimators/probabilities_estimators.jl")
include("entropies/entropies.jl")
include("complexity_measures/complexity_measures.jl")

include("deprecations.jl")


1 change: 1 addition & 0 deletions src/complexity_measures/complexity_measures.jl
@@ -0,0 +1 @@
include("reverse_dispersion_entropy.jl")
35 changes: 35 additions & 0 deletions src/complexity_measures/reverse_dispersion_entropy.jl
@@ -0,0 +1,35 @@
export reverse_dispersion

function distance_to_whitenoise(p::Probabilities, N, m)
# We can safely skip non-occurring symbols, because they don't contribute
# to the sum in eq. 3 in Li et al. (2019)
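    # Expanding eq. 3: ∑ᵢ (pᵢ - 1/N^m)² = ∑ᵢ pᵢ² - 2/N^m + 1/N^m = ∑ᵢ pᵢ² - 1/N^m,
    # using ∑ᵢ pᵢ = 1, so only the squared probabilities of occurring symbols are needed.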
return sum(abs2, p) - 1/(N^m)
end

"""
    reverse_dispersion(x::AbstractVector{T}; s = GaussianSymbolization(5),
        m = 2, τ = 1, normalize = true) where T <: Real

Compute the reverse dispersion entropy complexity measure (Li et al., 2019)[^Li2019],
which quantifies how far a signal is from being white noise. If `normalize == true`,
the result is divided by its maximum possible value, so that it lies in `[0, 1]`.
[^Li2019]: Li, Y., Gao, X., & Wang, L. (2019). Reverse dispersion entropy: a new
complexity measure for sensor signal. Sensors, 19(23), 5203.
"""
function reverse_dispersion(x::AbstractVector{T}; s = GaussianSymbolization(5),
m = 2, τ = 1, normalize = true) where T <: Real
    est = Dispersion(τ = τ, m = m, s = s)
p = probabilities(x, est)

# The following step combines distance information with the probabilities, so
    # from here on, it is not possible to use `entropy_renyi` or similar methods, because
# we're not dealing with probabilities anymore.
Hrde = distance_to_whitenoise(p, s.n_categories, m)

if normalize
# The factor `f` considers *all* possible symbols (also non-occurring)
f = s.n_categories^m
return Hrde / (1 - (1/f))
else
return Hrde
end
end
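Not part of the diff: a minimal sketch of how the new function might be called, based on the
signature and keyword defaults above (the input series and parameter values are purely
illustrative):

```julia
using Entropies

x = rand(1000)

# Distance to white noise, rescaled to [0, 1] by the maximum possible distance.
rde = reverse_dispersion(x; s = GaussianSymbolization(n_categories = 5), m = 2, τ = 1,
    normalize = true)

# Raw (unnormalized) distance.
rde_raw = reverse_dispersion(x; normalize = false)
```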
61 changes: 0 additions & 61 deletions src/entropies/direct_entropies/entropy_dispersion.jl

This file was deleted.

1 change: 0 additions & 1 deletion src/entropies/entropies.jl
@@ -3,6 +3,5 @@ include("tsallis.jl")
include("shannon.jl")
include("convenience_definitions.jl")
include("direct_entropies/nearest_neighbors/nearest_neighbors.jl")
include("direct_entropies/entropy_dispersion.jl")

# TODO: What else is included here from direct entropies?
113 changes: 113 additions & 0 deletions src/probabilities_estimators/dispersion/dispersion.jl
@@ -0,0 +1,113 @@
using DelayEmbeddings
using StatsBase

export Dispersion

"""
Dispersion(; s = GaussianSymbolization(5), m = 2, τ = 1, check_unique = true,
normalize = true)

A probability estimator based on dispersion patterns, originally used by
Rostaghi & Azami, 2016[^Rostaghi2016] to compute the "dispersion entropy", which
characterizes the complexity and irregularity of a time series.

Relative frequencies of dispersion patterns are computed using the symbolization scheme
`s` with embedding dimension `m` and embedding delay `τ`. Recommended parameter
values[^Li2018] are `m ∈ [2, 3]`, `τ = 1`, and `n_categories ∈ [3, 4, …, 8]` for the
Gaussian mapping (defaults to 5).

If `normalize == true`, then when used in combination with [`entropy_renyi`](@ref)
(see below), the dispersion entropy is normalized to `[0, 1]`.

If `check_unique == true` (default), then it is checked that the input has
more than one unique value. If `check_unique == false` and the input only has one
unique element, then an `InexactError` is thrown when trying to compute probabilities.

## Probabilities vs dispersion entropy

The original dispersion entropy paper does not discuss the technique as a probability
estimator per se, but does require a step where probabilities over dispersion patterns
are explicitly computed. Hence, we provide `Dispersion` as a probability estimator.

To compute (normalized) dispersion entropy of order `q = 1` to a given `base` on the
univariate input time series `x`, do:

```julia
entropy_renyi(x, Dispersion(normalize = true), base = base, q = 1)
```

Normalization is currently only implemented for `q == 1`; for other orders, use
`Dispersion(normalize = false)`.

## Data requirements

The input must have more than one unique element for the Gaussian mapping to be
well-defined. Li et al. (2018)[^Li2018] recommend that `x` contain at least 1000 data points.

[^Rostaghi2016]: Rostaghi, M., & Azami, H. (2016). Dispersion entropy: A measure for time-series analysis. IEEE Signal Processing Letters, 23(5), 610-614.
[^Li2018]: Li, G., Guan, Q., & Yang, H. (2018). Noise reduction method of underwater acoustic signals based on CEEMDAN, effort-to-compress complexity, refined composite multiscale dispersion entropy and wavelet threshold denoising. Entropy, 21(1), 11.
"""
Base.@kwdef struct Dispersion <: ProbabilitiesEstimator
s = GaussianSymbolization(n_categories = 5)
m = 2
τ = 1
    check_unique = true
normalize = true
end

export entropy_dispersion

"""
    embed_symbols(s::AbstractVector{T}, m, τ) where {T} → Dataset{m, T}

From the symbols `sᵢ ∈ s`, create the embedding vectors (with dimension `m` and lag `τ`):

```math
s_i^D = \\{s_i, s_{i+\\tau}, \\ldots, s_{i+(m-1)\\tau} \\}
```

where ``i = 1, 2, \\ldots, N - (m - 1)\\tau`` and `N = length(s)`.
"""
function embed_symbols(symbols::AbstractVector, m, τ)
return embed(symbols, m, τ)
end

function dispersion_histogram(x::AbstractDataset, N, m, τ)
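    # A length-N series yields N - (m - 1)*τ dispersion patterns (embedding vectors),
    # so normalizing the counts by this number makes the histogram sum to 1.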
return _non0hist(x.data, (N - (m - 1)*τ))
end

function probabilities(x::AbstractVector, est::Dispersion)
if est.check_unique
if length(unique(x)) == 1
symbols = repeat([1], length(x))
else
symbols = symbolize(x, est.s)
end
else
symbols = symbolize(x, est.s)
end
N = length(x)

# We must use genembed, not embed, to make sure the zero lag is included
m, τ = est.m, est.τ
τs = tuple((x for x in 0:-τ:-(m-1)*τ)...)
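    # For example, m = 3 and τ = 1 gives τs = (0, -1, -2): each dispersion pattern
    # consists of the symbol itself and the m - 1 preceding symbols.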
dispersion_patterns = genembed(symbols, τs, ones(m))
hist = dispersion_histogram(dispersion_patterns, N, est.m, est.τ)
    return Probabilities(hist)
end

function entropy_renyi(x::AbstractVector, est::Dispersion; q = 1, base = MathConstants.e)
p = probabilities(x, est)
dh = entropy_renyi(p, q = q, base = base)

n, m = est.s.n_categories, est.m

if est.normalize
        # TODO: is it possible to normalize for general order `q`? Need to have a literature
# dive or figure it out manually.
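        # For q == 1 (Shannon entropy), the maximum is log(base, n^m), attained by the
        # uniform distribution over the n^m possible dispersion patterns.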
if q == 1
return dh / log(base, n^m)
else
throw(ArgumentError("Normalization is not well defined when q != 1."))
end
else
return dh
end
end
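Not part of the diff: a minimal usage sketch of the estimator added in this file, assuming
`Dispersion`, `probabilities`, `entropy_renyi`, and `GaussianSymbolization` are exported by
`Entropies` as in this PR (values are illustrative):

```julia
using Entropies

x = rand(1000)
est = Dispersion(s = GaussianSymbolization(n_categories = 5), m = 2, τ = 1)

# Relative frequencies of dispersion patterns.
p = probabilities(x, est)

# Normalized dispersion entropy (only defined for q = 1).
de = entropy_renyi(x, Dispersion(normalize = true), q = 1, base = 2)
```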
3 changes: 2 additions & 1 deletion src/probabilities_estimators/probabilities_estimators.jl
@@ -3,4 +3,5 @@ include("rectangular_binning/rectangular_binning.jl")
include("permutation_ordinal/symbolic.jl")
include("kernel_density.jl")
include("timescales/timescales.jl")
include("transfer_operator/transfer_operator.jl")
include("transfer_operator/transfer_operator.jl")
include("dispersion/dispersion.jl")
7 changes: 7 additions & 0 deletions test/complexity_measures.jl
@@ -0,0 +1,7 @@
using Entropies, Test

@testset "Reverse dispersion entropy" begin
x = rand(100)
@test reverse_dispersion(x) isa Real
@test 0.0 <= reverse_dispersion(x, normalize = true) <= 1.0
end
35 changes: 35 additions & 0 deletions test/dispersion.jl
@@ -0,0 +1,35 @@
using Entropies, Test

@testset "Dispersion methods" begin
x = rand(100)

@testset "Dispersion" begin

@testset "Internals" begin
Member comment: A robust test suite tests that the expected functionality is met, not how it is met. Consequently, only the exported API should be used in the test suite; the internals themselves are not tested, only their final outcome. See also #85

# Li et al. (2018) recommends using at least 1000 data points when estimating
# dispersion entropy.
x = rand(1000)
n_categories = 4
m = 4
τ = 1
s = GaussianSymbolization(n_categories = n_categories)

# Symbols should be in the set [1, 2, …, n_categories].
symbols = Entropies.symbolize(x, s)
@test all([s ∈ collect(1:n_categories) for s in symbols])

# Dispersion patterns should have a normalized histogram that sums to 1.0.
dispersion_patterns = DelayEmbeddings.embed(symbols, m, τ)
hist = Entropies.dispersion_histogram(dispersion_patterns, length(x), m, τ)
@test sum(hist) ≈ 1.0
end

ps = probabilities(x, Dispersion())
@test ps isa Probabilities

de_norm = entropy_renyi(x, Dispersion(normalize = true), q = 1, base = 2)
@test 0.0 <= de_norm <= 1.0

@test_throws ArgumentError entropy_renyi(x, Dispersion(normalize = true), q = 2)
end
end
27 changes: 3 additions & 24 deletions test/runtests.jl
@@ -8,8 +8,10 @@ using Neighborhood: KDTree, BruteForce
# TODO: This is how the tests should look in the end:
defaultname(file) = splitext(basename(file))[1]
testfile(file, testname=defaultname(file)) = @testset "$testname" begin; include(file); end
@testset "Entopies.jl" begin
@testset "Entropies.jl" begin
testfile("timescales.jl")
testfile("dispersion.jl")
testfile("complexity_measures.jl")
end

@testset "Histogram estimation" begin
@@ -325,29 +327,6 @@ end
end
end

@testset "Dispersion entropy" begin
# Li et al. (2018) recommends using at least 1000 data points when estimating
# dispersion entropy.
x = rand(1000)
n_categories = 4
m = 4
τ = 1
s = GaussianSymbolization(n_categories = n_categories)

# Symbols should be in the set [1, 2, …, n_categories].
symbols = Entropies.symbolize(x, s)
@test all([s ∈ collect(1:n_categories) for s in symbols])

# Dispersion patterns should have a normalized histogram that sums to 1.0.
dispersion_patterns = DelayEmbeddings.embed(symbols, m, τ)
hist = Entropies.dispersion_histogram(dispersion_patterns, length(x), m, τ)
@test sum(hist) ≈ 1.0

de = entropy_dispersion(x, s, m = 4, τ = 1)
@test typeof(de) <: Real
@test de >= 0.0
end

@testset "Tsallis" begin
p = Probabilities(repeat([1/5], 5))
@assert round(entropy_tsallis(p, q = -1/2, k = 1), digits = 2) ≈ 6.79