Skip to content

Commit b065033

Browse files
authored
Add multivariate kernel density estimator (#1013)
1 parent 6869bdc commit b065033

File tree

6 files changed

+189
-2
lines changed

6 files changed

+189
-2
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,11 +127,11 @@ for (let i = 0; i < n; i++) {
127127
| regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, LOESS, spline, Naive Bayes, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MARS, MLP, ELM, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median |
128128
| interpolation | Nearest neighbor, IDW, (Spherical) Linear, Brahmagupta, Logarithmic, Cosine, (Inverse) Smoothstep, Cubic, (Centripetal) Catmull-Rom, Hermite, Polynomial, Lagrange, Trigonometric, Spline, RBF Network, Akima, Natural neighbor, Delaunay |
129129
| learning to rank | Ordered logistic, Ordered probit, PRank, OAP-BPM, RankNet |
130-
| anomaly detection | Percentile, MAD, Tukey's fences, Grubbs's test, Thompson test, Tietjen Moore test, Generalized ESD, Hotelling, MT, MCD, k-nearest neighbor, LOF, COF, ODIN, LDOF, INFLO, LOCI, LoOP, RDF, LDF, KDEOS, RDOS, NOF, RKOF, ABOD, PCA, OCSVM, KDE, GMM, Isolation forest, Autoencoder, GAN |
130+
| anomaly detection | Percentile, MAD, Tukey's fences, Grubbs's test, Thompson test, Tietjen Moore test, Generalized ESD, Hotelling, MT, MCD, k-nearest neighbor, LOF, COF, ODIN, LDOF, INFLO, LOCI, LoOP, RDF, LDF, KDEOS, RDOS, NOF, RKOF, ABOD, PCA, OCSVM, (Multivariate) KDE, GMM, Isolation forest, Autoencoder, GAN |
131131
| dimensionality reduction | Random projection, (Dual / Kernel / Incremental / Probabilistic) PCA, GPLVM, LSA, MDS, Linear discriminant analysis, NCA, ICA, Principal curve, Sammon, FastMap, Sliced inverse regression, LLE, HLLE, MLLE, Laplacian eigenmaps, Isomap, LTSA, Diffusion map, SNE, t-SNE, UMAP, SOM, GTM, NMF, MOD, K-SVD, Autoencoder, VAE |
132132
| feature selection | Mutual information, Ridge, Lasso, Elastic net, Decision tree, NCA |
133133
| transformation | Box-Cox, Yeo-Johnson |
134-
| density estimation | Histogram, Average shifted histogram, Polynomial histogram, Maximum likelihood, Kernel density estimation, k-nearest neighbor, Naive Bayes, GMM, HMM |
134+
| density estimation | Histogram, Average shifted histogram, Polynomial histogram, Maximum likelihood, (Multivariate) Kernel density estimation, k-nearest neighbor, Naive Bayes, GMM, HMM |
135135
| generate | MH, Slice sampling, GMM, GBRBM, HMM, VAE, GAN, NICE, Diffusion |
136136
| smoothing | (Linear weighted / Triangular / Cumulative) Moving average, Exponential average, Moving median, KZ filter, Savitzky Golay filter, Hampel filter, Kalman filter, Particle filter, Lowpass filter, Bessel filter, Butterworth filter, Chebyshev filter, Elliptic filter |
137137
| timeseries prediction | Holt winters, AR, ARMA, SDAR, VAR, Kalman filter, MLP, RNN |

js/model_selector.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,7 @@ const AIMethods = [
400400
{ value: 'pca', title: 'PCA' },
401401
{ value: 'ocsvm', title: 'One class SVM' },
402402
{ value: 'kernel_density_estimator', title: 'Kernel Density Estimator' },
403+
{ value: 'multivariate_kernel_density_estimator', title: 'Multivariate Kernel Density Estimator' },
403404
{ value: 'gmm', title: 'Gaussian mixture model' },
404405
{ value: 'isolation_forest', title: 'Isolation Forest' },
405406
{ value: 'autoencoder', title: 'Autoencoder' },
@@ -467,6 +468,7 @@ const AIMethods = [
467468
{ value: 'polynomial_histogram', title: 'Polynomial Histogram' },
468469
{ value: 'maximum_likelihood', title: 'Maximum Likelihood' },
469470
{ value: 'kernel_density_estimator', title: 'Kernel Density Estimator' },
471+
{ value: 'multivariate_kernel_density_estimator', title: 'Multivariate Kernel Density Estimator' },
470472
{ value: 'knearestneighbor', title: 'k nearest neighbor' },
471473
{ value: 'naive_bayes', title: 'Naive Bayes' },
472474
{ value: 'gmm', title: 'Gaussian mixture model' },
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import MultivariateKernelDensityEstimator from '../../lib/model/multivariate_kernel_density_estimator.js'
2+
import Controller from '../controller.js'
3+
import { specialCategory } from '../utils.js'
4+
5+
/**
 * Set up the multivariate kernel density estimator view.
 *
 * Registers the usage text and reference link on the platform, then adds a
 * bandwidth-method selector, a threshold input (only when the task is 'AD')
 * and a "Fit" button that trains the model and pushes results to the platform.
 *
 * @param {object} platform Platform object; this function reads `task`, `trainInput`, `testInput`
 * and writes `setting.ml`, `trainResult`, `testResult` (exact type is declared elsewhere in the project)
 */
export default function (platform) {
	platform.setting.ml.usage = 'Click and add data point. Next, click "Fit" button.'
	platform.setting.ml.reference = {
		title: 'Multivariate kernel density estimation (Wikipedia)',
		url: 'https://en.wikipedia.org/wiki/Multivariate_kernel_density_estimation',
	}
	const controller = new Controller(platform)
	const fitModel = () => {
		const model = new MultivariateKernelDensityEstimator(method.value)
		model.fit(platform.trainInput)

		const pred = model.predict(platform.testInput(8))
		if (platform.task === 'DE') {
			// Single pass for the extremes: spreading a large prediction array into
			// Math.min/Math.max can exceed the engine's argument-count limit.
			let min = Infinity
			let max = -Infinity
			for (const v of pred) {
				if (v < min) min = v
				if (v > max) max = v
			}
			// A flat density (max === min) would otherwise yield 0 / 0 = NaN.
			const range = max - min
			platform.testResult(pred.map(v => specialCategory.density(range > 0 ? (v - min) / range : 0)))
		} else {
			// Points whose estimated density is below the threshold are flagged.
			const y = model.predict(platform.trainInput)
			platform.trainResult = y.map(v => v < threshold.value)
			platform.testResult(pred.map(v => v < threshold.value))
		}
	}

	// `method` and `threshold` are read lazily inside fitModel, so they may be created after it.
	const method = controller.select(['silverman', 'scott'])
	let threshold = null
	if (platform.task === 'AD') {
		threshold = controller.input
			.number({ label: ' threshold = ', min: 0, max: 10, step: 0.01, value: 0.3 })
			.on('change', fitModel)
	}
	controller.input.button('Fit').on('click', () => fitModel())
}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import Matrix from '../util/matrix.js'
2+
3+
/**
 * Multivariate kernel density estimator
 *
 * Uses a Gaussian kernel with a diagonal bandwidth matrix whose entries are
 * derived from the per-dimension standard deviation of the training data.
 */
export default class MultivariateKernelDensityEstimator {
	// https://en.wikipedia.org/wiki/Multivariate_kernel_density_estimation
	/**
	 * @param {'silverman' | 'scott'} [method] Optimal bandwidth method
	 */
	constructor(method = 'silverman') {
		this._method = method
	}

	/**
	 * Evaluate the Gaussian kernel for each row of `x`.
	 *
	 * NOTE(review): relies on `mult` and `map` updating the matrix in place and on
	 * `sum(1)` reducing each row to one value — confirm against the Matrix utility.
	 * @private
	 * @param {Matrix} invh Inverse of the bandwidth matrix
	 * @param {number} sqrtdeth Square root of the determinant of the bandwidth matrix
	 * @param {Matrix} x Difference vectors, one per row
	 * @returns {Matrix} Kernel values
	 */
	_kernel(invh, sqrtdeth, x) {
		const d = x.cols
		const k = x.dot(invh)
		k.mult(x)
		const ks = k.sum(1)
		ks.map(v => Math.exp(-v / 2) / ((2 * Math.PI) ** (d / 2) * sqrtdeth))

		return ks
	}

	/**
	 * Scale factor applied to each standard deviation to obtain the bandwidth.
	 * @private
	 * @param {number} n Number of training samples
	 * @param {number} d Number of dimensions
	 * @returns {number} Scale factor of the selected rule of thumb
	 * @throws {Error} If the method is not implemented or unknown
	 */
	_bandwidthScale(n, d) {
		switch (this._method) {
			case 'pi':
			case 'scv':
				throw new Error('Not implemented')
			case 'silverman':
				return (4 / (d + 2)) ** (1 / (d + 4)) / n ** (1 / (d + 4))
			case 'scott':
				return 1 / n ** (1 / (d + 4))
			default:
				// Previously an unknown method fell through and crashed later on an undefined bandwidth matrix.
				throw new Error(`Invalid method: ${this._method}`)
		}
	}

	/**
	 * Fit model.
	 * @param {Array<Array<number>>} x Training data
	 * @throws {Error} If the training data is empty, or the bandwidth method is not implemented or unknown
	 */
	fit(x) {
		const n = x.length
		if (n === 0) {
			throw new Error('Training data must not be empty')
		}
		const d = x[0].length
		// Validate the method before touching instance state so a failed fit
		// never leaves the model half-initialized.
		const s = this._bandwidthScale(n, d)

		this._x = Matrix.fromArray(x)
		// Presumably the per-dimension standard deviations as a flat array — confirm against Matrix.
		const std = this._x.std(0).value
		this._h = Matrix.zeros(d, d)
		for (let i = 0; i < d; i++) {
			this._h.set(i, i, (std[i] * s) ** 2)
		}

		this._invh = this._h.inv()
		this._hsqrtdet = Math.sqrt(this._h.det())
	}

	/**
	 * Returns the estimated densities of the data.
	 * @param {Array<Array<number>>} x Sample data
	 * @returns {number[]} Predicted values
	 */
	probability(x) {
		return x.map(v => {
			const xi = new Matrix(1, v.length, v)
			// Presumably broadcasts against every training row, giving one difference vector per row — TODO confirm.
			xi.isub(this._x)

			return this._kernel(this._invh, this._hsqrtdet, xi).mean()
		})
	}

	/**
	 * Returns the estimated densities of the data.
	 * @param {Array<Array<number>>} x Sample data
	 * @returns {number[]} Predicted values
	 */
	predict(x) {
		return this.probability(x)
	}
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import { getPage } from '../helper/browser'
2+
3+
describe('density estimation', () => {
	/** @type {Awaited<ReturnType<getPage>>} */
	let page

	// Every case starts from a fresh page with the 'DE' task and this model selected.
	beforeEach(async () => {
		page = await getPage()
		await page.locator('#ml_selector dl:first-child dd:nth-child(5) select').selectOption('DE')
		await page
			.locator('#ml_selector .model_selection #mlDisp')
			.selectOption('multivariate_kernel_density_estimator')
	})

	afterEach(async () => {
		await page?.close()
	})

	// The first select in the control area must default to 'silverman'.
	test('initialize', async () => {
		const controls = page.locator('#ml_selector #method_menu').locator('.buttons')
		const methodSelect = controls.locator('select:nth-of-type(1)')

		await expect(methodSelect.inputValue()).resolves.toBe('silverman')
	})

	// Clicking "Fit" must render at least one tile image in the plot area.
	test('learn', async () => {
		const controls = page.locator('#ml_selector #method_menu').locator('.buttons')
		await controls.locator('input[value=Fit]').dispatchEvent('click')

		const tiles = page.locator('#plot-area svg').locator('.tile-render image')
		await expect(tiles.count()).resolves.toBeGreaterThan(0)
	})
})
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import { jest } from '@jest/globals'
2+
jest.retryTimes(3)
3+
4+
import Matrix from '../../../lib/util/matrix.js'
5+
import MultivariateKernelDensityEstimator from '../../../lib/model/multivariate_kernel_density_estimator.js'
6+
7+
import { correlation } from '../../../lib/evaluate/regression.js'
8+
9+
describe('density estimation', () => {
	// Runs once with the default method and once per explicitly named method.
	test.each([undefined, 'silverman', 'scott'])('%p', method => {
		const sgm = Matrix.fromArray([
			[0.1, 0],
			[0, 0.5],
		])
		const x = Matrix.randn(500, 2, 0, sgm.toArray()).toArray()

		const model = new MultivariateKernelDensityEstimator(method)
		model.fit(x)
		const y = model.predict(x)
		expect(y).toHaveLength(x.length)

		// Reference values from the zero-mean Gaussian with diagonal covariance `sgm`
		// (scaled by 1/2, which does not affect the correlation).
		const norm = Math.sqrt((2 * Math.PI) ** 2 * sgm.det())
		const p = x.map(row => {
			const quad = row.reduce((s, v, d) => s + v ** 2 / sgm.at(d, d), 0)
			return Math.exp(-quad / 2) / norm / 2
		})

		expect(correlation(y, p)).toBeGreaterThan(0.9)
	})
})

0 commit comments

Comments
 (0)