From 72213a71e7ac089612e5504a8280781491256300 Mon Sep 17 00:00:00 2001 From: ishii-norimi Date: Sat, 25 Oct 2025 17:11:37 +0900 Subject: [PATCH] Add multivariate kernel density estimator --- README.md | 4 +- js/model_selector.js | 2 + .../multivariate_kernel_density_estimator.js | 36 +++++++++ .../multivariate_kernel_density_estimator.js | 80 +++++++++++++++++++ ...tivariate_kernel_density_estimator.test.js | 37 +++++++++ ...tivariate_kernel_density_estimator.test.js | 32 ++++++++ 6 files changed, 189 insertions(+), 2 deletions(-) create mode 100644 js/view/multivariate_kernel_density_estimator.js create mode 100644 lib/model/multivariate_kernel_density_estimator.js create mode 100644 tests/gui/view/multivariate_kernel_density_estimator.test.js create mode 100644 tests/lib/model/multivariate_kernel_density_estimator.test.js diff --git a/README.md b/README.md index 14999928..5bed24f3 100644 --- a/README.md +++ b/README.md @@ -127,11 +127,11 @@ for (let i = 0; i < n; i++) { | regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, LOESS, spline, Naive Bayes, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MARS, MLP, ELM, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median | | interpolation | Nearest neighbor, IDW, (Spherical) Linear, Brahmagupta, Logarithmic, Cosine, (Inverse) Smoothstep, Cubic, (Centripetal) Catmull-Rom, Hermit, Polynomial, Lagrange, Trigonometric, Spline, RBF Network, Akima, Natural neighbor, Delaunay | | learning to rank | Ordered logistic, Ordered probit, PRank, OAP-BPM, RankNet | -| anomaly detection | Percentile, MAD, Tukey's 
fences, Grubbs's test, Thompson test, Tietjen Moore test, Generalized ESD, Hotelling, MT, MCD, k-nearest neighbor, LOF, COF, ODIN, LDOF, INFLO, LOCI, LoOP, RDF, LDF, KDEOS, RDOS, NOF, RKOF, ABOD, PCA, OCSVM, KDE, GMM, Isolation forest, Autoencoder, GAN | +| anomaly detection | Percentile, MAD, Tukey's fences, Grubbs's test, Thompson test, Tietjen Moore test, Generalized ESD, Hotelling, MT, MCD, k-nearest neighbor, LOF, COF, ODIN, LDOF, INFLO, LOCI, LoOP, RDF, LDF, KDEOS, RDOS, NOF, RKOF, ABOD, PCA, OCSVM, (Multivariate) KDE, GMM, Isolation forest, Autoencoder, GAN | | dimensionality reduction | Random projection, (Dual / Kernel / Incremental / Probabilistic) PCA, GPLVM, LSA, MDS, Linear discriminant analysis, NCA, ICA, Principal curve, Sammon, FastMap, Sliced inverse regression, LLE, HLLE, MLLE, Laplacian eigenmaps, Isomap, LTSA, Diffusion map, SNE, t-SNE, UMAP, SOM, GTM, NMF, MOD, K-SVD, Autoencoder, VAE | | feature selection | Mutual information, Ridge, Lasso, Elastic net, Decision tree, NCA | | transformation | Box-Cox, Yeo-Johnson | -| density estimation | Histogram, Average shifted histogram, Polynomial histogram, Maximum likelihood, Kernel density estimation, k-nearest neighbor, Naive Bayes, GMM, HMM | +| density estimation | Histogram, Average shifted histogram, Polynomial histogram, Maximum likelihood, (Multivariate) Kernel density estimation, k-nearest neighbor, Naive Bayes, GMM, HMM | | generate | MH, Slice sampling, GMM, GBRBM, HMM, VAE, GAN, NICE, Diffusion | | smoothing | (Linear weighted / Triangular / Cumulative) Moving average, Exponential average, Moving median, KZ filter, Savitzky Golay filter, Hampel filter, Kalman filter, Particle filter, Lowpass filter, Bessel filter, Butterworth filter, Chebyshev filter, Elliptic filter | | timeseries prediction | Holt winters, AR, ARMA, SDAR, VAR, Kalman filter, MLP, RNN | diff --git a/js/model_selector.js b/js/model_selector.js index 54e6799c..2bbdd72a 100644 --- a/js/model_selector.js +++ 
b/js/model_selector.js @@ -400,6 +400,7 @@ const AIMethods = [ { value: 'pca', title: 'PCA' }, { value: 'ocsvm', title: 'One class SVM' }, { value: 'kernel_density_estimator', title: 'Kernel Density Estimator' }, + { value: 'multivariate_kernel_density_estimator', title: 'Multivariate Kernel Density Estimator' }, { value: 'gmm', title: 'Gaussian mixture model' }, { value: 'isolation_forest', title: 'Isolation Forest' }, { value: 'autoencoder', title: 'Autoencoder' }, @@ -467,6 +468,7 @@ const AIMethods = [ { value: 'polynomial_histogram', title: 'Polynomial Histogram' }, { value: 'maximum_likelihood', title: 'Maximum Likelihood' }, { value: 'kernel_density_estimator', title: 'Kernel Density Estimator' }, + { value: 'multivariate_kernel_density_estimator', title: 'Multivariate Kernel Density Estimator' }, { value: 'knearestneighbor', title: 'k nearest neighbor' }, { value: 'naive_bayes', title: 'Naive Bayes' }, { value: 'gmm', title: 'Gaussian mixture model' }, diff --git a/js/view/multivariate_kernel_density_estimator.js b/js/view/multivariate_kernel_density_estimator.js new file mode 100644 index 00000000..0727b411 --- /dev/null +++ b/js/view/multivariate_kernel_density_estimator.js @@ -0,0 +1,36 @@ +import MultivariateKernelDensityEstimator from '../../lib/model/multivariate_kernel_density_estimator.js' +import Controller from '../controller.js' +import { specialCategory } from '../utils.js' + +export default function (platform) { + platform.setting.ml.usage = 'Click and add data point. Next, click "Fit" button.' 
/**
 * Multivariate kernel density estimator
 *
 * Uses a Gaussian kernel with a diagonal bandwidth matrix chosen by a rule of thumb
 * (Silverman or Scott) from the per-feature standard deviation of the training data.
 */
export default class MultivariateKernelDensityEstimator {
	// https://en.wikipedia.org/wiki/Multivariate_kernel_density_estimation
	/**
	 * @param {'silverman' | 'scott'} [method] Optimal bandwidth method
	 */
	constructor(method = 'silverman') {
		this._method = method
	}

	/**
	 * Evaluates the Gaussian kernel for every row of a matrix of difference vectors.
	 * @private
	 * @param {Matrix} invh Inverse of the bandwidth matrix
	 * @param {number} sqrtdeth Square root of the determinant of the bandwidth matrix
	 * @param {Matrix} x Difference vectors, one per row
	 * @returns {Matrix} Kernel values
	 */
	_kernel(invh, sqrtdeth, x) {
		const d = x.cols
		// Normalisation constant of the d-dimensional Gaussian; it does not depend on the row.
		const norm = (2 * Math.PI) ** (d / 2) * sqrtdeth

		// Quadratic form x H^-1 x^T per row.
		// NOTE(review): assumes `mult` is an in-place element-wise product and `sum(1)` reduces each row - confirm against Matrix.
		const k = x.dot(invh)
		k.mult(x)
		const ks = k.sum(1)
		// The return value of `map` is discarded, so this relies on `map` updating `ks` in place.
		ks.map(v => Math.exp(-v / 2) / norm)

		return ks
	}

	/**
	 * Fit model.
	 * @param {Array<Array<number>>} x Training data
	 * @throws {Error} If the training data is empty, or the bandwidth method is unknown or not implemented ('pi', 'scv')
	 */
	fit(x) {
		// Validate everything before touching the instance so a failed fit leaves the model unchanged.
		if (x.length === 0) {
			throw new Error('Training data must not be empty')
		}
		const n = x.length
		const d = x[0].length

		// Scale applied to each feature's standard deviation to obtain sqrt(H_ii).
		let s
		if (this._method === 'pi' || this._method === 'scv') {
			throw new Error('Not implemented')
		} else if (this._method === 'silverman') {
			// (4 / (d + 2))^(1 / (d + 4)) * n^(-1 / (d + 4))
			s = (4 / (d + 2)) ** (1 / (d + 4)) / n ** (1 / (d + 4))
		} else if (this._method === 'scott') {
			// n^(-1 / (d + 4))
			s = 1 / n ** (1 / (d + 4))
		} else {
			throw new Error(`Unknown bandwidth method: ${this._method}`)
		}

		this._x = Matrix.fromArray(x)
		const std = this._x.std(0).value
		// Diagonal bandwidth matrix H with H_ii = (std_i * s)^2.
		this._h = Matrix.zeros(d, d)
		for (let i = 0; i < d; i++) {
			this._h.set(i, i, (std[i] * s) ** 2)
		}

		this._invh = this._h.inv()
		this._hsqrtdet = Math.sqrt(this._h.det())
	}

	/**
	 * Returns the estimated density at each sample.
	 * @param {Array<Array<number>>} x Sample data
	 * @returns {number[]} Predicted values
	 * @throws {Error} If the model has not been fitted
	 */
	probability(x) {
		if (!this._invh) {
			throw new Error('Model is not fitted. Call fit() before predicting.')
		}
		return x.map(v => {
			// Copy the row: `isub` works in place on `xi`, and whether Matrix copies the array it is given
			// is not visible here, so this guarantees the caller's data is never modified.
			const xi = new Matrix(1, v.length, [...v])
			// NOTE(review): presumably leaves `xi` holding one difference vector per training point - confirm against Matrix#isub.
			xi.isub(this._x)

			// Average of the kernel values over the training points.
			return this._kernel(this._invh, this._hsqrtdet, xi).mean()
		})
	}

	/**
	 * Returns the estimated density at each sample (same as `probability`).
	 * @param {Array<Array<number>>} x Sample data
	 * @returns {number[]} Predicted values
	 */
	predict(x) {
		return this.probability(x)
	}
}
describe('density estimation', () => {
	test.each([undefined, 'silverman', 'scott'])('%p', method => {
		const model = new MultivariateKernelDensityEstimator(method)

		// Diagonal covariance of the zero-mean Gaussian the samples are drawn from.
		const sgm = Matrix.fromArray([
			[0.1, 0],
			[0, 0.5],
		])
		const samples = Matrix.randn(500, 2, 0, sgm.toArray()).toArray()

		model.fit(samples)
		const estimated = model.predict(samples)
		expect(estimated).toHaveLength(samples.length)

		// Reference density of each sample under the generating Gaussian (halved, as before;
		// the constant factor does not affect the correlation).
		const expected = samples.map(row => {
			const quad = row.reduce((acc, v, k) => acc + v ** 2 / sgm.at(k, k), 0)
			const density = Math.exp(-quad / 2) / Math.sqrt((2 * Math.PI) ** 2 * sgm.det())
			return density / 2
		})

		// The estimate only needs to track the shape of the true density.
		expect(correlation(estimated, expected)).toBeGreaterThan(0.9)
	})
})