Skip to content

Commit 7227245

Browse files
authored
Add DiSH (#885)
1 parent 94e5966 commit 7227245

File tree

6 files changed

+329
-1
lines changed

6 files changed

+329
-1
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ for (let i = 0; i < n; i++) {
121121

122122
| task | model |
123123
| ---- | ----- |
124-
| clustering | (Soft / Kernel / Genetic / Weighted / Bisecting) k-means, k-means++, k-medois, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, k-harmonic means, MacQueen, Hartigan-Wong, Elkan, Hamelry, Drake, Yinyang, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, DTSCAN, HDBSCAN, DENCLUE, DBCLASD, BRIDGE, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, DOC, FastDOC, NMF, Autoencoder |
124+
| clustering | (Soft / Kernel / Genetic / Weighted / Bisecting) k-means, k-means++, k-medoids, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, k-harmonic means, MacQueen, Hartigan-Wong, Elkan, Hamerly, Drake, Yinyang, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, DTSCAN, HDBSCAN, DENCLUE, DBCLASD, BRIDGE, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, (Growing) SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, COLL, CLIQUE, PROCLUS, ORCLUS, FINDIT, DOC, FastDOC, DiSH, NMF, Autoencoder |
125125
| classification | (Fisher's) Linear discriminant, Quadratic discriminant, Mixture discriminant, Least squares, (Multiclass / Kernel) Ridge, (Complement / Negation / Universal-set / Selective) Naive Bayes (gaussian), AODE, (Fuzzy / Weighted) k-nearest neighbor, Radius neighbor, Nearest centroid, ENN, ENaN, NNBCA, ADAMENN, DANN, IKNN, Decision tree, Random forest, Extra trees, GBDT, XGBoost, ALMA, (Aggressive) ROMMA, (Bounded) Online gradient descent, (Budgeted online) Passive aggressive, RLS, (Selective-sampling) Second order perceptron, AROW, NAROW, Confidence weighted, CELLIP, IELLIP, Normal herd, Stoptron, (Kernelized) Pegasos, MIRA, Forgetron, Projectron, Projectron++, Banditron, Ballseptron, (Multiclass) BSGD, ILK, SILK, (Multinomial) Logistic regression, (Multinomial) Probit, SVM, Gaussian process, HMM, CRF, Bayesian Network, LVQ, (Average / Multiclass / Voted / Kernelized / Selective-sampling / Margin / Shifting / Budget / Tighter / Tightest) Perceptron, PAUM, RBP, ADALINE, MADALINE, MLP, ELM, LMNN |
126126
| semi-supervised classification | k-nearest neighbor, Radius neighbor, Label propagation, Label spreading, k-means, GMM, S3VM, Ladder network |
127127
| regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, LOESS, spline, Naive Bayes, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MLP, ELM, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median |

js/model_selector.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ const AIMethods = [
150150
{ value: 'orclus', title: 'ORCLUS' },
151151
{ value: 'findit', title: 'FINDIT' },
152152
{ value: 'doc', title: 'DOC / FastDOC' },
153+
{ value: 'dish', title: 'DiSH' },
153154
{ value: 'plsa', title: 'PLSA' },
154155
{ value: 'latent_dirichlet_allocation', title: 'Latent Dirichlet Allocation' },
155156
{ value: 'nmf', title: 'NMF' },

js/view/dish.js

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import DiSH from '../../lib/model/dish.js'
2+
import Controller from '../controller.js'
3+
4+
/**
 * Sets up the DiSH clustering view: usage text, paper reference, the
 * `mu` / `e` number inputs and a "Fit" button that runs the model.
 *
 * @param {*} platform Platform object handed in by the caller; this function
 *   writes `setting.ml.usage`, `setting.ml.reference` and `trainResult`, and reads `trainInput`
 */
export default function (platform) {
	platform.setting.ml.usage = 'Click and add data point. Then, click "Fit" button.'
	platform.setting.ml.reference = {
		author: 'E. Achtert, C. Bohm, H. P. Kriegel, P. Kroger, I. Muller-Gorman, A. Zimek',
		title: 'Detection and Visualization of Subspace Cluster Hierarchies',
		year: 2007,
	}

	const controller = new Controller(platform)

	// Inputs are created in display order: mu, e, then the button.
	const mu = controller.input.number({ label: ' mu ', min: 1, max: 1000, value: 20 })
	const e = controller.input.number({ label: ' e ', min: 0, max: 100, step: 0.1, value: 0.1 })

	// Runs DiSH with the current input values; predicted ids are shifted by one before being stored.
	function fitModel() {
		const labels = new DiSH(mu.value, e.value).predict(platform.trainInput)
		platform.trainResult = labels.map(label => label + 1)
	}

	controller.input.button('Fit').on('click', fitModel)
}

lib/model/dish.js

Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
/**
 * Small priority queue backed by an array of `[value, priority]` pairs.
 * The array is re-sorted in ascending priority order after every update,
 * so `shift()` always returns the value with the lowest priority.
 */
class PriorityQueue {
	/**
	 * @param {Array<[*, number]>} [arr] Initial `[value, priority]` entries (used as-is, not sorted)
	 */
	constructor(arr) {
		this._value = arr || []
	}

	/**
	 * Number of queued entries.
	 * @type {number}
	 */
	get length() {
		return this._value.length
	}

	/**
	 * Iterates over the `[value, priority]` entries in queue order.
	 */
	[Symbol.iterator]() {
		return this._value[Symbol.iterator]()
	}

	/**
	 * Sorts entries by ascending priority.
	 */
	_sort() {
		this._value.sort((a, b) => a[1] - b[1])
	}

	/**
	 * Adds a value with the given priority.
	 * @param {*} value Value to enqueue
	 * @param {number} priority Priority (smaller is dequeued first)
	 */
	push(value, priority) {
		this._value.push([value, priority])
		this._sort()
	}

	/**
	 * Sets the priority of an already queued value, or enqueues it when absent.
	 * @param {*} value Value to update (compared with `===`)
	 * @param {number} priority New priority
	 */
	move(value, priority) {
		for (let i = 0; i < this.length; i++) {
			if (this._value[i][0] === value) {
				this._value[i][1] = priority
				this._sort()
				return
			}
		}
		this.push(value, priority)
	}

	/**
	 * Removes and returns the value at the front of the queue.
	 * @returns {*} Dequeued value
	 */
	shift() {
		const [value] = this._value.shift()
		return value
	}
}

/**
 * Detecting Subspace cluster Hierarchies
 */
export default class DiSH {
	// Detection and Visualization of Subspace Cluster Hierarchies
	// https://imada.sdu.dk/u/zimek/publications/DASFAA2007/detection.pdf
	/**
	 * @param {number} mu Number of neighborhood
	 * @param {number} e Neighborhood range
	 */
	constructor(mu, e) {
		this._mu = mu
		this._e = e
	}

	/**
	 * Returns predicted categories.
	 *
	 * The detected clusters (members, center, preference vector `w`,
	 * number of zero entries `l` and parent cluster ids) are also stored in `this._clusters`.
	 * @param {Array<Array<number>>} x Training data
	 * @returns {number[]} Predicted values
	 */
	predict(x) {
		const n = x.length
		if (n === 0) {
			// Nothing to cluster; bail out before reading x[0].
			this._clusters = []
			return []
		}
		const a = x[0].length

		// 1. Preference vector w[i] per point: w[i][k] === 1 when attribute k is selected for point i.
		const w = []
		for (let i = 0; i < n; i++) {
			// nears[k]: indices of points within `e` of point i along attribute k.
			const nears = []
			for (let k = 0; k < a; k++) {
				nears[k] = new Set()
				for (let j = 0; j < n; j++) {
					if (Math.abs(x[i][k] - x[j][k]) <= this._e) {
						nears[k].add(j)
					}
				}
			}
			// Attributes with at least `mu` neighbours are candidates. One of them seeds the
			// subspace (max_k), the others wait in `c`.
			// NOTE(review): the seed is the qualifying attribute with the SMALLEST neighbourhood
			// (the first one on ties), while the refinement loop below picks the LARGEST
			// intersection — confirm against the paper.
			const c = new Set()
			let max_n = -1
			let max_k = -1
			for (let k = 0; k < a; k++) {
				if (nears[k].size >= this._mu) {
					if (max_n < 0) {
						max_n = nears[k].size
						max_k = k
					} else if (nears[k].size < max_n) {
						c.add(max_k)
						max_n = nears[k].size
						max_k = k
					} else {
						c.add(k)
					}
				}
			}
			w[i] = Array(a).fill(0)
			if (max_k < 0) {
				// No attribute has enough neighbours: the preference vector stays all zero.
				continue
			}
			w[i][max_k] = 1

			// Greedily add candidate attributes while the joint neighbourhood keeps >= mu points.
			// `c` is a Set, so its element count is `size` (`length` is undefined on a Set).
			let int = nears[max_k]
			while (c.size > 0) {
				let bestSize = -1
				let bestK = -1
				for (const k of c) {
					const intn = new Set()
					for (const idx of int) {
						if (nears[k].has(idx)) {
							intn.add(idx)
						}
					}
					// Candidate neighbourhoods are narrowed to the current joint neighbourhood.
					nears[k] = intn
					if (nears[k].size >= bestSize) {
						bestSize = nears[k].size
						bestK = k
					}
				}
				if (bestSize < this._mu) {
					break
				}
				w[i][bestK] = 1
				c.delete(bestK)
				int = nears[bestK]
			}
		}

		const queue = new PriorityQueue()
		for (let i = 0; i < n; i++) {
			queue.push(i, Infinity)
		}

		// 2. Pairwise subspace distances. sdists[i][j] = [j, d], where d counts the attributes
		// not selected by both points, plus 1 when the points are farther apart than 2e
		// inside the attributes selected by both.
		const sdists = []
		for (let i = 0; i < n; i++) {
			sdists[i] = []
			sdists[i][i] = [i, 0]
			for (let j = 0; j < i; j++) {
				let lambda = 0
				let sd = 0
				for (let k = 0; k < a; k++) {
					if (w[i][k] === 0 || w[j][k] === 0) {
						lambda++
					}
					if (w[i][k] === 1 && w[j][k] === 1) {
						sd += (x[i][k] - x[j][k]) ** 2
					}
				}
				const d = lambda + (Math.sqrt(sd) > 2 * this._e ? 1 : 0)
				sdists[i][j] = [j, d]
				sdists[j][i] = [i, d]
			}
		}

		// 3. Cluster order: repeatedly take the front of the queue and re-prioritise the rest
		// by their reachability from it.
		const co = []
		while (queue.length > 0) {
			const o = queue.shift()
			const ss = sdists[o].concat()
			ss.sort((a, b) => a[1] - b[1])
			// mu-th nearest neighbour of o; clamp so that data sets with n <= mu do not index past the end.
			const [r] = ss[Math.min(this._mu, ss.length - 1)]
			// Iterate over a snapshot because move() re-sorts the queue.
			for (const [p] of [...queue]) {
				const sr = Math.max(sdists[o][r][1], sdists[o][p][1])
				queue.move(p, sr)
			}
			co.push(o)
		}

		// 4. Walk the cluster order and attach each point to the first compatible cluster.
		const clusters = []
		for (let i = 0; i < n; i++) {
			const o = co[i]
			let c = null
			// For i === 0 `clusters` is still empty, so co[i - 1] is never read out of range.
			for (let t = 0; t < clusters.length; t++) {
				if (clusters[t].w.some((v, k) => v !== w[o][k] * w[co[i - 1]][k])) {
					continue
				}
				let dist = 0
				for (let k = 0; k < a; k++) {
					if (clusters[t].w[k] === 1) {
						dist += (clusters[t].center[k] - x[o][k]) ** 2
					}
				}
				if (Math.sqrt(dist) > 2 * this._e) {
					continue
				}
				c = t
				break
			}
			if (c == null) {
				c = clusters.length
				clusters.push({
					i: [],
					center: Array(a).fill(0),
					w: w[o].concat(),
					l: w[o].reduce((s, v) => s + (v === 0 ? 1 : 0), 0),
					parents: [],
				})
			}
			// Running mean of the member points.
			clusters[c].center = clusters[c].center.map(
				(v, k) => (v * clusters[c].i.length + x[o][k]) / (clusters[c].i.length + 1)
			)
			// Record the data index (not the position in the cluster order) so that the
			// labels below are written to the right points.
			clusters[c].i.push(o)
		}

		// 5. Hierarchy: cluster j can be a parent of cluster i when it has more zero entries (larger `l`).
		for (let i = 0; i < clusters.length; i++) {
			for (let j = 0; j < clusters.length; j++) {
				if (clusters[j].l <= clusters[i].l) {
					continue
				}
				if (clusters[j].l === a) {
					// A cluster with an all-zero preference vector is a parent of every more specific cluster.
					clusters[i].parents.push(j)
					continue
				}
				let dist = 0
				for (let k = 0; k < a; k++) {
					if (clusters[i].w[k] === 1 && clusters[j].w[k] === 1) {
						dist += (clusters[i].center[k] - clusters[j].center[k]) ** 2
					}
				}
				if (Math.sqrt(dist) > 2 * this._e) {
					continue
				}

				// Skip j when an already registered parent has a smaller `l` than j.
				let target = true
				for (let t = 0; t < clusters[i].parents.length; t++) {
					if (clusters[clusters[i].parents[t]].l < clusters[j].l) {
						target = false
						break
					}
				}
				if (target) {
					clusters[i].parents.push(j)
				}
			}
		}
		this._clusters = clusters

		const p = []
		for (let c = 0; c < clusters.length; c++) {
			for (let i = 0; i < clusters[c].i.length; i++) {
				p[clusters[c].i[i]] = c
			}
		}
		return p
	}
}

tests/gui/view/dish.test.js

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import { getPage } from '../helper/browser'
2+
3+
// GUI test for the DiSH view: selects the clustering task and the "dish" model,
// then checks the default inputs and that fitting colours the points.
describe('clustering', () => {
	/** @type {Awaited<ReturnType<getPage>>} */
	let page

	// Resolves the element with class "buttons" inside the method menu.
	const findButtons = async () => {
		const menu = await page.waitForSelector('#ml_selector #method_menu')
		return menu.waitForSelector('.buttons')
	}

	beforeEach(async () => {
		page = await getPage()
		const taskSelect = await page.waitForSelector('#ml_selector dl:first-child dd:nth-child(5) select')
		await taskSelect.selectOption('CT')
		const modelSelect = await page.waitForSelector('#ml_selector .model_selection #mlDisp')
		await modelSelect.selectOption('dish')
	})

	afterEach(async () => {
		await page?.close()
	})

	test('initialize', async () => {
		const buttons = await findButtons()

		const muInput = await buttons.waitForSelector('input:nth-of-type(1)')
		await expect(muInput.getAttribute('value')).resolves.toBe('20')
		const eInput = await buttons.waitForSelector('input:nth-of-type(2)')
		await expect(eInput.getAttribute('value')).resolves.toBe('0.1')
	})

	test('learn', async () => {
		const buttons = await findButtons()

		const fitButton = await buttons.waitForSelector('input[value=Fit]')
		await fitButton.evaluate(el => el.click())

		const svg = await page.waitForSelector('#plot-area svg')
		await svg.waitForSelector('.datas circle')

		// Collect the fill of every plotted point, one circle at a time.
		const fills = []
		for (const circle of await svg.$$('.datas circle')) {
			fills.push(await circle.evaluate(el => el.getAttribute('fill')))
		}
		// More than one distinct colour is expected after fitting.
		expect(new Set(fills).size).toBeGreaterThan(1)
	})
})

tests/lib/model/dish.test.js

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import Matrix from '../../../lib/util/matrix.js'
2+
import DiSH from '../../../lib/model/dish.js'
3+
4+
import { randIndex } from '../../../lib/evaluate/clustering.js'
5+
6+
// Two 6-dimensional groups of 50 points each, generated around 0 and around 5:
// the predicted labels must agree with the generating groups (Rand index > 0.9).
test('clustering', () => {
	const perGroup = 50
	const samples = Matrix.concat(
		Matrix.randn(perGroup, 6, 0, 0.1),
		Matrix.randn(perGroup, 6, 5, 0.1)
	).toArray()

	const model = new DiSH(5, 2.0)
	const labels = model.predict(samples)
	expect(labels).toHaveLength(samples.length)

	// Ground truth: the first `perGroup` rows are group 0, the rest group 1.
	const truth = Array.from(samples, (_, idx) => Math.floor(idx / perGroup))
	expect(randIndex(labels, truth)).toBeGreaterThan(0.9)
})

0 commit comments

Comments
 (0)