Skip to content

Commit 8410118

Browse files
authored
Add Monothetic Clustering (#38)
1 parent 28d1119 commit 8410118

File tree

5 files changed

+200
-1
lines changed

5 files changed

+200
-1
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ for (let i = 0; i < n; i++) {
121121

122122
| task | model |
123123
| ---- | ----- |
124-
| clustering | k-means, k-means++, k-medois, k-medians, x-means, G-means, LBG, ISODATA, Soft k-means, Fuzzy c-means, Possibilistic c-means, Kernel k-means, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Mean shift, DBSCAN, OPTICS, HDBSCAN, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, NMF, Autoencoder |
124+
| clustering | k-means, k-means++, k-medoids, k-medians, x-means, G-means, LBG, ISODATA, Soft k-means, Fuzzy c-means, Possibilistic c-means, Kernel k-means, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mean shift, DBSCAN, OPTICS, HDBSCAN, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, PLSA, Latent Dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, NMF, Autoencoder |
125125
| classification | Linear discriminant (FLD, LDA), Quadratic discriminant, Mixture discriminant, Least squares, Ridge, (Complement / Negation / Universal-set / Selective) Naive Bayes (gaussian), AODE, k-nearest neighbor, Radius neighbor, Fuzzy k-nearest neighbor, Nearest centroid, DANN, Decision tree, Random forest, Extra trees, GBDT, XGBoost, ALMA, ROMMA, Online gradient descent, Passive aggressive, RLS, Second order perceptron, AROW, NAROW, Confidence weighted, CELLIP, IELLIP, Normal herd, (Multinomial) Logistic regression, (Multinomial) Probit, SVM, Gaussian process, HMM, CRF, Bayesian Network, LVQ, Perceptron, ADALINE, MLP, LMNN |
126126
| semi-supervised classification | k-nearest neighbor, Radius neighbor, Label propagation, Label spreading, k-means, GMM, Ladder network |
127127
| regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, spline, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MLP, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median |

js/model_selector.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ const AIMethods = [
7474
{ value: 'cure', title: 'CURE' },
7575
{ value: 'rock', title: 'ROCK' },
7676
{ value: 'diana', title: 'DIANA' },
77+
{ value: 'monothetic', title: 'Monothetic' },
7778
],
7879
Distribution: [
7980
{ value: 'gmm', title: 'Gaussian mixture model' },

js/view/monothetic.js

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import MonotheticClustering from '../../lib/model/monothetic.js'
2+
3+
var dispMonothetic = function (elm, platform) {
4+
let model = null
5+
6+
const fitModel = cb => {
7+
platform.fit((tx, ty, pred_cb) => {
8+
if (!model) {
9+
model = new MonotheticClustering()
10+
model.init(tx)
11+
}
12+
model.fit()
13+
const pred = model.predict()
14+
pred_cb(pred.map(v => v + 1))
15+
elm.select('[name=clusters]').text(model.size)
16+
cb && cb()
17+
})
18+
}
19+
20+
elm.append('input')
21+
.attr('type', 'button')
22+
.attr('value', 'Initialize')
23+
.on('click', () => {
24+
model = null
25+
elm.select('[name=clusters]').text(0)
26+
})
27+
elm.append('input')
28+
.attr('type', 'button')
29+
.attr('value', 'Step')
30+
.on('click', () => {
31+
fitModel()
32+
})
33+
elm.append('span').text(' Clusters: ')
34+
elm.append('span').attr('name', 'clusters')
35+
}
36+
37+
export default function (platform) {
38+
platform.setting.ml.usage = 'Click and add data point. Then, click "Step" button repeatedly.'
39+
dispMonothetic(platform.setting.ml.configElement, platform)
40+
}

lib/model/monothetic.js

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
/**
2+
* Monothetic Clustering
3+
*/
4+
export default class MonotheticClustering {
5+
// https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2839&rep=rep1&type=pdf
6+
// https://cran.r-project.org/web/packages/monoClust/vignettes/monoclust.html
7+
constructor() {}
8+
9+
/**
10+
* Number of clusters
11+
*
12+
* @type {number}
13+
*/
14+
get size() {
15+
return this._leafs().length
16+
}
17+
18+
/**
19+
* Initialize model.
20+
*
21+
* @param {Array<Array<number>>} datas
22+
*/
23+
init(datas) {
24+
this._x = datas
25+
this._d = datas[0].length
26+
const idx = []
27+
for (let i = 0; i < datas.length; idx[i] = i++);
28+
this._c = { index: idx, values: this._x, children: [] }
29+
}
30+
31+
_leafs() {
32+
let leafs = [this._c]
33+
while (true) {
34+
const stk = []
35+
for (let i = 0; i < leafs.length; i++) {
36+
if (leafs[i].children.length > 0) {
37+
stk.push(...leafs[i].children)
38+
} else {
39+
stk.push(leafs[i])
40+
}
41+
}
42+
if (leafs.length === stk.length) {
43+
return leafs
44+
}
45+
leafs = stk
46+
}
47+
}
48+
49+
_distance2(a, b) {
50+
return a.reduce((s, v, i) => s + (v - b[i]) ** 2, 0)
51+
}
52+
53+
_inertia(c) {
54+
const m = c[0].concat()
55+
for (let i = 1; i < c.length; i++) {
56+
for (let d = 0; d < this._d; d++) {
57+
m[d] += c[i][d]
58+
}
59+
}
60+
for (let d = 0; d < this._d; d++) {
61+
m[d] /= c.length
62+
}
63+
64+
let v = 0
65+
for (let i = 0; i < c.length; i++) {
66+
v += this._distance2(c[i], m)
67+
}
68+
return v
69+
}
70+
71+
/**
72+
* Fit model.
73+
*/
74+
fit() {
75+
const leafs = this._leafs()
76+
77+
let max_d = -Infinity
78+
let best_f = -1
79+
let best_t = -1
80+
let best_leaf = null
81+
for (let k = 0; k < leafs.length; k++) {
82+
const x = leafs[k].values
83+
const ck = this._inertia(x)
84+
for (let d = 0; d < this._d; d++) {
85+
const xd = x.map(v => v[d])
86+
xd.sort((a, b) => a - b)
87+
for (let i = 0; i < xd.length - 1; i++) {
88+
const t = (xd[i] + xd[i + 1]) / 2
89+
const x1 = x.filter(v => v[d] <= t)
90+
const x2 = x.filter(v => v[d] > t)
91+
const ck1 = this._inertia(x1)
92+
const ck2 = this._inertia(x2)
93+
94+
const dck = ck - ck1 - ck2
95+
if (max_d < dck) {
96+
max_d = dck
97+
best_f = d
98+
best_t = t
99+
best_leaf = leafs[k]
100+
}
101+
}
102+
}
103+
}
104+
105+
best_leaf.feature = best_f
106+
best_leaf.threshold = best_t
107+
best_leaf.children = [
108+
{
109+
index: best_leaf.index.filter((v, i) => best_leaf.values[i][best_f] <= best_t),
110+
values: best_leaf.values.filter(v => v[best_f] <= best_t),
111+
children: [],
112+
},
113+
{
114+
index: best_leaf.index.filter((v, i) => best_leaf.values[i][best_f] > best_t),
115+
values: best_leaf.values.filter(v => v[best_f] > best_t),
116+
children: [],
117+
},
118+
]
119+
}
120+
121+
/**
122+
* Returns predicted categories.
123+
*
124+
* @returns {number[]}
125+
*/
126+
predict() {
127+
const leafs = this._leafs()
128+
const p = []
129+
for (let k = 0; k < leafs.length; k++) {
130+
for (let i = 0; i < leafs[k].index.length; i++) {
131+
p[leafs[k].index[i]] = k
132+
}
133+
}
134+
return p
135+
}
136+
}

tests/lib/model/monothetic.test.js

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import Matrix from '../../../lib/util/matrix.js'
2+
import MonotheticClustering from '../../../lib/model/monothetic.js'
3+
4+
import { randIndex } from '../../../lib/evaluate/clustering.js'
5+
6+
test('clustering', () => {
7+
const model = new MonotheticClustering()
8+
const n = 50
9+
const x = Matrix.randn(n, 2, 0, 0.1).concat(Matrix.randn(n, 2, 5, 0.1)).toArray()
10+
11+
model.init(x)
12+
model.fit()
13+
const y = model.predict()
14+
expect(y).toHaveLength(x.length)
15+
16+
const t = []
17+
for (let i = 0; i < x.length; i++) {
18+
t[i] = Math.floor(i / n)
19+
}
20+
const ri = randIndex(y, t)
21+
expect(ri).toBeGreaterThan(0.9)
22+
})

0 commit comments

Comments
 (0)