Skip to content

Commit 5598829

Browse files
authored
Add Weighted k-means (#66)
1 parent 2e30510 commit 5598829

File tree

5 files changed

+258
-1
lines changed

5 files changed

+258
-1
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ for (let i = 0; i < n; i++) {
121121

122122
| task | model |
123123
| ---- | ----- |
124-
| clustering | k-means, k-means++, k-medois, k-medians, x-means, G-means, LBG, ISODATA, Soft k-means, Fuzzy c-means, Possibilistic c-means, Kernel k-means, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mean shift, DBSCAN, OPTICS, HDBSCAN, DENCLUE, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, NMF, Autoencoder |
124+
| clustering | k-means, k-means++, k-medoids, k-medians, x-means, G-means, Weighted k-means, LBG, ISODATA, Soft k-means, Fuzzy c-means, Possibilistic c-means, Kernel k-means, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mean shift, DBSCAN, OPTICS, HDBSCAN, DENCLUE, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, NMF, Autoencoder |
125125
| classification | Linear discriminant (FLD, LDA), Quadratic discriminant, Mixture discriminant, Least squares, Ridge, (Complement / Negation / Universal-set / Selective) Naive Bayes (gaussian), AODE, k-nearest neighbor, Radius neighbor, Fuzzy k-nearest neighbor, Nearest centroid, DANN, Decision tree, Random forest, Extra trees, GBDT, XGBoost, ALMA, ROMMA, Online gradient descent, Passive aggressive, RLS, Second order perceptron, AROW, NAROW, Confidence weighted, CELLIP, IELLIP, Normal herd, (Multinomial) Logistic regression, (Multinomial) Probit, SVM, Gaussian process, HMM, CRF, Bayesian Network, LVQ, Perceptron, ADALINE, MLP, LMNN |
126126
| semi-supervised classification | k-nearest neighbor, Radius neighbor, Label propagation, Label spreading, k-means, GMM, Ladder network |
127127
| regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, spline, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MLP, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median |

js/model_selector.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ const AIMethods = [
5151
{ value: 'kmeans', title: 'K-Means(++) / K-Medoids / K-Medians' },
5252
{ value: 'xmeans', title: 'X-Means' },
5353
{ value: 'gmeans', title: 'G-Means' },
54+
{ value: 'weighted_kmeans', title: 'Weighted k-means' },
5455
{ value: 'isodata', title: 'ISODATA' },
5556
{ value: 'soft_kmeans', title: 'Soft K-Means' },
5657
{ value: 'fuzzy_cmeans', title: 'Fuzzy C-Means' },

js/view/weighted_kmeans.js

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import WeightedKMeans from '../../lib/model/weighted_kmeans.js'
2+
3+
// Controller for the Weighted k-means demo page: builds the beta input,
// the "Add centroid" / step / "Skip" controls, and wires them to the model.
const dispWKMeans = function (elm, platform) {
	let model = null

	// Draw the current centroids (with 1-indexed category labels) using the
	// given render options. Shared by the add / step / skip handlers.
	const renderCentroids = opt => {
		platform.centroids(
			model.centroids,
			model.centroids.map((c, i) => i + 1),
			opt
		)
	}

	elm.append('span').text('beta')
	elm.append('input')
		.attr('name', 'beta')
		.attr('type', 'number')
		.attr('min', 1)
		.attr('max', 10)
		.attr('step', 0.1)
		.attr('value', 2)
	const slbConf = platform.setting.ml.controller.stepLoopButtons().init(() => {
		platform.init()
		const beta = +elm.select('[name=beta]').property('value')
		model = new WeightedKMeans(beta)
		elm.select('[name=clusternumber]').text(model.size + ' clusters')
	})
	elm.append('input')
		.attr('type', 'button')
		.attr('value', 'Add centroid')
		.on('click', () => {
			platform.fit((tx, ty, pred_cb) => {
				model.add(tx)
				const pred = model.predict(tx)
				// Categories are reported 1-indexed to the platform.
				pred_cb(pred.map(v => v + 1))
			})
			renderCentroids({ line: true })
			elm.select('[name=clusternumber]').text(model.size + ' clusters')
		})
	elm.append('span').attr('name', 'clusternumber').style('padding', '0 10px').text('0 clusters')

	slbConf.step(cb => {
		if (model.size === 0) {
			cb && cb()
			return
		}
		platform.fit((tx, ty, pred_cb) => {
			// WeightedKMeans.fit is unsupervised and takes only the data matrix
			// (the original code passed ty as a second argument, which fit ignores).
			model.fit(tx)
			const pred = model.predict(tx)
			pred_cb(pred.map(v => v + 1))
		})
		renderCentroids({ line: true, duration: 1000 })
		cb && setTimeout(cb, 1000)
	})
	elm.append('input')
		.attr('type', 'button')
		.attr('value', 'Skip')
		.on('click', () => {
			platform.fit((tx, ty, pred_cb) => {
				// Iterate until the centroids (effectively) stop moving.
				while (model.fit(tx) > 1.0e-8);
				const pred = model.predict(tx)
				pred_cb(pred.map(v => v + 1))
			})
			renderCentroids({ line: true, duration: 1000 })
		})
}
81+
82+
/**
 * Entry point for the Weighted k-means view: sets the usage text and
 * mounts the controls into the configuration element.
 *
 * @param {object} platform demo platform object
 */
export default function (platform) {
	const mlSetting = platform.setting.ml
	mlSetting.usage = 'Click and add data point. Then, click "Step" button repeatedly.'
	dispWKMeans(mlSetting.configElement, platform)
}

lib/model/weighted_kmeans.js

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
/**
 * Weighted k-means model.
 *
 * Feature-weighted k-means (W-k-means): alongside the usual centroid update,
 * each feature j gets a weight w_j, updated from the within-cluster dispersion
 * of that feature, and distances are computed as sum_j w_j^beta * (a_j - b_j)^2.
 */
export default class WeightedKMeans {
	// http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.715.8143&rep=rep1&type=pdf#page=124
	// https://www.ijert.org/research/a-comparative-study-of-k-means-and-weighted-k-means-for-clustering-IJERTV1IS10227.pdf
	/**
	 * @param {number} beta Weight exponent; the update uses 1 / (beta - 1), so beta must be > 1
	 */
	constructor(beta) {
		this._beta = beta
		this._centroids = []
		// Feature weights; lazily initialized to uniform 1/dim on first add/fit.
		this._w = null
	}

	/**
	 * Centroids
	 *
	 * @type {Array<Array<number>>}
	 */
	get centroids() {
		return this._centroids
	}

	/**
	 * Number of clusters.
	 *
	 * @type {number}
	 */
	get size() {
		return this._centroids.length
	}

	/**
	 * Weighted squared euclidean distance between two points.
	 * Requires this._w to be initialized.
	 *
	 * @param {number[]} a
	 * @param {number[]} b
	 * @returns {number}
	 */
	_distance(a, b) {
		let v = 0
		for (let i = a.length - 1; i >= 0; i--) {
			v += this._w[i] ** this._beta * (a[i] - b[i]) ** 2
		}
		return v
	}

	/**
	 * Add a new cluster whose centroid is a randomly chosen data point
	 * that does not (nearly) coincide with an existing centroid.
	 *
	 * NOTE(review): loops forever if every data point already coincides with
	 * a centroid (e.g. fewer distinct points than requested clusters) —
	 * callers are expected not to add more clusters than distinct points.
	 *
	 * @param {Array<Array<number>>} datas
	 * @returns {number[]} the new centroid
	 */
	add(datas) {
		if (!this._w) {
			this._w = Array(datas[0].length).fill(1 / datas[0].length)
		}
		while (true) {
			const p = datas[Math.floor(Math.random() * datas.length)]
			// Math.min of an empty array is Infinity, so the first centroid is always accepted.
			if (Math.min(...this._centroids.map(c => c.reduce((s, v, k) => s + (v - p[k]) ** 2, 0))) > 1.0e-8) {
				const cpoint = p.concat()
				this._centroids.push(cpoint)
				return cpoint
			}
		}
	}

	/**
	 * Clear all clusters.
	 */
	clear() {
		this._centroids = []
		this._w = null
	}

	/**
	 * Returns predicted categories (index of the nearest centroid under the
	 * weighted distance), or undefined when no centroid exists yet.
	 *
	 * @param {Array<Array<number>>} datas
	 * @returns {number[]}
	 */
	predict(datas) {
		if (this._centroids.length === 0) {
			return
		}
		return datas.map(value => {
			let min_d = Infinity
			let min_k = -1
			for (let i = 0; i < this._centroids.length; i++) {
				const d = this._distance(value, this._centroids[i])
				if (d < min_d) {
					min_d = d
					min_k = i
				}
			}
			return min_k
		})
	}

	/**
	 * Fit model and returns total distance the centroid has moved.
	 *
	 * One iteration: assign points, move centroids to their cluster means,
	 * then update the feature weights from the within-cluster dispersions.
	 *
	 * @param {Array<Array<number>>} datas
	 * @returns {number}
	 */
	fit(datas) {
		if (this._centroids.length === 0 || datas.length === 0) {
			return 0
		}
		if (!this._w) {
			this._w = Array(datas[0].length).fill(1 / datas[0].length)
		}
		const oldCentroids = this._centroids

		const pred = this.predict(datas)
		this._centroids = this._centroids.map((c, k) => {
			const m = Array(datas[0].length).fill(0)
			let s = 0
			for (let i = 0; i < datas.length; i++) {
				if (pred[i] !== k) {
					continue
				}
				for (let j = 0; j < m.length; j++) {
					m[j] += datas[i][j]
				}
				s++
			}
			// BUGFIX: a cluster with no assigned points previously divided by
			// s = 0, yielding an all-NaN centroid that poisoned every later
			// distance. Keep the previous centroid instead.
			return s === 0 ? c.concat() : m.map(v => v / s)
		})

		const newpred = this.predict(datas)
		// Per-feature within-cluster dispersion.
		const d = Array(this._w.length).fill(0)
		for (let i = 0; i < datas.length; i++) {
			for (let j = 0; j < d.length; j++) {
				d[j] += (datas[i][j] - this._centroids[newpred[i]][j]) ** 2
			}
		}
		// BUGFIX: if any feature has zero dispersion, the ratio d[k] / d[j]
		// produced NaN/Infinity weights. Keep the previous weights in that
		// degenerate case (NOTE(review): the paper's convention for D_j = 0
		// may differ — confirm against the W-k-means reference).
		if (d.every(v => v > 0)) {
			for (let k = 0; k < d.length; k++) {
				let t = 0
				for (let j = 0; j < d.length; j++) {
					t += (d[k] / d[j]) ** (1 / (this._beta - 1))
				}
				this._w[k] = 1 / t
			}
		}

		// Total movement of the centroids, measured under the updated weights.
		const err = oldCentroids.reduce((s, c, i) => s + this._distance(c, this._centroids[i]), 0)
		return err
	}
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import Matrix from '../../../lib/util/matrix.js'
2+
import WeightedKMeans from '../../../lib/model/weighted_kmeans.js'
3+
4+
import { randIndex } from '../../../lib/evaluate/clustering.js'
5+
6+
// Smoke test: two well-separated gaussian blobs must be recovered with a
// high rand index after at most 20 fit iterations.
test('predict', () => {
	const model = new WeightedKMeans(2)
	const n = 50
	const x = Matrix.randn(n, 2, 0, 0.1).concat(Matrix.randn(n, 2, 5, 0.1)).toArray()

	model.add(x)
	model.add(x)
	let iter = 0
	while (iter++ < 20 && model.fit(x) !== 0);

	const y = model.predict(x)
	expect(y).toHaveLength(x.length)

	// Ground-truth labels: first n rows are cluster 0, the rest cluster 1.
	const t = x.map((_, i) => Math.floor(i / n))
	const ri = randIndex(y, t)
	expect(ri).toBeGreaterThan(0.9)
})

0 commit comments

Comments
 (0)