Skip to content

Commit 52fa59a

Browse files
authored
Add Genetic k-means (#68)
1 parent 5598829 commit 52fa59a

File tree

5 files changed

+281
-1
lines changed

5 files changed

+281
-1
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ for (let i = 0; i < n; i++) {
121121

122122
| task | model |
123123
| ---- | ----- |
124-
| clustering | k-means, k-means++, k-medoids, k-medians, x-means, G-means, Weighted k-means, LBG, ISODATA, Soft k-means, Fuzzy c-means, Possibilistic c-means, Kernel k-means, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mean shift, DBSCAN, OPTICS, HDBSCAN, DENCLUE, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, PLSA, Latent Dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, NMF, Autoencoder |
124+
| clustering | k-means, k-means++, k-medoids, k-medians, x-means, G-means, Weighted k-means, LBG, ISODATA, Soft k-means, Fuzzy c-means, Possibilistic c-means, Kernel k-means, Genetic k-means, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mean shift, DBSCAN, OPTICS, HDBSCAN, DENCLUE, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, PLSA, Latent Dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, NMF, Autoencoder |
125125
| classification | Linear discriminant (FLD, LDA), Quadratic discriminant, Mixture discriminant, Least squares, Ridge, (Complement / Negation / Universal-set / Selective) Naive Bayes (gaussian), AODE, k-nearest neighbor, Radius neighbor, Fuzzy k-nearest neighbor, Nearest centroid, DANN, Decision tree, Random forest, Extra trees, GBDT, XGBoost, ALMA, ROMMA, Online gradient descent, Passive aggressive, RLS, Second order perceptron, AROW, NAROW, Confidence weighted, CELLIP, IELLIP, Normal herd, (Multinomial) Logistic regression, (Multinomial) Probit, SVM, Gaussian process, HMM, CRF, Bayesian Network, LVQ, Perceptron, ADALINE, MLP, LMNN |
126126
| semi-supervised classification | k-nearest neighbor, Radius neighbor, Label propagation, Label spreading, k-means, GMM, Ladder network |
127127
| regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, spline, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MLP, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median |

js/model_selector.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ const AIMethods = [
5757
{ value: 'fuzzy_cmeans', title: 'Fuzzy C-Means' },
5858
{ value: 'pcm', title: 'Possibilistic C-Means' },
5959
{ value: 'kernel_kmeans', title: 'Kernel K-Means' },
60+
{ value: 'genetic_kmeans', title: 'Genetic k-means' },
6061
{ value: 'lbg', title: 'Linde-Buzo-Gray' },
6162
{ value: 'pam', title: 'PAM / CLARA' },
6263
{ value: 'clarans', title: 'CLARANS' },

js/view/genetic_kmeans.js

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import GeneticKMeans from '../../lib/model/genetic_kmeans.js'
2+
3+
/**
 * Builds the Genetic k-means controls inside `elm` and wires them to the
 * platform's step/loop controller.
 *
 * @param {*} elm Selection-like container that receives the "k" label and input
 * @param {*} platform Platform object providing `init`, `fit`, `centroids` and `setting.ml.controller`
 */
function dispGKMeans(elm, platform) {
    let model = null

    // Re-create the model from the current "k" value and hand it the training data.
    const onInit = () => {
        platform.init()
        const clusterCount = Number(elm.select('[name=k]').property('value'))
        model = new GeneticKMeans(clusterCount, 10)
        platform.fit((tx, ty) => {
            model.init(tx)
        })
    }

    // Run one fit() call, publish the 1-based labels, then redraw the centroids.
    const onStep = cb => {
        platform.fit((tx, ty, pred_cb) => {
            model.fit()
            const labels = model.predict(tx)
            pred_cb(labels.map(label => label + 1))
        })

        const centroids = model.centroids
        const categories = model.centroids.map((_, index) => index + 1)
        platform.centroids(centroids, categories, {
            line: true,
            duration: 1000,
        })

        // Only schedule the continuation when the controller supplied one.
        if (cb) {
            setTimeout(cb, 1000)
        }
    }

    elm.append('span').text('k')
    elm.append('input')
        .attr('name', 'k')
        .attr('type', 'number')
        .attr('min', 1)
        .attr('max', 100)
        .attr('value', 3)

    platform.setting.ml.controller.stepLoopButtons().init(onInit).step(onStep)
}
35+
36+
/**
 * View entry point: sets the usage hint and renders the Genetic k-means controls.
 *
 * @param {*} platform Platform object whose `setting.ml` holds the usage text and config element
 */
export default function (platform) {
    const usage = 'Click and add data point. Then, click "Step" button repeatedly.'
    platform.setting.ml.usage = usage

    const container = platform.setting.ml.configElement
    dispGKMeans(container, platform)
}

lib/model/genetic_kmeans.js

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
/**
 * One candidate solution of the genetic k-means search: a set of centroids
 * over a dataset that is shared (by reference) between all candidates.
 */
class GeneticKMeansModel {
    /**
     * @param {number} k Requested number of clusters
     */
    constructor(k) {
        this._k = k
    }

    /**
     * Current centroids.
     *
     * @type {Array<Array<number>>}
     */
    get centroids() {
        return this._c
    }

    /**
     * Euclidean distance between two points of equal dimension.
     *
     * @param {number[]} a
     * @param {number[]} b
     * @returns {number}
     */
    _distance(a, b) {
        return Math.sqrt(a.reduce((s, v, i) => s + (v - b[i]) ** 2, 0))
    }

    /**
     * Stores the dataset and picks distinct data points as initial centroids.
     *
     * When fewer points than requested clusters are available, only as many
     * centroids as there are points are created.
     *
     * @param {Array<Array<number>>} data
     */
    init(data) {
        this._data = data

        const n = data.length
        // More centroids than points would require negative sample indices.
        const k = Math.min(this._k, n)
        const idx = []
        for (let i = 0; i < k; i++) {
            // i-th draw is an index into the n - i points not yet taken
            idx.push(Math.floor(Math.random() * (n - i)))
        }
        // Convert the "index among remaining points" draws into distinct absolute
        // indices. Only the k drawn entries exist, so the loops are bounded by k.
        for (let i = k - 1; i >= 0; i--) {
            for (let j = k - 1; j > i; j--) {
                if (idx[i] <= idx[j]) {
                    idx[j]++
                }
            }
        }

        this._c = idx.map(i => this._data[i].concat())
    }

    /**
     * Returns an independent copy: centroids are duplicated, the dataset is shared.
     *
     * @returns {GeneticKMeansModel}
     */
    copy() {
        const cp = new GeneticKMeansModel(this._k)
        // Assign directly instead of calling init(), which would draw random
        // centroids only to have them overwritten.
        cp._data = this._data
        cp._c = this._c.map(c => c.concat())
        return cp
    }

    /**
     * Sum of squared distances from every data point to its nearest centroid.
     *
     * @returns {number}
     */
    cost() {
        const pred = this.predict(this._data)
        let c = 0
        for (let i = 0; i < this._data.length; i++) {
            for (let j = 0; j < this._data[i].length; j++) {
                c += (this._data[i][j] - this._c[pred[i]][j]) ** 2
            }
        }
        return c
    }

    /**
     * Moves every centroid to the mean of the points assigned to it.
     *
     * A centroid with no assigned points keeps its previous position; dividing
     * by a zero count would turn it into NaN and make it unusable afterwards.
     *
     * @param {number[]} pred Cluster index for each data point
     */
    _updateCentroids(pred) {
        if (this._data.length === 0) {
            return
        }
        const dim = this._data[0].length
        const sums = this._c.map(() => Array(dim).fill(0))
        const counts = Array(this._c.length).fill(0)
        for (let i = 0; i < this._data.length; i++) {
            const k = pred[i]
            if (k < 0) {
                // predict() yields -1 when no centroid is comparable
                continue
            }
            for (let j = 0; j < dim; j++) {
                sums[k][j] += this._data[i][j]
            }
            counts[k]++
        }
        for (let k = 0; k < this._c.length; k++) {
            if (counts[k] > 0) {
                this._c[k] = sums[k].map(v => v / counts[k])
            }
        }
    }

    /**
     * One k-means step: assign points to the nearest centroid, then recompute centroids.
     */
    fit() {
        this._updateCentroids(this.predict(this._data))
    }

    /**
     * Returns the index of the nearest centroid for each point.
     *
     * @param {Array<Array<number>>} datas
     * @returns {number[]} Nearest centroid index per point (-1 if none is comparable)
     */
    predict(datas) {
        return datas.map(value => {
            let min_d = Infinity
            let min_k = -1
            for (let i = 0; i < this._c.length; i++) {
                const d = this._distance(value, this._c[i])
                if (d < min_d) {
                    min_d = d
                    min_k = i
                }
            }
            return min_k
        })
    }

    /**
     * Randomly reassigns points to other clusters, then recomputes centroids.
     *
     * Each point is considered with probability `rate`. A considered point is
     * moved to cluster k with probability proportional to `cm * dmax - d[k]`,
     * where `d[k]` is its distance to centroid k and `dmax` the largest of them.
     *
     * @param {number} rate Per-point mutation probability
     * @param {number} cm Multiplier applied to the maximum distance
     */
    mutation(rate, cm) {
        const pred = this.predict(this._data)
        for (let i = 0; i < this._data.length; i++) {
            if (Math.random() >= rate) {
                continue
            }
            const d = this._c.map(c => this._distance(c, this._data[i]))
            if (d[pred[i]] === 0) {
                // The point coincides with its centroid; leave it where it is.
                continue
            }
            const dmax = Math.max(...d)
            const p = d.map(v => cm * dmax - v)
            let r = Math.random() * p.reduce((s, v) => s + v, 0)
            for (let k = 0; k < p.length; k++) {
                r -= p[k]
                if (r <= 0) {
                    pred[i] = k
                    break
                }
            }
        }

        this._updateCentroids(pred)
    }
}
124+
125+
/**
 * Genetic k-means model
 *
 * Keeps a population of candidate clusterings and evolves it by
 * fitness-proportional selection, mutation and a k-means step.
 */
export default class GeneticKMeans {
    // https://deepblue-ts.co.jp/machine-learning/genetic-k-means-alogorithm/
    // https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.520.6737&rep=rep1&type=pdf
    /**
     * @param {number} k Number of clusters
     * @param {number} size Population size (number of candidate models)
     */
    constructor(k, size) {
        this._k = k
        this._size = size
        // Scaling constant: fitness is measured relative to (mean - _c * std)
        this._c = 1
        // Per-point mutation probability
        this._pm = 0.1
        // Multiplier of the maximum distance used by the mutation operator
        this._cm = 1

        this._models = []
        for (let i = 0; i < this._size; i++) {
            this._models[i] = new GeneticKMeansModel(this._k)
        }
    }

    /**
     * Centroids
     *
     * @type {Array<Array<number>>}
     */
    get centroids() {
        return this.bestModel.centroids
    }

    /**
     * The best model.
     *
     * @type {GeneticKMeansModel}
     */
    get bestModel() {
        // fit() keeps the population sorted by ascending cost
        return this._models[0]
    }

    /**
     * Initialize model.
     *
     * @param {Array<Array<number>>} datas
     */
    init(datas) {
        this._models.forEach(m => m.init(datas))
    }

    /**
     * Returns predicted categories.
     *
     * @param {Array<Array<number>>} datas
     * @returns {number[]}
     */
    predict(datas) {
        return this.bestModel.predict(datas)
    }

    /**
     * Roulette-wheel selection: returns index k with probability weights[k] / total.
     *
     * Falls back to a uniform draw when the weights carry no information
     * (total is zero or NaN).
     *
     * @param {number[]} weights Non-negative selection weights
     * @param {number} total Sum of `weights`
     * @returns {number} Selected index
     */
    _select(weights, total) {
        if (!(total > 0)) {
            return Math.floor(Math.random() * weights.length)
        }
        let r = Math.random() * total
        let last = 0
        for (let k = 0; k < weights.length; k++) {
            if (!(weights[k] > 0)) {
                // Zero-weight candidates must never be selected
                continue
            }
            last = k
            r -= weights[k]
            if (r <= 0) {
                return k
            }
        }
        // Rounding can leave a tiny positive remainder; use the last eligible index.
        return last
    }

    /**
     * Fit model.
     *
     * Runs one generation: selects `size` models with probability proportional
     * to their scaled fitness, mutates each one, applies a k-means step and
     * sorts the new population by ascending cost.
     */
    fit() {
        // Fitness is the negated cost, shifted so that models worse than
        // (mean - c * std) get weight zero.
        const f = this._models.map(m => -m.cost())
        const m = f.reduce((s, v) => s + v, 0) / f.length
        const s = Math.sqrt(f.reduce((s, v) => s + (v - m) ** 2, 0) / f.length)
        const population = f.map(v => Math.max(0, v - (m - this._c * s)))
        const sum = population.reduce((s, v) => s + v, 0)

        const newModels = []
        for (let i = 0; i < this._size; i++) {
            // Stop at the first index whose cumulative weight reaches the draw;
            // continuing the scan would always end on the last model.
            newModels[i] = this._models[this._select(population, sum)].copy()
        }
        this._models = newModels

        for (let k = 0; k < this._size; k++) {
            this._models[k].mutation(this._pm, this._cm)
            this._models[k].fit()
        }

        const costs = this._models.map((m, i) => [m.cost(), i])
        costs.sort((a, b) => a[0] - b[0])
        this._models = costs.map(v => this._models[v[1]])
    }
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import Matrix from '../../../lib/util/matrix.js'
2+
import GeneticKMeans from '../../../lib/model/genetic_kmeans.js'
3+
4+
import { randIndex } from '../../../lib/evaluate/clustering.js'
5+
6+
// Two well-separated Gaussian blobs (means 0 and 5) must be recovered
// almost perfectly after 20 generations.
test('predict', () => {
    const model = new GeneticKMeans(2, 10)
    const n = 50
    const firstBlob = Matrix.randn(n, 2, 0, 0.1)
    const secondBlob = Matrix.randn(n, 2, 5, 0.1)
    const x = firstBlob.concat(secondBlob).toArray()

    model.init(x)
    let generation = 0
    while (generation < 20) {
        model.fit()
        generation++
    }

    const y = model.predict(x)
    expect(y).toHaveLength(x.length)

    // Ground truth: the first n rows belong to blob 0, the rest to blob 1.
    const t = Array.from({ length: x.length }, (_, i) => Math.floor(i / n))
    const ri = randIndex(y, t)
    expect(ri).toBeGreaterThan(0.9)
})

0 commit comments

Comments
 (0)