Skip to content

Commit 44b1e48

Browse files
committed
Add A2C model
1 parent fd66aff commit 44b1e48

File tree

6 files changed

+496
-1
lines changed

6 files changed

+496
-1
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ Javascript AI package and demo on the browser.
4343
| edge detection | Roberts cross, Sobel, Prewitt, Laplacian, LoG, Canny, Snakes |
4444
| word embedding | Word2Vec |
4545
| recommendation | association analysis |
46-
| markov decision process | Dynamic programming, Monte carlo, Q learning, SARSA, Policy gradient, DQN, Genetic algorithm |
46+
| markov decision process | Dynamic programming, Monte carlo, Q learning, SARSA, Policy gradient, DQN, DDQN, A2C, Genetic algorithm |
4747
| game | |
4848

4949
## Datas

js/model_selector.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,7 @@ const AIMethods = [
404404
{ value: 'sarsa', title: 'SARSA' },
405405
{ value: 'policy_gradient', title: 'Policy Gradient' },
406406
{ value: 'dqn', title: 'DQN / DDQN' },
407+
{ value: 'a2c', title: 'A2C' },
407408
{ value: 'genetic_algorithm', title: 'Genetic Algorithm' },
408409
],
409410
},

js/view/a2c.js

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
import A2CAgent from '../../lib/model/a2c.js'
2+
3+
/**
 * Callback-style adapter around A2CAgent.
 *
 * Every operation runs synchronously on the wrapped agent and then invokes the
 * optional callback, so callers can be written in continuation style.
 */
class A2CCBAgent {
  /**
   * @param {*} env Environment forwarded to A2CAgent.
   * @param {number} resolution Forwarded to A2CAgent.
   * @param {*} layers Layer definition forwarded to A2CAgent.
   * @param {*} optimizer Optimizer setting forwarded to A2CAgent.
   * @param {boolean} use_worker Accepted for signature compatibility; not used by this class.
   * @param {function(): void} [cb] Invoked synchronously once the agent has been created.
   */
  constructor(env, resolution, layers, optimizer, use_worker, cb) {
    // The literal 50 is A2CAgent's third constructor argument; its meaning is
    // defined by A2CAgent, which is not visible from this file.
    this._agent = new A2CAgent(env, resolution, 50, layers, optimizer)
    cb && cb()
  }

  /**
   * Forward the `method` setting to the wrapped agent.
   */
  set method(value) {
    this._agent.method = value
  }

  /**
   * No-op: this class holds nothing that needs releasing.
   */
  terminate() {}

  /**
   * @param {*} env Environment passed to the wrapped agent's get_score.
   * @param {function(*): void} [cb] Receives the value returned by get_score.
   */
  get_score(env, cb) {
    const score = this._agent.get_score(env)
    cb && cb(score)
  }

  /**
   * @param {*} env Environment passed to the wrapped agent's get_action.
   * @param {*} state Current state.
   * @param {function(*): void} [cb] Receives the value returned by get_action.
   */
  get_action(env, state, cb) {
    const action = this._agent.get_action(env, state)
    cb && cb(action)
  }

  /**
   * Forward one transition to the wrapped agent's update, then signal completion.
   *
   * @param {*} action
   * @param {*} state
   * @param {*} next_state
   * @param {*} reward
   * @param {*} done
   * @param {number} learning_rate
   * @param {number} batch
   * @param {function(): void} [cb] Invoked after update returns.
   */
  update(action, state, next_state, reward, done, learning_rate, batch, cb) {
    this._agent.update(action, state, next_state, reward, done, learning_rate, batch)
    cb && cb()
  }
}
30+
31+
/**
 * Build the A2C control panel inside `elm` and wire it to `env`.
 *
 * @param {*} elm Selection-like object; this function uses its append, select,
 *   selectAll, attr, property, text and on methods (d3-style — confirm against caller).
 * @param {*} env Platform object; this function reads `type` and `env` and calls
 *   reset, step, render and plotRewards on it.
 * @returns {function(): void} Teardown that stops any running loop and terminates the agent.
 */
const dispA2C = function (elm, env) {
  let resolution = 20
  if (env.type === 'grid') {
    env.env._max_step = 1000
    resolution = Math.max(...env.env.size)
  }
  const builder = new NeuralNetworkBuilder()

  const use_worker = false
  let readyNet = false
  let agent = null
  let cur_state = env.reset(agent)

  // Redraw the environment; for grid environments the agent's score is passed to the renderer.
  const render_score = cb => {
    if (env.type === 'grid') {
      agent.get_score(env, score => {
        env.render(() => score)
        cb && cb()
      })
    } else {
      env.render()
      cb && cb()
    }
  }

  // Take one action, feed the transition to the agent, then optionally redraw.
  // `cb` receives the `done` flag returned by env.step (or nothing if the agent is not ready).
  const step = (cb, render = true) => {
    if (!readyNet) {
      cb && cb()
      return
    }
    const learning_rate = +elm.select('[name=learning_rate]').property('value')
    const batch = +elm.select('[name=batch]').property('value')
    agent.get_action(env, cur_state, action => {
      const [next_state, reward, done] = env.step(action, agent)
      agent.update(action, cur_state, next_state, reward, done, learning_rate, batch, () => {
        const end_proc = () => {
          cur_state = next_state
          cb && cb(done)
        }
        if (render) {
          render_score(end_proc)
        } else {
          end_proc()
        }
      })
    })
  }

  // Reset the environment to a fresh state and redraw.
  const reset = cb => {
    if (!readyNet) {
      cb && cb()
      return
    }
    cur_state = env.reset(agent)
    render_score(() => {
      cb && cb()
    })
  }

  elm.append('span').text(' Hidden Layers ')
  builder.makeHtml(elm, { optimizer: true })
  agent = new A2CCBAgent(env, resolution, builder.layers, builder.optimizer, use_worker, () => {
    readyNet = true
    // The callback runs inside the constructor, before `agent` is assigned,
    // so the first render is deferred.
    setTimeout(() => {
      render_score(() => {
        elm.selectAll('input').property('disabled', false)
      })
    }, 0)
  })
  elm.append('input')
    .attr('type', 'button')
    .attr('value', 'New agent')
    .on('click', () => {
      agent.terminate()
      agent = new A2CCBAgent(env, resolution, builder.layers, builder.optimizer, use_worker, () => {
        readyNet = true
        // This callback runs inside the constructor, while `agent` still refers to
        // the previous instance. Defer the reset to a microtask so env.reset and the
        // score render see the new agent, and so it runs before any pending loop timer.
        queueMicrotask(() => reset())
      })
    })
  // Wrap the handler so the listener's own arguments are never mistaken for `cb`.
  elm.append('input')
    .attr('type', 'button')
    .attr('value', 'Reset')
    .on('click', () => reset())
  elm.append('span').text(' Learning rate ')
  elm.append('input')
    .attr('type', 'number')
    .attr('name', 'learning_rate')
    .attr('min', 0)
    .attr('max', 100)
    .attr('step', 0.01)
    .attr('value', 0.001)
  elm.append('span').text(' Batch size ')
  elm.append('input')
    .attr('type', 'number')
    .attr('name', 'batch')
    .attr('value', 10)
    .attr('min', 1)
    .attr('max', 100)
    .attr('step', 1)
  elm.append('input')
    .attr('type', 'button')
    .attr('value', 'Step')
    .on('click', () => step())
  let isRunning = false
  // "Epoch": run steps back to back, yielding to the event loop after every step.
  const epochButton = elm
    .append('input')
    .attr('type', 'button')
    .attr('value', 'Epoch')
    .on('click', () => {
      isRunning = !isRunning
      epochButton.attr('value', isRunning ? 'Stop' : 'Epoch')
      skipButton.property('disabled', isRunning)
      if (isRunning) {
        ;(function loop() {
          if (isRunning) {
            step(done => {
              setTimeout(() => (done ? reset(loop) : loop()))
            })
          } else {
            setTimeout(() => {
              render_score(() => {
                epochButton.attr('value', 'Epoch')
              })
            }, 0)
          }
        })()
      }
    })
  // "Skip": run steps in a tight loop, yielding to the event loop roughly every 200 ms.
  const skipButton = elm
    .append('input')
    .attr('type', 'button')
    .attr('value', 'Skip')
    .on('click', () => {
      isRunning = !isRunning
      skipButton.attr('value', isRunning ? 'Stop' : 'Skip')
      epochButton.property('disabled', isRunning)
      if (isRunning) {
        let lastt = new Date().getTime()
        ;(function loop() {
          while (isRunning) {
            let dn = false
            step(done => {
              dn = done
              // use_worker is hard-coded to false above, so this branch is currently never taken.
              if (use_worker) {
                done ? reset(loop) : loop()
              }
            }, true)
            if (use_worker) {
              return
            }
            const curt = new Date().getTime()
            if (dn) {
              reset()
            }
            if (curt - lastt > 200) {
              lastt = curt
              setTimeout(loop, 0)
              return
            }
          }
          render_score(() => {
            skipButton.attr('value', 'Skip')
          })
        })()
      }
    })
  env.plotRewards(elm)

  // Inputs stay disabled until the first agent's ready callback re-enables them.
  elm.selectAll('input').property('disabled', true)

  return () => {
    isRunning = false
    agent.terminate()
  }
}
204+
205+
/**
 * View entry point: sets the usage hint and builds the A2C panel.
 *
 * The teardown function returned by dispA2C is stored on
 * `platform.setting.terminate`.
 *
 * @param {*} platform Object exposing `setting.ml.usage`, `setting.ml.configElement`
 *   and `setting.terminate`; it is also passed to dispA2C as the environment.
 */
export default function (platform) {
  platform.setting.ml.usage = 'Click "step" to update.'
  platform.setting.terminate = dispA2C(platform.setting.ml.configElement, platform)
}

0 commit comments

Comments
 (0)