@@ -65,13 +65,18 @@ var dispDQN = function (elm, env) {
65
65
return
66
66
}
67
67
const greedy_rate = + elm . select ( '[name=greedy_rate]' ) . property ( 'value' )
68
+ const min_greedy_rate = + elm . select ( '[name=min_greedy_rate]' ) . property ( 'value' )
69
+ const greedy_rate_update = + elm . select ( '[name=greedy_rate_update]' ) . property ( 'value' )
68
70
const learning_rate = + elm . select ( '[name=learning_rate]' ) . property ( 'value' )
69
71
const batch = + elm . select ( '[name=batch]' ) . property ( 'value' )
70
- agent . get_action ( env , cur_state , greedy_rate , action => {
72
+ agent . get_action ( env , cur_state , Math . max ( min_greedy_rate , greedy_rate * greedy_rate_update ) , action => {
71
73
let [ next_state , reward , done ] = env . step ( action , agent )
72
74
agent . update ( action , cur_state , next_state , reward , done , learning_rate , batch , ( ) => {
73
75
const end_proc = ( ) => {
74
76
cur_state = next_state
77
+ if ( done || env . epoch % 1000 === 999 ) {
78
+ elm . select ( '[name=greedy_rate]' ) . property ( 'value' , greedy_rate * greedy_rate_update )
79
+ }
75
80
cb && cb ( done )
76
81
}
77
82
if ( render ) {
@@ -113,6 +118,7 @@ var dispDQN = function (elm, env) {
113
118
readyNet = true
114
119
reset ( )
115
120
} )
121
+ elm . select ( '[name=greedy_rate]' ) . property ( 'value' , 1 )
116
122
} )
117
123
elm . append ( 'input' ) . attr ( 'type' , 'button' ) . attr ( 'value' , 'Reset' ) . on ( 'click' , reset )
118
124
elm . append ( 'select' )
@@ -127,13 +133,31 @@ var dispDQN = function (elm, env) {
127
133
. append ( 'option' )
128
134
. property ( 'value' , d => d )
129
135
. text ( d => d )
136
+ elm . append ( 'span' ) . text ( 'greedy rate = max(' )
137
+ elm . append ( 'input' )
138
+ . attr ( 'type' , 'number' )
139
+ . attr ( 'name' , 'min_greedy_rate' )
140
+ . attr ( 'min' , 0 )
141
+ . attr ( 'max' , 1 )
142
+ . attr ( 'step' , '0.01' )
143
+ . attr ( 'value' , 0.01 )
144
+ elm . append ( 'span' ) . text ( ', ' )
130
145
elm . append ( 'input' )
131
146
. attr ( 'type' , 'number' )
132
147
. attr ( 'name' , 'greedy_rate' )
133
148
. attr ( 'min' , 0 )
134
149
. attr ( 'max' , 1 )
135
150
. attr ( 'step' , '0.01' )
136
- . attr ( 'value' , 0.3 )
151
+ . attr ( 'value' , 1 )
152
+ elm . append ( 'span' ) . text ( ' * ' )
153
+ elm . append ( 'input' )
154
+ . attr ( 'type' , 'number' )
155
+ . attr ( 'name' , 'greedy_rate_update' )
156
+ . attr ( 'min' , 0 )
157
+ . attr ( 'max' , 1 )
158
+ . attr ( 'step' , '0.01' )
159
+ . attr ( 'value' , 0.995 )
160
+ elm . append ( 'span' ) . text ( ') ' )
137
161
elm . append ( 'span' ) . text ( ' Learning rate ' )
138
162
elm . append ( 'input' )
139
163
. attr ( 'type' , 'number' )
0 commit comments