@@ -28,12 +28,10 @@ import (
 	"errors"
 	"fmt"
 	"os"
-	"runtime"
 	"sync"
 	"syscall"
 	"time"
 
-	"github.com/shirou/gopsutil/cpu"
 	"github.com/uber-go/tally"
 	"go.uber.org/zap"
 	"go.uber.org/zap/zapcore"
@@ -140,10 +138,11 @@ type (
 		logger       *zap.Logger
 		metricsScope tally.Scope
 
-		pollerRequestCh    chan struct{}
-		pollerAutoScaler   *pollerAutoScaler
-		taskQueueCh        chan interface{}
-		sessionTokenBucket *sessionTokenBucket
+		pollerRequestCh      chan struct{}
+		pollerAutoScaler     *pollerAutoScaler
+		workerUsageCollector *workerUsageCollector
+		taskQueueCh          chan interface{}
+		sessionTokenBucket   *sessionTokenBucket
 	}
 
 	polledTask struct {
@@ -173,17 +172,29 @@ func newBaseWorker(options baseWorkerOptions, logger *zap.Logger, metricsScope t
 			logger,
 		)
 	}
+	// for now it's default to be enabled
+	var workerUC *workerUsageCollector
+	workerUC = newWorkerUsageCollector(
+		workerUsageCollectorOptions{
+			Enabled:      true,
+			Cooldown:     30 * time.Second,
+			Host:         options.host,
+			MetricsScope: metricsScope,
+		},
+		logger,
+	)
 
 	bw := &baseWorker{
-		options:          options,
-		shutdownCh:       make(chan struct{}),
-		taskLimiter:      rate.NewLimiter(rate.Limit(options.maxTaskPerSecond), 1),
-		retrier:          backoff.NewConcurrentRetrier(pollOperationRetryPolicy),
-		logger:           logger.With(zapcore.Field{Key: tagWorkerType, Type: zapcore.StringType, String: options.workerType}),
-		metricsScope:     tagScope(metricsScope, tagWorkerType, options.workerType),
-		pollerRequestCh:  make(chan struct{}, options.maxConcurrentTask),
-		pollerAutoScaler: pollerAS,
-		taskQueueCh:      make(chan interface{}), // no buffer, so poller only able to poll new task after previous is dispatched.
+		options:              options,
+		shutdownCh:           make(chan struct{}),
+		taskLimiter:          rate.NewLimiter(rate.Limit(options.maxTaskPerSecond), 1),
+		retrier:              backoff.NewConcurrentRetrier(pollOperationRetryPolicy),
+		logger:               logger.With(zapcore.Field{Key: tagWorkerType, Type: zapcore.StringType, String: options.workerType}),
+		metricsScope:         tagScope(metricsScope, tagWorkerType, options.workerType),
+		pollerRequestCh:      make(chan struct{}, options.maxConcurrentTask),
+		pollerAutoScaler:     pollerAS,
+		workerUsageCollector: workerUC,
+		taskQueueCh:          make(chan interface{}), // no buffer, so poller only able to poll new task after previous is dispatched.
 
 		limiterContext:       ctx,
 		limiterContextCancel: cancel,
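(The `workerUsageCollectorOptions` fields and `newWorkerUsageCollector` constructor used above are defined elsewhere in this change. For readers following the diff, here is a minimal sketch of what that constructor could look like, inferred only from the call site; every name not visible in the hunks, including the package name and the collector's internal fields, is an assumption rather than the PR's actual implementation.)

```go
// Hypothetical sketch inferred from the call site in newBaseWorker above.
// Only workerUsageCollectorOptions, its four fields, and newWorkerUsageCollector
// appear in the diff; everything else here is assumed.
package internal

import (
	"sync"
	"time"

	"github.com/uber-go/tally"
	"go.uber.org/zap"
)

type workerUsageCollectorOptions struct {
	Enabled      bool          // collection is turned on by default in newBaseWorker
	Cooldown     time.Duration // interval between usage samples
	Host         string        // host identity used to tag emitted metrics
	MetricsScope tally.Scope
}

type workerUsageCollector struct {
	workerUsageCollectorOptions
	logger     *zap.Logger
	shutdownCh chan struct{}
	wg         sync.WaitGroup
}

func newWorkerUsageCollector(options workerUsageCollectorOptions, logger *zap.Logger) *workerUsageCollector {
	if !options.Enabled {
		// baseWorker guards Start/Stop with nil checks, so a disabled
		// collector can simply be absent.
		return nil
	}
	return &workerUsageCollector{
		workerUsageCollectorOptions: options,
		logger:                      logger,
		shutdownCh:                  make(chan struct{}),
	}
}
```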
@@ -207,6 +218,10 @@ func (bw *baseWorker) Start() {
 		bw.pollerAutoScaler.Start()
 	}
 
+	if bw.workerUsageCollector != nil {
+		bw.workerUsageCollector.Start()
+	}
+
 	for i := 0; i < bw.options.pollerCount; i++ {
 		bw.shutdownWG.Add(1)
 		go bw.runPoller()
@@ -215,15 +230,6 @@ func (bw *baseWorker) Start() {
 	bw.shutdownWG.Add(1)
 	go bw.runTaskDispatcher()
 
-	// We want the emit function run once per host instead of run once per worker
-	//collectHardwareUsageOnce.Do(func() {
-	//	bw.shutdownWG.Add(1)
-	//	go bw.emitHardwareUsage()
-	//})
-
-	bw.shutdownWG.Add(1)
-	go bw.emitHardwareUsage()
-
 	bw.isWorkerStarted = true
 	traceLog(func() {
 		bw.logger.Info("Started Worker",
@@ -407,6 +413,9 @@ func (bw *baseWorker) Stop() {
 	if bw.pollerAutoScaler != nil {
 		bw.pollerAutoScaler.Stop()
 	}
+	if bw.workerUsageCollector != nil {
+		bw.workerUsageCollector.Stop()
+	}
 
 	if success := util.AwaitWaitGroup(&bw.shutdownWG, bw.options.shutdownTimeout); !success {
 		traceLog(func() {
@@ -420,53 +429,3 @@ func (bw *baseWorker) Stop() {
 	}
 	return
 }
-
-func (bw *baseWorker) emitHardwareUsage() {
-	defer func() {
-		if p := recover(); p != nil {
-			bw.metricsScope.Counter(metrics.WorkerPanicCounter).Inc(1)
-			topLine := fmt.Sprintf("base worker for %s [panic]:", bw.options.workerType)
-			st := getStackTraceRaw(topLine, 7, 0)
-			bw.logger.Error("Unhandled panic in hardware emitting.",
-				zap.String(tagPanicError, fmt.Sprintf("%v", p)),
-				zap.String(tagPanicStack, st))
-		}
-	}()
-	defer bw.shutdownWG.Done()
-	collectHardwareUsageOnce.Do(
-		func() {
-			ticker := time.NewTicker(hardwareMetricsCollectInterval)
-			for {
-				select {
-				case <-bw.shutdownCh:
-					ticker.Stop()
-					return
-				case <-ticker.C:
-					host := bw.options.host
-					scope := bw.metricsScope.Tagged(map[string]string{clientHostTag: host})
-
-					cpuPercent, err := cpu.Percent(0, false)
-					if err != nil {
-						bw.logger.Warn("Failed to get cpu percent", zap.Error(err))
-						return
-					}
-					cpuCores, err := cpu.Counts(false)
-					if err != nil {
-						bw.logger.Warn("Failed to get number of cpu cores", zap.Error(err))
-						return
-					}
-					scope.Gauge(metrics.NumCPUCores).Update(float64(cpuCores))
-					scope.Gauge(metrics.CPUPercentage).Update(cpuPercent[0])
-
-					var memStats runtime.MemStats
-					runtime.ReadMemStats(&memStats)
-
-					scope.Gauge(metrics.NumGoRoutines).Update(float64(runtime.NumGoroutine()))
-					scope.Gauge(metrics.TotalMemory).Update(float64(memStats.Sys))
-					scope.Gauge(metrics.MemoryUsedHeap).Update(float64(memStats.HeapInuse))
-					scope.Gauge(metrics.MemoryUsedStack).Update(float64(memStats.StackInuse))
-				}
-			}
-		})
-
-}
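(The removed `emitHardwareUsage` goroutine above sampled CPU and memory per worker, with only `collectHardwareUsageOnce` preventing duplicate emission on a host; that responsibility now sits behind the nil-checked `Start`/`Stop` calls on `workerUsageCollector` in the earlier hunks. Continuing the hypothetical sketch shown after the `newBaseWorker` hunk, the same gauges could be emitted on a `Cooldown` ticker roughly as follows; the literal metric names stand in for the `metrics.*` constants and `clientHostTag` used by the removed code, and the PR's real collector may be organized differently.)

```go
// Hypothetical continuation of the sketch above (same assumed package and
// workerUsageCollector type); imports shown are the ones these methods need.
package internal

import (
	"runtime"
	"time"

	"github.com/shirou/gopsutil/cpu"
	"go.uber.org/zap"
)

// Start launches a background goroutine that samples hardware and runtime
// usage every Cooldown until Stop is called.
func (w *workerUsageCollector) Start() {
	w.wg.Add(1)
	go func() {
		defer w.wg.Done()
		ticker := time.NewTicker(w.Cooldown)
		defer ticker.Stop()
		for {
			select {
			case <-w.shutdownCh:
				return
			case <-ticker.C:
				w.emitOnce()
			}
		}
	}()
}

// Stop signals the sampling goroutine and waits for it to exit.
func (w *workerUsageCollector) Stop() {
	close(w.shutdownCh)
	w.wg.Wait()
}

// emitOnce reports the same gauges the removed emitHardwareUsage emitted,
// tagged with the configured host.
func (w *workerUsageCollector) emitOnce() {
	scope := w.MetricsScope.Tagged(map[string]string{"host": w.Host})

	if cpuPercent, err := cpu.Percent(0, false); err != nil {
		w.logger.Warn("Failed to get cpu percent", zap.Error(err))
	} else if len(cpuPercent) > 0 {
		scope.Gauge("cpu-percentage").Update(cpuPercent[0])
	}
	if cpuCores, err := cpu.Counts(false); err != nil {
		w.logger.Warn("Failed to get number of cpu cores", zap.Error(err))
	} else {
		scope.Gauge("num-cpu-cores").Update(float64(cpuCores))
	}

	var memStats runtime.MemStats
	runtime.ReadMemStats(&memStats)
	scope.Gauge("num-go-routines").Update(float64(runtime.NumGoroutine()))
	scope.Gauge("total-memory").Update(float64(memStats.Sys))
	scope.Gauge("memory-used-heap").Update(float64(memStats.HeapInuse))
	scope.Gauge("memory-used-stack").Update(float64(memStats.StackInuse))
}
```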