
Commit 7ffe812

feat(dfly_bench): allow regulated throughput in 3 modes (#4962)
* feat(dfly_bench): allow regulated throughput in 3 modes

  1. Coordinated omission - with --qps=0, each request is sent and then we wait for the
     response, and so on. In pipeline mode, k requests are sent and then we wait for them
     to return before sending another k.
  2. qps > 0: we schedule requests at frequency "qps" per connection, but if the pending
     request count crosses a limit we slow down by throttling request sending. This mode
     enables gentle uncoordinated omission, where the schedule converges to the real
     throughput capacity of the backend (if it is slower than the target throughput).
  3. qps < 0: similar to (2), but the schedule is never adjusted and may overload the
     server if the target QPS is too high.

  Signed-off-by: Roman Gershman <roman@dragonflydb.io>

* chore: change pipelining and coordinated omission logic

  Before this change, uncoordinated omission only worked without pipelining. Now, in
  pipelining mode we send a burst of P requests and then:
  a) For coordinated omission - wait for all of them to complete before proceeding further.
  b) For non-coordinated omission - sleep to pace the single-connection throughput as
     defined by the qps setting.

  Signed-off-by: Roman Gershman <roman@dragonflydb.io>

---------

Signed-off-by: Roman Gershman <roman@dragonflydb.io>
1 parent 5147fa9 commit 7ffe812
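
To make the three modes concrete, here is a minimal C++ sketch of the per-connection pacing decision the commit describes. It is an illustration, not the actual dfly_bench code: the helper name PacingDelayNs and its parameters are hypothetical, while the 1'000'000'000 / qps cycle, the pipeline * 2 pending-reply threshold, and the 10'000 ns back-off mirror the values visible in the diff below.

#include <cstddef>
#include <cstdint>
#include <cstdlib>

// Hypothetical helper: how long (in ns) a connection should sleep before sending its next batch.
int64_t PacingDelayNs(int32_t qps, uint32_t pipeline, size_t pending_replies,
                      int64_t start_ns, int64_t now_ns, uint64_t batch_index) {
  if (qps == 0)  // Mode 1: coordinated omission - the caller waits for replies instead of pacing.
    return 0;

  // Modes 2 and 3: pace the connection at |qps| requests per second.
  int64_t cycle_ns = 1'000'000'000LL / std::abs(qps);
  int64_t target_ns = start_ns + int64_t(batch_index) * cycle_ns;
  int64_t sleep_ns = target_ns - now_ns;

  bool throttle = qps > 0;  // Mode 3 (qps < 0): hard target, never slows down.
  if (throttle && pending_replies > pipeline * 2 && sleep_ns <= 0) {
    sleep_ns = 10'000;  // Mode 2: behind schedule and replies are piling up - back off gently.
  }
  return sleep_ns > 0 ? sleep_ns : 0;
}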

File tree

2 files changed: +173 -50 lines changed


src/server/dfly_bench.cc

Lines changed: 61 additions & 44 deletions
@@ -37,7 +37,12 @@ using std::string;
 
 ABSL_FLAG(uint16_t, p, 6379, "Server port");
 ABSL_FLAG(uint32_t, c, 20, "Number of connections per thread");
-ABSL_FLAG(uint32_t, qps, 20, "QPS schedule at which the generator sends requests to the server");
+ABSL_FLAG(int32_t, qps, 20,
+          "QPS schedule at which the generator sends requests to the server "
+          "per single connection. 0 means - coordinated omission, and positive value will throttle "
+          "the actual qps if server is slower than the target qps. "
+          "negative value means - hard target, without throttling.");
+
 ABSL_FLAG(uint32_t, n, 1000, "Number of requests to send per connection");
 ABSL_FLAG(uint32_t, test_time, 0, "Testing time in seconds");
 ABSL_FLAG(uint32_t, d, 16, "Value size in bytes ");
@@ -604,24 +609,56 @@ void Driver::Connect(unsigned index, const tcp::endpoint& ep) {
 
 void Driver::Run(uint64_t* cycle_ns, CommandGenerator* cmd_gen) {
   start_ns_ = absl::GetCurrentTimeNanos();
-  unsigned pipeline = GetFlag(FLAGS_pipeline);
+  uint32_t pipeline = std::max<uint32_t>(GetFlag(FLAGS_pipeline), 1u);
+  bool should_throttle = GetFlag(FLAGS_qps) > 0;
 
   stats_.num_clients++;
   int64_t time_limit_ns =
       time_limit_ > 0 ? int64_t(time_limit_) * 1'000'000'000 + start_ns_ : INT64_MAX;
-
+  int64_t now = start_ns_;
   SlotRange slot_range{0, kNumSlots - 1};
+  CHECK_GT(num_reqs_, 0u);
 
-  for (unsigned i = 0; i < num_reqs_; ++i) {
-    int64_t now = absl::GetCurrentTimeNanos();
+  uint32_t num_batches = ((num_reqs_ - 1) / pipeline) + 1;
 
-    if (now > time_limit_ns) {
-      break;
+  for (unsigned i = 0; i < num_batches && now < time_limit_ns; ++i) {
+    if (i == num_batches - 1) {  // last batch
+      pipeline = num_reqs_ - i * pipeline;
+    }
+
+    for (unsigned j = 0; j < pipeline; ++j) {
+      // TODO: this skews the distribution if slot ranges are uneven.
+      // Ideally we would like to pick randomly a single slot from all the ranges we have
+      // and pass it to cmd_gen->Next below.
+      if (!shard_slots_.Empty()) {
+        slot_range = shard_slots_.NextSlotRange(ep_, i);
+      }
+
+      string cmd = cmd_gen->Next(slot_range);
+
+      Req req;
+      req.start = absl::GetCurrentTimeNanos();
+      req.might_hit = cmd_gen->might_hit();
+
+      reqs_.push(req);
+
+      error_code ec = socket_->Write(io::Buffer(cmd));
+      if (ec && FiberSocketBase::IsConnClosed(ec)) {
+        // TODO: report failure
+        VLOG(1) << "Connection closed";
+        break;
+      }
+      CHECK(!ec) << ec.message();
+      if (cmd_gen->noreply()) {
+        PopRequest();
+      }
     }
+
+    now = absl::GetCurrentTimeNanos();
     if (cycle_ns) {
       int64_t target_ts = start_ns_ + i * (*cycle_ns);
       int64_t sleep_ns = target_ts - now;
-      if (reqs_.size() > 10 && sleep_ns <= 0) {
+      if (reqs_.size() > pipeline * 2 && should_throttle && sleep_ns <= 0) {
         sleep_ns = 10'000;
       }
 
@@ -630,7 +667,7 @@ void Driver::Run(uint64_t* cycle_ns, CommandGenerator* cmd_gen) {
         // There is no point in sending more requests if they are piled up in the server.
         do {
           ThisFiber::SleepFor(chrono::nanoseconds(sleep_ns));
-        } while (reqs_.size() > 10);
+        } while (should_throttle && reqs_.size() > pipeline * 2);
       } else if (i % 256 == 255) {
         ThisFiber::Yield();
         VLOG(5) << "Behind QPS schedule";
@@ -639,33 +676,7 @@ void Driver::Run(uint64_t* cycle_ns, CommandGenerator* cmd_gen) {
       // Coordinated omission.
 
       fb2::NoOpLock lk;
-      cnd_.wait(lk, [this, pipeline] { return reqs_.size() < pipeline; });
-    }
-
-    // TODO: this skews the distribution if slot ranges are uneven.
-    // Ideally we would like to pick randomly a single slot from all the ranges we have
-    // and pass it to cmd_gen->Next below.
-    if (!shard_slots_.Empty()) {
-      slot_range = shard_slots_.NextSlotRange(ep_, i);
-    }
-
-    string cmd = cmd_gen->Next(slot_range);
-
-    Req req;
-    req.start = absl::GetCurrentTimeNanos();
-    req.might_hit = cmd_gen->might_hit();
-
-    reqs_.push(req);
-
-    error_code ec = socket_->Write(io::Buffer(cmd));
-    if (ec && FiberSocketBase::IsConnClosed(ec)) {
-      // TODO: report failure
-      VLOG(1) << "Connection closed";
-      break;
-    }
-    CHECK(!ec) << ec.message();
-    if (cmd_gen->noreply()) {
-      PopRequest();
+      cnd_.wait(lk, [this] { return reqs_.empty(); });
     }
   }
 
@@ -908,12 +919,15 @@ void WatchFiber(size_t num_shards, atomic_bool* finish_signal, ProactorPool* pp)
   num_shards = max<size_t>(num_shards, 1u);
   uint64_t resp_goal = GetFlag(FLAGS_c) * pp->size() * GetFlag(FLAGS_n) * num_shards;
   uint32_t time_limit = GetFlag(FLAGS_test_time);
+  bool should_throttle = GetFlag(FLAGS_qps) > 0;
 
   while (*finish_signal == false) {
     // we sleep with resolution of 1s but print with lower frequency to be more responsive
     // when benchmark finishes.
     ThisFiber::SleepFor(1s);
-    pp->AwaitBrief([](auto, auto*) { client->AdjustCycle(); });
+    if (should_throttle) {
+      pp->AwaitBrief([](auto, auto*) { client->AdjustCycle(); });
+    }
 
     int64_t now = absl::GetCurrentTimeNanos();
     if (now - last_print < 5000'000'000LL)  // 5s
@@ -1084,9 +1098,9 @@ int main(int argc, char* argv[]) {
   if (protocol == RESP) {
     shards = proactor->Await([&] { return FetchClusterInfo(ep, proactor); });
   }
-  LOG(INFO) << "Connecting threads to "
-            << (shards.empty() ? string("single node ")
-                               : absl::StrCat(shards.size(), " shard cluster"));
+  CONSOLE_INFO << "Connecting to "
+               << (shards.empty() ? string("single node ")
+                                  : absl::StrCat(shards.size(), " shard cluster"));
 
   if (!shards.empty() && !GetFlag(FLAGS_command).empty() && GetFlag(FLAGS_cluster_skip_tags)) {
     // For custom commands we may need to use the same hashtag for multiple keys.
@@ -1112,9 +1126,11 @@ int main(int argc, char* argv[]) {
   CHECK_LE(key_minimum, key_maximum);
 
   uint32_t thread_key_step = 0;
-  const uint32_t qps = GetFlag(FLAGS_qps);
+  uint32_t qps = abs(GetFlag(FLAGS_qps));
+  bool throttle = GetFlag(FLAGS_qps) > 0;
   const int64_t interval = qps ? 1'000'000'000LL / qps : 0;
   uint64_t num_reqs = GetFlag(FLAGS_n);
+
   uint64_t total_conn_num = GetFlag(FLAGS_c) * pp->size();
   uint64_t total_requests = num_reqs * total_conn_num;
   uint32_t time_limit = GetFlag(FLAGS_test_time);
@@ -1130,11 +1146,12 @@ int main(int argc, char* argv[]) {
 
   if (!time_limit) {
     CONSOLE_INFO << "Running " << pp->size() << " threads, sending " << num_reqs
-                 << " requests per each connection, or " << total_requests << " requests overall";
+                 << " requests per each connection, or " << total_requests << " requests overall "
+                 << (throttle ? "with" : "without") << " throttling";
   }
   if (interval) {
-    CONSOLE_INFO << "At a rate of " << GetFlag(FLAGS_qps)
-                 << " rps per connection, i.e. request every " << interval / 1000 << "us";
+    CONSOLE_INFO << "At a rate of " << qps << " rps per connection, i.e. request every "
+                 << interval / 1000 << "us";
     CONSOLE_INFO << "Overall scheduled RPS: " << qps * total_conn_num;
   } else {
     CONSOLE_INFO << "Coordinated omission mode - the rate is determined by the server";

tools/local/monitoring/grafana/provisioning/dashboards/dragonfly.json

Lines changed: 112 additions & 6 deletions
@@ -1143,6 +1143,109 @@
       "title": "Network I/O",
       "type": "timeseries"
     },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "${DS_PROMETHEUS}"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "links": [],
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "s"
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 12,
+        "x": 0,
+        "y": 29
+      },
+      "id": 26,
+      "options": {
+        "alertThreshold": true,
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "pluginVersion": "10.1.10",
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "${DS_PROMETHEUS}"
+          },
+          "editorMode": "code",
+          "exemplar": true,
+          "expr":
+            "irate(dragonfly_reply_duration_seconds{namespace=\"$namespace\",pod=~\"$pod_name\"}[$__interval]) / irate(dragonfly_reply_total{namespace=\"$namespace\",pod=~\"$pod_name\"}[$__interval])",
+          "format": "time_series",
+          "interval": "",
+          "intervalFactor": 2,
+          "legendFormat": "{{ pod }} input",
+          "range": true,
+          "refId": "A",
+          "step": 240
+        }
+      ],
+      "title": "Reply Latency",
+      "type": "timeseries"
+    },
     {
       "datasource": {
         "type": "prometheus",
@@ -1422,7 +1525,8 @@
             "mode": "absolute",
             "steps": [
               {
-                "color": "green"
+                "color": "green",
+                "value": null
               },
               {
                 "color": "red",
@@ -1536,7 +1640,8 @@
             "mode": "absolute",
             "steps": [
               {
-                "color": "green"
+                "color": "green",
+                "value": null
               },
               {
                 "color": "red",
@@ -1574,8 +1679,7 @@
             "uid": "${DS_PROMETHEUS}"
           },
           "editorMode": "code",
-          "expr":
-            "dragonfly_pipeline_queue_length{namespace=\"$namespace\",pod=~\"$pod_name\"}/dragonfly_connected_clients{namespace=\"$namespace\",pod=~\"$pod_name\"}",
+          "expr": "dragonfly_pipeline_queue_length{namespace=\"$namespace\",pod=~\"$pod_name\"}",
           "instant": false,
           "legendFormat": "avr_pipeline_depth",
           "range": true,
@@ -1631,7 +1735,8 @@
             "mode": "absolute",
             "steps": [
               {
-                "color": "green"
+                "color": "green",
+                "value": null
               },
               {
                 "color": "red",
@@ -1740,7 +1845,8 @@
             "mode": "absolute",
             "steps": [
               {
-                "color": "green"
+                "color": "green",
+                "value": null
              },
               {
                 "color": "red",
