Skip to content

Commit 2696c93

Browse files
committed
NCBC-3896: SDK Telemetry Collection in Server
Motivation ---------- Implement AppTelemetry reporting. Changes ------- - Added AppTelemetry options - Added property AppTelemetryPath in NodesExt - AppTelemetryCollector is added to the ServiceProvider from ClusterOptions - An instance of AppTelemetryCollector is always present in the ServiceProvider, however it can be disabled which prevents it from collecting metrics. - Adjusted Unit Tests mocks to support ServiceUriProvider and Services' Client changes - Added AppTelemetry incrementing in Query, Analytics, Collection, User and Bucket Managers - Histograms are not updated for Timeouts and Cancellations, only counter. This is due to the ambiguity of the behaviour in AppTelemetry/Orphan in RFCs - Added Canonical and Alternate Hostname properties to NodeAdapter - Added alt_node property to metrics - The LightweightStopwatch is now provided by the AppTelemetryCollector, which will return null when disabled. This prevents creating a new LightweightStopWatch when it isn't needed. Change-Id: I5d8d389eb23259531e078c83a64f038649187c69 Reviewed-on: https://review.couchbase.org/c/couchbase-net-client/+/222591 Tested-by: Build Bot <build@couchbase.com> Reviewed-by: Emilien Bevierre <emilien.bevierre@couchbase.com>
1 parent 31d96f7 commit 2696c93

File tree

62 files changed

+11670
-313
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+11670
-313
lines changed

src/Couchbase/Analytics/AnalyticsClient.cs

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
using System;
2+
using System.Diagnostics;
23
using System.Diagnostics.CodeAnalysis;
34
using System.Net;
45
using System.Net.Http;
56
using System.Text;
67
using System.Threading.Tasks;
78
using Couchbase.Core;
9+
using Couchbase.Core.Diagnostics.Metrics.AppTelemetry;
810
using Couchbase.Core.Diagnostics.Tracing;
911
using Couchbase.Core.Exceptions;
1012
using Couchbase.Core.Exceptions.Analytics;
@@ -29,6 +31,7 @@ internal class AnalyticsClient : HttpServiceBase, IAnalyticsClient
2931
private readonly ITypeSerializer _typeSerializer;
3032
private readonly ILogger<AnalyticsClient> _logger;
3133
private readonly IRequestTracer _tracer;
34+
private readonly IAppTelemetryCollector _appTelemetryCollector;
3235
internal const string AnalyticsPriorityHeaderName = "Analytics-Priority";
3336

3437
[RequiresUnreferencedCode(AnalyticsRequiresUnreferencedMembersWarning)]
@@ -38,13 +41,15 @@ public AnalyticsClient(
3841
IServiceUriProvider serviceUriProvider,
3942
ITypeSerializer typeSerializer,
4043
ILogger<AnalyticsClient> logger,
41-
IRequestTracer tracer)
44+
IRequestTracer tracer,
45+
IAppTelemetryCollector appTelemetryCollector)
4246
: base(httpClientFactory)
4347
{
4448
_serviceUriProvider = serviceUriProvider ?? throw new ArgumentNullException(nameof(serviceUriProvider));
4549
_typeSerializer = typeSerializer ?? throw new ArgumentNullException(nameof(typeSerializer));
4650
_logger = logger ?? throw new ArgumentNullException(nameof(logger));
4751
_tracer = tracer;
52+
_appTelemetryCollector = appTelemetryCollector;
4853
}
4954

5055
/// <inheritdoc />
@@ -61,7 +66,11 @@ public async Task<IAnalyticsResult<T>> QueryAsync<T>(string statement, Analytics
6166
.WithLocalAddress();
6267

6368
// try get Analytics node
64-
var analyticsUri = _serviceUriProvider.GetRandomAnalyticsUri();
69+
var analyticsNode = _serviceUriProvider.GetRandomAnalyticsNode();
70+
var analyticsUri = analyticsNode.AnalyticsUri;
71+
var requestStopwatch = _appTelemetryCollector.StartNewLightweightStopwatch();
72+
TimeSpan? operationElapsed;
73+
6574
rootSpan.WithRemoteAddress(analyticsUri);
6675

6776
_logger.LogDebug("Sending analytics query with a context id {contextId} to server {searchUri}",
@@ -91,8 +100,10 @@ public async Task<IAnalyticsResult<T>> QueryAsync<T>(string statement, Analytics
91100
var httpClient = CreateHttpClient(options.TimeoutValue);
92101
try
93102
{
103+
requestStopwatch?.Restart();
94104
var response = await httpClient.SendAsync(request, HttpClientFactory.DefaultCompletionOption, options.Token)
95105
.ConfigureAwait(false);
106+
operationElapsed = requestStopwatch?.Elapsed;
96107
dispatchSpan.Dispose();
97108

98109
var stream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false);
@@ -135,6 +146,14 @@ public async Task<IAnalyticsResult<T>> QueryAsync<T>(string statement, Analytics
135146
CouchbaseException? ex = CreateExceptionForError(result, context, false);
136147
if (ex != null) { throw ex; }
137148
}
149+
150+
_appTelemetryCollector.IncrementMetrics(
151+
operationElapsed,
152+
analyticsNode.NodesAdapter.CanonicalHostname,
153+
analyticsNode.NodesAdapter.AlternateHostname,
154+
analyticsNode.NodeUuid,
155+
AppTelemetryServiceType.Analytics,
156+
AppTelemetryCounterType.Total);
138157
}
139158
catch
140159
{
@@ -148,9 +167,18 @@ public async Task<IAnalyticsResult<T>> QueryAsync<T>(string statement, Analytics
148167
}
149168
catch (OperationCanceledException e)
150169
{
170+
operationElapsed = requestStopwatch?.Elapsed;
151171
//treat as an orphaned response
152172
rootSpan.LogOrphaned();
153173

174+
_appTelemetryCollector.IncrementMetrics(
175+
operationElapsed,
176+
analyticsNode.NodesAdapter.CanonicalHostname,
177+
analyticsNode.NodesAdapter.AlternateHostname,
178+
analyticsNode.NodeUuid,
179+
AppTelemetryServiceType.Analytics,
180+
AppTelemetryCounterType.TimedOut);
181+
154182
var context = new AnalyticsErrorContext
155183
{
156184
ClientContextId = options.ClientContextIdValue,
@@ -174,9 +202,18 @@ public async Task<IAnalyticsResult<T>> QueryAsync<T>(string statement, Analytics
174202
}
175203
catch (HttpRequestException e)
176204
{
205+
operationElapsed = requestStopwatch?.Elapsed;
177206
//treat as an orphaned response
178207
rootSpan.LogOrphaned();
179208

209+
_appTelemetryCollector.IncrementMetrics(
210+
operationElapsed,
211+
analyticsNode.NodesAdapter.CanonicalHostname,
212+
analyticsNode.NodesAdapter.AlternateHostname,
213+
analyticsNode.NodeUuid,
214+
AppTelemetryServiceType.Analytics,
215+
AppTelemetryCounterType.Canceled);
216+
180217
var context = new AnalyticsErrorContext
181218
{
182219
ClientContextId = options.ClientContextIdValue,

src/Couchbase/CStringParams.cs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,18 @@ internal static class CStringParams
3131
public const string NetworkResolution = "network";
3232
public const string PreferredServerGroup = "preferred_server_group";
3333

34+
[InterfaceStability(Level.Volatile)]
35+
public const string AppTelemetryEndpoint = "app_telemetry_endpoint";
36+
37+
[InterfaceStability(Level.Volatile)]
38+
public const string AppTelemetryBackoff = "app_telemetry_backoff";
39+
40+
[InterfaceStability(Level.Volatile)]
41+
public const string AppTelemetryPingInterval = "app_telemetry_ping_interval";
42+
43+
[InterfaceStability(Level.Volatile)]
44+
public const string AppTelemetryPingTimeout = "app_telemetry_ping_timeout";
45+
3446
[InterfaceStability(Level.Uncommitted)]
3547
public const string RandomSeedNodes = "random_seed_nodes";
3648
}

src/Couchbase/Cluster.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,11 @@
3030
using Microsoft.Extensions.Logging;
3131
using AnalyticsOptions = Couchbase.Analytics.AnalyticsOptions;
3232
using Couchbase.Core.RateLimiting;
33+
using Couchbase.Core.Diagnostics.Metrics.AppTelemetry;
3334
using Couchbase.Management.Eventing.Internal;
3435
using Couchbase.Search.Queries.Simple;
3536
using Couchbase.Search.Queries.Vector;
37+
using Couchbase.Utils;
3638

3739
#nullable enable
3840

@@ -59,6 +61,7 @@ public class Cluster : ICluster, IBootstrappable
5961
private readonly IRequestTracer _tracer;
6062
private readonly IRetryStrategy _retryStrategy;
6163
private readonly MeterForwarder? _meterForwarder;
64+
private readonly IAppTelemetryCollector _appTelemetryCollector;
6265

6366
// Internal is used to provide a seam for unit tests
6467
internal LazyService<IQueryClient> LazyQueryClient;
@@ -120,6 +123,10 @@ internal Cluster(ClusterOptions clusterOptions)
120123
_meterForwarder = new MeterForwarder(meter);
121124
}
122125

126+
_appTelemetryCollector = _context.ServiceProvider.GetRequiredService<IAppTelemetryCollector>();
127+
_appTelemetryCollector.ClusterContext = _context;
128+
_appTelemetryCollector.Initialize();
129+
123130
var bootstrapperFactory = _context.ServiceProvider.GetRequiredService<IBootstrapperFactory>();
124131
_bootstrapper = bootstrapperFactory.Create(clusterOptions.BootstrapPollInterval);
125132
}
@@ -439,6 +446,7 @@ public async Task<ISearchResult> SearchAsync(
439446
{
440447
options ??= new SearchOptions();
441448
options.TimeoutValue ??= _context.ClusterOptions.SearchTimeout;
449+
options.Token = options.Token.FallbackToTimeout(options.TimeoutValue.Value)?.Token ?? options.Token;
442450

443451
ThrowIfNotBootstrapped();
444452
// The RFC asks to wait until the GlobalConfig is ready. That should be handled by ThrowIfNotBootstrapped.
@@ -621,6 +629,7 @@ public void Dispose()
621629
_bootstrapper.Dispose();
622630
_context.Dispose();
623631
_meterForwarder?.Dispose();
632+
_appTelemetryCollector.Dispose();
624633
}
625634
}
626635

src/Couchbase/ClusterOptions.cs

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
using Couchbase.Core.Logging;
2323
using Couchbase.Core.Retry;
2424
using Couchbase.Client.Transactions.Config;
25+
using Couchbase.Core.Diagnostics.Metrics.AppTelemetry;
2526
using Microsoft.Extensions.Logging;
2627
using Microsoft.Extensions.Logging.Abstractions;
2728

@@ -154,6 +155,22 @@ public string? ConnectionString
154155
{
155156
ConnectionStringValue.RandomizeSeedHosts = randomizeSeedNodes;
156157
}
158+
if (ConnectionStringValue.TryGetParameter(CStringParams.AppTelemetryEndpoint, out string appTelemetryEndpoint))
159+
{
160+
AppTelemetry.Endpoint = new Uri(appTelemetryEndpoint);
161+
}
162+
if (ConnectionStringValue.TryGetParameter(CStringParams.AppTelemetryBackoff, out TimeSpan appTelemetryBackoff))
163+
{
164+
AppTelemetry.Backoff = appTelemetryBackoff;
165+
}
166+
if (ConnectionStringValue.TryGetParameter(CStringParams.AppTelemetryPingInterval, out TimeSpan appTelemetryPingInterval))
167+
{
168+
AppTelemetry.PingInterval = appTelemetryPingInterval;
169+
}
170+
if (ConnectionStringValue.TryGetParameter(CStringParams.AppTelemetryPingTimeout, out TimeSpan appTelemetryPingTimeout))
171+
{
172+
AppTelemetry.PingTimeout = appTelemetryPingTimeout;
173+
}
157174
if (ConnectionStringValue.TryGetParameter(CStringParams.PreferredServerGroup, out string serverGroup))
158175
{
159176
PreferredServerGroup = serverGroup;
@@ -550,6 +567,74 @@ public ClusterOptions WithLoggingMeterOptions(Action<LoggingMeterOptions> config
550567
return WithLoggingMeterOptions(opts);
551568
}
552569

570+
/// <summary>
571+
/// Configures the endpoint where AppTelemetry data should be sent to.
572+
/// </summary>
573+
/// <returns>
574+
/// A <see cref="ClusterOptions"/> object for chaining.
575+
/// </returns>
576+
[InterfaceStability(Level.Volatile)]
577+
public ClusterOptions WithAppTelemetryEndpoint(Uri endpoint)
578+
{
579+
AppTelemetry.Endpoint = endpoint;
580+
AppTelemetry.Enabled = true;
581+
return this;
582+
}
583+
584+
/// <summary>
585+
/// Configures the backoff for re-connecting to the AppTelemetry endpoint via WebSockets.
586+
/// </summary>
587+
/// <returns>
588+
/// A <see cref="ClusterOptions"/> object for chaining.
589+
/// </returns>
590+
[InterfaceStability(Level.Volatile)]
591+
public ClusterOptions WithAppTelemetryBackoff(TimeSpan backoff)
592+
{
593+
AppTelemetry.Backoff = backoff;
594+
AppTelemetry.Enabled = true;
595+
return this;
596+
}
597+
598+
/// <summary>
599+
/// Enables/Disables AppTelemetry.
600+
/// </summary>
601+
/// <returns>
602+
/// A <see cref="ClusterOptions"/> object for chaining.
603+
/// </returns>
604+
[InterfaceStability(Level.Volatile)]
605+
public ClusterOptions WithAppTelemetryEnabled(bool enabled)
606+
{
607+
AppTelemetry.Enabled = enabled;
608+
return this;
609+
}
610+
611+
/// <summary>
612+
/// Configures the duration between consecutive PING commands sent to the server.
613+
/// </summary>
614+
/// <returns>
615+
/// A <see cref="ClusterOptions"/> object for chaining.</returns>
616+
[InterfaceStability(Level.Volatile)]
617+
public ClusterOptions WithAppTelemetryPingInterval(TimeSpan interval)
618+
{
619+
AppTelemetry.PingInterval = interval;
620+
AppTelemetry.Enabled = true;
621+
return this;
622+
}
623+
624+
/// <summary>
625+
/// Configures the maximum timeout for the server to respond to a PING.
626+
/// </summary>
627+
/// <param name="timeout"></param>
628+
/// <returns>
629+
/// A <see cref="ClusterOptions"/> object for chaining.</returns>
630+
[InterfaceStability(Level.Volatile)]
631+
public ClusterOptions WithAppTelemetryPingTimeout(TimeSpan timeout)
632+
{
633+
AppTelemetry.PingTimeout = timeout;
634+
AppTelemetry.Enabled = true;
635+
return this;
636+
}
637+
553638
#endregion
554639

555640
/// <summary>
@@ -843,6 +928,8 @@ public bool IgnoreRemoteCertificateNameMismatch
843928
/// <inheritdoc cref="ExperimentalOptions"/>
844929
public ExperimentalOptions Experiments { get; set; } = new();
845930

931+
public AppTelemetryOptions AppTelemetry { get; set; } = new();
932+
846933
/// <summary>
847934
/// Provides a default implementation of <see cref="ClusterOptions"/>.
848935
/// </summary>

0 commit comments

Comments
 (0)