Skip to content

Commit 22de39d

Browse files
committed
Revert "Revert "chore(performance-issues): Create experimental N+1 API Call detector (#90070)""
This reverts commit 657d08b.
1 parent 8fd6e35 commit 22de39d

File tree

10 files changed

+802
-21
lines changed

10 files changed

+802
-21
lines changed

src/sentry/issues/grouptype.py

+10
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,16 @@ class PerformanceNPlusOneAPICallsGroupType(GroupType):
382382
released = True
383383

384384

385+
@dataclass(frozen=True)
386+
class PerformanceNPlusOneAPICallsExperimentalGroupType(GroupType):
387+
type_id = 1910
388+
slug = "performance_n_plus_one_api_calls_experimental"
389+
description = "N+1 API Call (Experimental)"
390+
category = GroupCategory.PERFORMANCE.value
391+
default_priority = PriorityLevel.LOW
392+
released = False
393+
394+
385395
@dataclass(frozen=True)
386396
class PerformanceMNPlusOneDBQueriesGroupType(PerformanceGroupTypeDefaults, GroupType):
387397
type_id = 1011

src/sentry/notifications/utils/__init__.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from sentry.integrations.services.integration import integration_service
2222
from sentry.issues.grouptype import (
2323
PerformanceConsecutiveDBQueriesGroupType,
24+
PerformanceNPlusOneAPICallsExperimentalGroupType,
2425
PerformanceNPlusOneAPICallsGroupType,
2526
PerformanceRenderBlockingAssetSpanGroupType,
2627
)
@@ -336,7 +337,7 @@ def occurrence_perf_to_email_html(context: Any) -> str:
336337

337338

338339
def get_spans(
339-
entries: list[dict[str, list[dict[str, str | float]] | str]]
340+
entries: list[dict[str, list[dict[str, str | float]] | str]],
340341
) -> list[dict[str, str | float]] | None:
341342
"""Get the given event's spans"""
342343
if not len(entries):
@@ -496,7 +497,10 @@ def from_problem_and_spans(
496497
spans: list[dict[str, str | float]] | None,
497498
event: Event | None = None,
498499
) -> PerformanceProblemContext:
499-
if problem.type == PerformanceNPlusOneAPICallsGroupType:
500+
if problem.type in (
501+
PerformanceNPlusOneAPICallsGroupType,
502+
PerformanceNPlusOneAPICallsExperimentalGroupType,
503+
):
500504
return NPlusOneAPICallProblemContext(problem, spans, event)
501505
if problem.type == PerformanceConsecutiveDBQueriesGroupType:
502506
return ConsecutiveDBQueriesProblemContext(problem, spans, event)
Original file line numberDiff line numberDiff line change
@@ -1,14 +0,0 @@
1-
from .consecutive_db_detector import ConsecutiveDBSpanDetector # NOQA
2-
from .consecutive_http_detector import ConsecutiveHTTPSpanDetector # NOQA
3-
from .http_overhead_detector import HTTPOverheadDetector # NOQA
4-
from .io_main_thread_detector import DBMainThreadDetector, FileIOMainThreadDetector # NOQA
5-
from .large_payload_detector import LargeHTTPPayloadDetector # NOQA
6-
from .mn_plus_one_db_span_detector import MNPlusOneDBSpanDetector # NOQA
7-
from .n_plus_one_api_calls_detector import NPlusOneAPICallsDetector # NOQA
8-
from .n_plus_one_db_span_detector import ( # NOQA
9-
NPlusOneDBSpanDetector,
10-
NPlusOneDBSpanDetectorExtended,
11-
)
12-
from .render_blocking_asset_span_detector import RenderBlockingAssetSpanDetector # NOQA
13-
from .slow_db_query_detector import SlowDBQueryDetector # NOQA
14-
from .uncompressed_asset_detector import UncompressedAssetSpanDetector # NOQA

src/sentry/utils/performance_issues/detectors/experiments/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,310 @@
1+
from __future__ import annotations
2+
3+
import hashlib
4+
import os
5+
from collections import defaultdict
6+
from collections.abc import Mapping, Sequence
7+
from datetime import timedelta
8+
from typing import Any
9+
from urllib.parse import parse_qs, urlparse
10+
11+
from django.utils.encoding import force_bytes
12+
13+
from sentry.issues.grouptype import PerformanceNPlusOneAPICallsExperimentalGroupType
14+
from sentry.issues.issue_occurrence import IssueEvidence
15+
from sentry.models.organization import Organization
16+
from sentry.models.project import Project
17+
from sentry.utils.performance_issues.base import (
18+
DetectorType,
19+
PerformanceDetector,
20+
fingerprint_http_spans,
21+
get_notification_attachment_body,
22+
get_span_evidence_value,
23+
get_url_from_span,
24+
parameterize_url,
25+
)
26+
from sentry.utils.performance_issues.detectors.utils import get_total_span_duration
27+
from sentry.utils.performance_issues.performance_problem import PerformanceProblem
28+
from sentry.utils.performance_issues.types import PerformanceProblemsMap, Span
29+
30+
31+
class NPlusOneAPICallsExperimentalDetector(PerformanceDetector):
32+
"""
33+
Detect parallel network calls to the same endpoint.
34+
35+
[-------- transaction -----------]
36+
[-------- parent span -----------]
37+
[n0] https://service.io/resources/?id=12443
38+
[n1] https://service.io/resources/?id=13342
39+
[n2] https://service.io/resources/?id=13441
40+
...
41+
"""
42+
43+
__slots__ = ["stored_problems"]
44+
type = DetectorType.N_PLUS_ONE_API_CALLS
45+
settings_key = DetectorType.N_PLUS_ONE_API_CALLS
46+
47+
def __init__(self, settings: dict[DetectorType, Any], event: dict[str, Any]) -> None:
48+
super().__init__(settings, event)
49+
50+
# TODO: Only store the span IDs and timestamps instead of entire span objects
51+
self.stored_problems: PerformanceProblemsMap = {}
52+
self.spans: list[Span] = []
53+
self.span_hashes: dict[str, str | None] = {}
54+
55+
def visit_span(self, span: Span) -> None:
56+
if not NPlusOneAPICallsExperimentalDetector.is_span_eligible(span):
57+
return
58+
59+
op = span.get("op", None)
60+
if op not in self.settings.get("allowed_span_ops", []):
61+
return
62+
63+
self.span_hashes[span["span_id"]] = get_span_hash(span)
64+
65+
previous_span = self.spans[-1] if len(self.spans) > 0 else None
66+
67+
if previous_span is None:
68+
self.spans.append(span)
69+
elif self._spans_are_concurrent(previous_span, span) and self._spans_are_similar(
70+
previous_span, span
71+
):
72+
self.spans.append(span)
73+
else:
74+
self._maybe_store_problem()
75+
self.spans = [span]
76+
77+
def is_creation_allowed_for_organization(self, organization: Organization) -> bool:
78+
return True
79+
80+
def is_creation_allowed_for_project(self, project: Project) -> bool:
81+
return self.settings["detection_enabled"]
82+
83+
@classmethod
84+
def is_event_eligible(cls, event: dict[str, Any], project: Project | None = None) -> bool:
85+
trace_op = event.get("contexts", {}).get("trace", {}).get("op")
86+
if trace_op and trace_op not in ["navigation", "pageload", "ui.load", "ui.action"]:
87+
return False
88+
89+
return True
90+
91+
@classmethod
92+
def is_span_eligible(cls, span: Span) -> bool:
93+
span_id = span.get("span_id", None)
94+
op = span.get("op", None)
95+
hash = span.get("hash", None)
96+
97+
if not span_id or not op or not hash:
98+
return False
99+
100+
description = span.get("description")
101+
if not description:
102+
return False
103+
104+
if description.strip()[:3].upper() != "GET":
105+
return False
106+
107+
url = get_url_from_span(span)
108+
109+
# GraphQL URLs have complicated queries in them. Until we parse those
110+
# queries to check for what's duplicated, we can't tell what is being
111+
# duplicated. Ignore them for now
112+
if "graphql" in url:
113+
return False
114+
115+
# Next.js infixes its data URLs with a build ID. (e.g.,
116+
# /_next/data/<uuid>/some-endpoint) This causes a fingerprinting
117+
# explosion, since every deploy would change this ID and create new
118+
# fingerprints. Since we're not parameterizing URLs yet, we need to
119+
# exclude them
120+
if "_next/data" in url:
121+
return False
122+
123+
# Next.js error pages cause an N+1 API Call that isn't useful to anyone
124+
if "__nextjs_original-stack-frame" in url:
125+
return False
126+
127+
if not url:
128+
return False
129+
130+
# Once most users update their SDKs to use the latest standard, we
131+
# won't have to do this, since the URLs will be sent in as `span.data`
132+
# in a parsed format
133+
parsed_url = urlparse(str(url))
134+
135+
# Ignore anything that looks like an asset. Some frameworks (and apps)
136+
# fetch assets via XHR, which is not our concern
137+
_pathname, extension = os.path.splitext(parsed_url.path)
138+
if extension and extension in [".js", ".css", ".svg", ".png", ".mp3", ".jpg", ".jpeg"]:
139+
return False
140+
141+
return True
142+
143+
def on_complete(self) -> None:
144+
self._maybe_store_problem()
145+
self.spans = []
146+
147+
def _maybe_store_problem(self) -> None:
148+
if len(self.spans) < 1:
149+
return
150+
151+
if len(self.spans) < self.settings["count"]:
152+
return
153+
154+
total_duration = get_total_span_duration(self.spans)
155+
if total_duration < self.settings["total_duration"]:
156+
return
157+
158+
last_span = self.spans[-1]
159+
160+
fingerprint = self._fingerprint()
161+
if not fingerprint:
162+
return
163+
164+
offender_span_ids = [span["span_id"] for span in self.spans]
165+
166+
self.stored_problems[fingerprint] = PerformanceProblem(
167+
fingerprint=fingerprint,
168+
op=last_span["op"],
169+
desc=os.path.commonprefix([span.get("description", "") or "" for span in self.spans]),
170+
type=PerformanceNPlusOneAPICallsExperimentalGroupType,
171+
cause_span_ids=[],
172+
parent_span_ids=[last_span.get("parent_span_id", None)],
173+
offender_span_ids=offender_span_ids,
174+
evidence_data={
175+
"op": last_span["op"],
176+
"cause_span_ids": [],
177+
"parent_span_ids": [last_span.get("parent_span_id", None)],
178+
"offender_span_ids": offender_span_ids,
179+
"transaction_name": self._event.get("transaction", ""),
180+
"num_repeating_spans": str(len(offender_span_ids)) if offender_span_ids else "",
181+
"repeating_spans": self._get_path_prefix(self.spans[0]),
182+
"repeating_spans_compact": get_span_evidence_value(self.spans[0], include_op=False),
183+
"parameters": self._get_parameters(),
184+
},
185+
evidence_display=[
186+
IssueEvidence(
187+
name="Offending Spans",
188+
value=get_notification_attachment_body(
189+
last_span["op"],
190+
os.path.commonprefix(
191+
[span.get("description", "") or "" for span in self.spans]
192+
),
193+
),
194+
# Has to be marked important to be displayed in the notifications
195+
important=True,
196+
)
197+
],
198+
)
199+
200+
def _get_parameters(self) -> list[str]:
201+
if not self.spans or len(self.spans) == 0:
202+
return []
203+
204+
urls = [get_url_from_span(span) for span in self.spans]
205+
206+
all_parameters: Mapping[str, list[str]] = defaultdict(list)
207+
208+
for url in urls:
209+
parsed_url = urlparse(url)
210+
parameters = parse_qs(parsed_url.query)
211+
212+
for key, value in parameters.items():
213+
all_parameters[key] += value
214+
215+
return [
216+
"{{{}: {}}}".format(key, ",".join(values)) for key, values in all_parameters.items()
217+
]
218+
219+
def _get_path_prefix(self, repeating_span: Span) -> str:
220+
if not repeating_span:
221+
return ""
222+
223+
url = get_url_from_span(repeating_span)
224+
parsed_url = urlparse(url)
225+
return parsed_url.path or ""
226+
227+
def _fingerprint(self) -> str | None:
228+
first_url = get_url_from_span(self.spans[0])
229+
parameterized_first_url = parameterize_url(first_url)
230+
231+
# Check if we parameterized the URL at all. If not, do not attempt
232+
# fingerprinting. Unparameterized URLs run too high a risk of
233+
# fingerprinting explosions. Query parameters are parameterized by
234+
# definition, so exclude them from comparison
235+
if without_query_params(parameterized_first_url) == without_query_params(first_url):
236+
return None
237+
238+
fingerprint = fingerprint_http_spans([self.spans[0]])
239+
240+
return f"1-{PerformanceNPlusOneAPICallsExperimentalGroupType.type_id}-{fingerprint}"
241+
242+
def _spans_are_concurrent(self, span_a: Span, span_b: Span) -> bool:
243+
span_a_start: int = span_a.get("start_timestamp", 0) or 0
244+
span_b_start: int = span_b.get("start_timestamp", 0) or 0
245+
246+
return timedelta(seconds=abs(span_a_start - span_b_start)) < timedelta(
247+
milliseconds=self.settings["concurrency_threshold"]
248+
)
249+
250+
def _spans_are_similar(self, span_a: Span, span_b: Span) -> bool:
251+
return (
252+
self.span_hashes[span_a["span_id"]] == self.span_hashes[span_b["span_id"]]
253+
and span_a["parent_span_id"] == span_b["parent_span_id"]
254+
)
255+
256+
257+
HTTP_METHODS = {
258+
"GET",
259+
"HEAD",
260+
"POST",
261+
"PUT",
262+
"DELETE",
263+
"CONNECT",
264+
"OPTIONS",
265+
"TRACE",
266+
"PATCH",
267+
}
268+
269+
270+
def get_span_hash(span: Span) -> str | None:
271+
if span.get("op") != "http.client":
272+
return span.get("hash")
273+
274+
parts = remove_http_client_query_string_strategy(span)
275+
if not parts:
276+
return None
277+
278+
hash = hashlib.md5()
279+
for part in parts:
280+
hash.update(force_bytes(part, errors="replace"))
281+
282+
return hash.hexdigest()[:16]
283+
284+
285+
def remove_http_client_query_string_strategy(span: Span) -> Sequence[str] | None:
286+
"""
287+
This is an inline version of the `http.client` parameterization code in
288+
`"default:2022-10-27"`, the default span grouping strategy at time of
289+
writing. It's inlined here to insulate this detector from changes in the
290+
strategy, which are coming soon.
291+
"""
292+
293+
# Check the description is of the form `<HTTP METHOD> <URL>`
294+
description = span.get("description") or ""
295+
parts = description.split(" ", 1)
296+
if len(parts) != 2:
297+
return None
298+
299+
# Ensure that this is a valid http method
300+
method, url_str = parts
301+
method = method.upper()
302+
if method not in HTTP_METHODS:
303+
return None
304+
305+
url = urlparse(url_str)
306+
return [method, url.scheme, url.netloc, url.path]
307+
308+
309+
def without_query_params(url: str) -> str:
310+
return urlparse(url)._replace(query="").geturl()

src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,7 @@
1414
from sentry.issues.issue_occurrence import IssueEvidence
1515
from sentry.models.organization import Organization
1616
from sentry.models.project import Project
17-
from sentry.utils.performance_issues.detectors.utils import get_total_span_duration
18-
19-
from ..base import (
17+
from sentry.utils.performance_issues.base import (
2018
DetectorType,
2119
PerformanceDetector,
2220
fingerprint_http_spans,
@@ -25,8 +23,9 @@
2523
get_url_from_span,
2624
parameterize_url,
2725
)
28-
from ..performance_problem import PerformanceProblem
29-
from ..types import PerformanceProblemsMap, Span
26+
from sentry.utils.performance_issues.detectors.utils import get_total_span_duration
27+
from sentry.utils.performance_issues.performance_problem import PerformanceProblem
28+
from sentry.utils.performance_issues.types import PerformanceProblemsMap, Span
3029

3130

3231
class NPlusOneAPICallsDetector(PerformanceDetector):

0 commit comments

Comments
 (0)