|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import hashlib |
| 4 | +import os |
| 5 | +from collections import defaultdict |
| 6 | +from collections.abc import Mapping, Sequence |
| 7 | +from datetime import timedelta |
| 8 | +from typing import Any |
| 9 | +from urllib.parse import parse_qs, urlparse |
| 10 | + |
| 11 | +from django.utils.encoding import force_bytes |
| 12 | + |
| 13 | +from sentry.issues.grouptype import PerformanceNPlusOneAPICallsExperimentalGroupType |
| 14 | +from sentry.issues.issue_occurrence import IssueEvidence |
| 15 | +from sentry.models.organization import Organization |
| 16 | +from sentry.models.project import Project |
| 17 | +from sentry.utils.performance_issues.base import ( |
| 18 | + DetectorType, |
| 19 | + PerformanceDetector, |
| 20 | + fingerprint_http_spans, |
| 21 | + get_notification_attachment_body, |
| 22 | + get_span_evidence_value, |
| 23 | + get_url_from_span, |
| 24 | + parameterize_url, |
| 25 | +) |
| 26 | +from sentry.utils.performance_issues.detectors.utils import get_total_span_duration |
| 27 | +from sentry.utils.performance_issues.performance_problem import PerformanceProblem |
| 28 | +from sentry.utils.performance_issues.types import PerformanceProblemsMap, Span |
| 29 | + |
| 30 | + |
| 31 | +class NPlusOneAPICallsExperimentalDetector(PerformanceDetector): |
| 32 | + """ |
| 33 | + Detect parallel network calls to the same endpoint. |
| 34 | +
|
| 35 | + [-------- transaction -----------] |
| 36 | + [-------- parent span -----------] |
| 37 | + [n0] https://service.io/resources/?id=12443 |
| 38 | + [n1] https://service.io/resources/?id=13342 |
| 39 | + [n2] https://service.io/resources/?id=13441 |
| 40 | + ... |
| 41 | + """ |
| 42 | + |
| 43 | + __slots__ = ["stored_problems"] |
| 44 | + type = DetectorType.N_PLUS_ONE_API_CALLS |
| 45 | + settings_key = DetectorType.N_PLUS_ONE_API_CALLS |
| 46 | + |
| 47 | + def __init__(self, settings: dict[DetectorType, Any], event: dict[str, Any]) -> None: |
| 48 | + super().__init__(settings, event) |
| 49 | + |
| 50 | + # TODO: Only store the span IDs and timestamps instead of entire span objects |
| 51 | + self.stored_problems: PerformanceProblemsMap = {} |
| 52 | + self.spans: list[Span] = [] |
| 53 | + self.span_hashes: dict[str, str | None] = {} |
| 54 | + |
| 55 | + def visit_span(self, span: Span) -> None: |
| 56 | + if not NPlusOneAPICallsExperimentalDetector.is_span_eligible(span): |
| 57 | + return |
| 58 | + |
| 59 | + op = span.get("op", None) |
| 60 | + if op not in self.settings.get("allowed_span_ops", []): |
| 61 | + return |
| 62 | + |
| 63 | + self.span_hashes[span["span_id"]] = get_span_hash(span) |
| 64 | + |
| 65 | + previous_span = self.spans[-1] if len(self.spans) > 0 else None |
| 66 | + |
| 67 | + if previous_span is None: |
| 68 | + self.spans.append(span) |
| 69 | + elif self._spans_are_concurrent(previous_span, span) and self._spans_are_similar( |
| 70 | + previous_span, span |
| 71 | + ): |
| 72 | + self.spans.append(span) |
| 73 | + else: |
| 74 | + self._maybe_store_problem() |
| 75 | + self.spans = [span] |
| 76 | + |
| 77 | + def is_creation_allowed_for_organization(self, organization: Organization) -> bool: |
| 78 | + return True |
| 79 | + |
| 80 | + def is_creation_allowed_for_project(self, project: Project) -> bool: |
| 81 | + return self.settings["detection_enabled"] |
| 82 | + |
| 83 | + @classmethod |
| 84 | + def is_event_eligible(cls, event: dict[str, Any], project: Project | None = None) -> bool: |
| 85 | + trace_op = event.get("contexts", {}).get("trace", {}).get("op") |
| 86 | + if trace_op and trace_op not in ["navigation", "pageload", "ui.load", "ui.action"]: |
| 87 | + return False |
| 88 | + |
| 89 | + return True |
| 90 | + |
| 91 | + @classmethod |
| 92 | + def is_span_eligible(cls, span: Span) -> bool: |
| 93 | + span_id = span.get("span_id", None) |
| 94 | + op = span.get("op", None) |
| 95 | + hash = span.get("hash", None) |
| 96 | + |
| 97 | + if not span_id or not op or not hash: |
| 98 | + return False |
| 99 | + |
| 100 | + description = span.get("description") |
| 101 | + if not description: |
| 102 | + return False |
| 103 | + |
| 104 | + if description.strip()[:3].upper() != "GET": |
| 105 | + return False |
| 106 | + |
| 107 | + url = get_url_from_span(span) |
| 108 | + |
| 109 | + # GraphQL URLs have complicated queries in them. Until we parse those |
| 110 | + # queries to check for what's duplicated, we can't tell what is being |
| 111 | + # duplicated. Ignore them for now |
| 112 | + if "graphql" in url: |
| 113 | + return False |
| 114 | + |
| 115 | + # Next.js infixes its data URLs with a build ID. (e.g., |
| 116 | + # /_next/data/<uuid>/some-endpoint) This causes a fingerprinting |
| 117 | + # explosion, since every deploy would change this ID and create new |
| 118 | + # fingerprints. Since we're not parameterizing URLs yet, we need to |
| 119 | + # exclude them |
| 120 | + if "_next/data" in url: |
| 121 | + return False |
| 122 | + |
| 123 | + # Next.js error pages cause an N+1 API Call that isn't useful to anyone |
| 124 | + if "__nextjs_original-stack-frame" in url: |
| 125 | + return False |
| 126 | + |
| 127 | + if not url: |
| 128 | + return False |
| 129 | + |
| 130 | + # Once most users update their SDKs to use the latest standard, we |
| 131 | + # won't have to do this, since the URLs will be sent in as `span.data` |
| 132 | + # in a parsed format |
| 133 | + parsed_url = urlparse(str(url)) |
| 134 | + |
| 135 | + # Ignore anything that looks like an asset. Some frameworks (and apps) |
| 136 | + # fetch assets via XHR, which is not our concern |
| 137 | + _pathname, extension = os.path.splitext(parsed_url.path) |
| 138 | + if extension and extension in [".js", ".css", ".svg", ".png", ".mp3", ".jpg", ".jpeg"]: |
| 139 | + return False |
| 140 | + |
| 141 | + return True |
| 142 | + |
| 143 | + def on_complete(self) -> None: |
| 144 | + self._maybe_store_problem() |
| 145 | + self.spans = [] |
| 146 | + |
| 147 | + def _maybe_store_problem(self) -> None: |
| 148 | + if len(self.spans) < 1: |
| 149 | + return |
| 150 | + |
| 151 | + if len(self.spans) < self.settings["count"]: |
| 152 | + return |
| 153 | + |
| 154 | + total_duration = get_total_span_duration(self.spans) |
| 155 | + if total_duration < self.settings["total_duration"]: |
| 156 | + return |
| 157 | + |
| 158 | + last_span = self.spans[-1] |
| 159 | + |
| 160 | + fingerprint = self._fingerprint() |
| 161 | + if not fingerprint: |
| 162 | + return |
| 163 | + |
| 164 | + offender_span_ids = [span["span_id"] for span in self.spans] |
| 165 | + |
| 166 | + self.stored_problems[fingerprint] = PerformanceProblem( |
| 167 | + fingerprint=fingerprint, |
| 168 | + op=last_span["op"], |
| 169 | + desc=os.path.commonprefix([span.get("description", "") or "" for span in self.spans]), |
| 170 | + type=PerformanceNPlusOneAPICallsExperimentalGroupType, |
| 171 | + cause_span_ids=[], |
| 172 | + parent_span_ids=[last_span.get("parent_span_id", None)], |
| 173 | + offender_span_ids=offender_span_ids, |
| 174 | + evidence_data={ |
| 175 | + "op": last_span["op"], |
| 176 | + "cause_span_ids": [], |
| 177 | + "parent_span_ids": [last_span.get("parent_span_id", None)], |
| 178 | + "offender_span_ids": offender_span_ids, |
| 179 | + "transaction_name": self._event.get("transaction", ""), |
| 180 | + "num_repeating_spans": str(len(offender_span_ids)) if offender_span_ids else "", |
| 181 | + "repeating_spans": self._get_path_prefix(self.spans[0]), |
| 182 | + "repeating_spans_compact": get_span_evidence_value(self.spans[0], include_op=False), |
| 183 | + "parameters": self._get_parameters(), |
| 184 | + }, |
| 185 | + evidence_display=[ |
| 186 | + IssueEvidence( |
| 187 | + name="Offending Spans", |
| 188 | + value=get_notification_attachment_body( |
| 189 | + last_span["op"], |
| 190 | + os.path.commonprefix( |
| 191 | + [span.get("description", "") or "" for span in self.spans] |
| 192 | + ), |
| 193 | + ), |
| 194 | + # Has to be marked important to be displayed in the notifications |
| 195 | + important=True, |
| 196 | + ) |
| 197 | + ], |
| 198 | + ) |
| 199 | + |
| 200 | + def _get_parameters(self) -> list[str]: |
| 201 | + if not self.spans or len(self.spans) == 0: |
| 202 | + return [] |
| 203 | + |
| 204 | + urls = [get_url_from_span(span) for span in self.spans] |
| 205 | + |
| 206 | + all_parameters: Mapping[str, list[str]] = defaultdict(list) |
| 207 | + |
| 208 | + for url in urls: |
| 209 | + parsed_url = urlparse(url) |
| 210 | + parameters = parse_qs(parsed_url.query) |
| 211 | + |
| 212 | + for key, value in parameters.items(): |
| 213 | + all_parameters[key] += value |
| 214 | + |
| 215 | + return [ |
| 216 | + "{{{}: {}}}".format(key, ",".join(values)) for key, values in all_parameters.items() |
| 217 | + ] |
| 218 | + |
| 219 | + def _get_path_prefix(self, repeating_span: Span) -> str: |
| 220 | + if not repeating_span: |
| 221 | + return "" |
| 222 | + |
| 223 | + url = get_url_from_span(repeating_span) |
| 224 | + parsed_url = urlparse(url) |
| 225 | + return parsed_url.path or "" |
| 226 | + |
| 227 | + def _fingerprint(self) -> str | None: |
| 228 | + first_url = get_url_from_span(self.spans[0]) |
| 229 | + parameterized_first_url = parameterize_url(first_url) |
| 230 | + |
| 231 | + # Check if we parameterized the URL at all. If not, do not attempt |
| 232 | + # fingerprinting. Unparameterized URLs run too high a risk of |
| 233 | + # fingerprinting explosions. Query parameters are parameterized by |
| 234 | + # definition, so exclude them from comparison |
| 235 | + if without_query_params(parameterized_first_url) == without_query_params(first_url): |
| 236 | + return None |
| 237 | + |
| 238 | + fingerprint = fingerprint_http_spans([self.spans[0]]) |
| 239 | + |
| 240 | + return f"1-{PerformanceNPlusOneAPICallsExperimentalGroupType.type_id}-{fingerprint}" |
| 241 | + |
| 242 | + def _spans_are_concurrent(self, span_a: Span, span_b: Span) -> bool: |
| 243 | + span_a_start: int = span_a.get("start_timestamp", 0) or 0 |
| 244 | + span_b_start: int = span_b.get("start_timestamp", 0) or 0 |
| 245 | + |
| 246 | + return timedelta(seconds=abs(span_a_start - span_b_start)) < timedelta( |
| 247 | + milliseconds=self.settings["concurrency_threshold"] |
| 248 | + ) |
| 249 | + |
| 250 | + def _spans_are_similar(self, span_a: Span, span_b: Span) -> bool: |
| 251 | + return ( |
| 252 | + self.span_hashes[span_a["span_id"]] == self.span_hashes[span_b["span_id"]] |
| 253 | + and span_a["parent_span_id"] == span_b["parent_span_id"] |
| 254 | + ) |
| 255 | + |
| 256 | + |
| 257 | +HTTP_METHODS = { |
| 258 | + "GET", |
| 259 | + "HEAD", |
| 260 | + "POST", |
| 261 | + "PUT", |
| 262 | + "DELETE", |
| 263 | + "CONNECT", |
| 264 | + "OPTIONS", |
| 265 | + "TRACE", |
| 266 | + "PATCH", |
| 267 | +} |
| 268 | + |
| 269 | + |
| 270 | +def get_span_hash(span: Span) -> str | None: |
| 271 | + if span.get("op") != "http.client": |
| 272 | + return span.get("hash") |
| 273 | + |
| 274 | + parts = remove_http_client_query_string_strategy(span) |
| 275 | + if not parts: |
| 276 | + return None |
| 277 | + |
| 278 | + hash = hashlib.md5() |
| 279 | + for part in parts: |
| 280 | + hash.update(force_bytes(part, errors="replace")) |
| 281 | + |
| 282 | + return hash.hexdigest()[:16] |
| 283 | + |
| 284 | + |
| 285 | +def remove_http_client_query_string_strategy(span: Span) -> Sequence[str] | None: |
| 286 | + """ |
| 287 | + This is an inline version of the `http.client` parameterization code in |
| 288 | + `"default:2022-10-27"`, the default span grouping strategy at time of |
| 289 | + writing. It's inlined here to insulate this detector from changes in the |
| 290 | + strategy, which are coming soon. |
| 291 | + """ |
| 292 | + |
| 293 | + # Check the description is of the form `<HTTP METHOD> <URL>` |
| 294 | + description = span.get("description") or "" |
| 295 | + parts = description.split(" ", 1) |
| 296 | + if len(parts) != 2: |
| 297 | + return None |
| 298 | + |
| 299 | + # Ensure that this is a valid http method |
| 300 | + method, url_str = parts |
| 301 | + method = method.upper() |
| 302 | + if method not in HTTP_METHODS: |
| 303 | + return None |
| 304 | + |
| 305 | + url = urlparse(url_str) |
| 306 | + return [method, url.scheme, url.netloc, url.path] |
| 307 | + |
| 308 | + |
| 309 | +def without_query_params(url: str) -> str: |
| 310 | + return urlparse(url)._replace(query="").geturl() |
0 commit comments