Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(sentryapp): new SentryAppWebhookRequestsEndpoint control endpoint #81676

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/sentry/api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,9 @@
from sentry.sentry_apps.api.endpoints.sentry_app_requests import SentryAppRequestsEndpoint
from sentry.sentry_apps.api.endpoints.sentry_app_rotate_secret import SentryAppRotateSecretEndpoint
from sentry.sentry_apps.api.endpoints.sentry_app_stats_details import SentryAppStatsEndpoint
from sentry.sentry_apps.api.endpoints.sentry_app_webhook_requests import (
SentryAppWebhookRequestsEndpoint,
)
from sentry.sentry_apps.api.endpoints.sentry_apps import SentryAppsEndpoint
from sentry.sentry_apps.api.endpoints.sentry_apps_stats import SentryAppsStatsEndpoint
from sentry.sentry_apps.api.endpoints.sentry_internal_app_token_details import (
Expand Down Expand Up @@ -2906,6 +2909,11 @@ def create_group_urls(name_prefix: str) -> list[URLPattern | URLResolver]:
SentryAppPublishRequestEndpoint.as_view(),
name="sentry-api-0-sentry-app-publish-request",
),
re_path(
r"^(?P<sentry_app_id_or_slug>[^\/]+)/webhook-requests/$",
SentryAppWebhookRequestsEndpoint.as_view(),
name="sentry-api-0-sentry-app-webhook-requests",
),
# The following are region endpoints, as interactions and request logs
# are per-region.
re_path(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
"ProjectUsersEndpoint",
"ReleaseThresholdEndpoint",
"SentryAppRequestsEndpoint",
"SentryAppWebhookRequestsEndpoint",
"SentryAppsStatsEndpoint",
"SentryInternalAppTokensEndpoint",
"TeamGroupsOldEndpoint",
Expand Down
136 changes: 136 additions & 0 deletions src/sentry/sentry_apps/api/endpoints/sentry_app_webhook_requests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
from datetime import datetime, timezone

from dateutil.parser import parse as parse_date
from rest_framework import serializers, status
from rest_framework.request import Request
from rest_framework.response import Response

from sentry.api.api_owners import ApiOwner
from sentry.api.api_publish_status import ApiPublishStatus
from sentry.api.base import control_silo_endpoint
from sentry.api.serializers import serialize
from sentry.models.organizationmapping import OrganizationMapping
from sentry.sentry_apps.api.bases.sentryapps import SentryAppBaseEndpoint, SentryAppStatsPermission
from sentry.sentry_apps.api.serializers.sentry_app_webhook_request import (
SentryAppWebhookRequestSerializer,
)
from sentry.sentry_apps.api.utils.webhook_requests import (
BufferedRequest,
DatetimeOrganizationFilterArgs,
get_buffer_requests_from_control,
get_buffer_requests_from_regions,
)
from sentry.sentry_apps.models.sentry_app import SentryApp
from sentry.sentry_apps.services.app_request import SentryAppRequestFilterArgs
from sentry.utils.sentry_apps import EXTENDED_VALID_EVENTS


class IncomingRequestSerializer(serializers.Serializer):
    """Validate the query parameters of the webhook-requests endpoint.

    Every field is optional. ``start`` falls back to 2000-01-01 00:00:00 UTC
    and ``end`` falls back to the current UTC time (see ``validate_end``).
    """

    date_format = "%Y-%m-%d %H:%M:%S"

    eventType = serializers.ChoiceField(choices=EXTENDED_VALID_EVENTS, required=False)
    errorsOnly = serializers.BooleanField(default=False, required=False)
    organizationSlug = serializers.CharField(required=False)
    start = serializers.DateTimeField(
        format=date_format,
        default=datetime(2000, 1, 1, tzinfo=timezone.utc),
        default_timezone=timezone.utc,
        required=False,
    )
    end = serializers.DateTimeField(
        format=date_format,
        default=None,
        default_timezone=timezone.utc,
        required=False,
    )

    def validate(self, data):
        """Reject a time window whose start lies after its end."""
        has_both_bounds = "start" in data and "end" in data
        if has_both_bounds and data["start"] > data["end"]:
            raise serializers.ValidationError("Invalid timestamp (start must be before end).")
        return data

    def validate_end(self, end):
        """Substitute the current UTC time when no end bound was supplied."""
        return datetime.now(tz=timezone.utc) if end is None else end
Comment on lines +51 to +54
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Q: where does this get used ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is called when .is_valid() is called (same as validate above). See docs here



@control_silo_endpoint
class SentryAppWebhookRequestsEndpoint(SentryAppBaseEndpoint):
owner = ApiOwner.ECOSYSTEM
publish_status = {
"GET": ApiPublishStatus.EXPERIMENTAL,
}
permission_classes = (SentryAppStatsPermission,)

def get(self, request: Request, sentry_app: SentryApp) -> Response:
"""
:qparam string eventType: Optionally specify a specific event type to filter requests
:qparam bool errorsOnly: If this is true, only return error/warning requests (300-599)
:qparam string organizationSlug: Optionally specify an org slug to filter requests
:qparam string start: Optionally specify a date to begin at. Format must be YYYY-MM-DD HH:MM:SS
:qparam string end: Optionally specify a date to end at. Format must be YYYY-MM-DD HH:MM:SS
"""
serializer = IncomingRequestSerializer(data=request.GET)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to gate access here with a feature flag maybe? We may want to validate the behavior of this endpoint before releasing it to the public.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would marking the endpoint as experimental be enough? I think we could update the callee side to use this new endpoint to test then roll the cutover out to few customers before going public.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update: We decided to feature flag this endpoint while we test fetching requests from multiple regions

if not serializer.is_valid():
return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
serialized = serializer.validated_data

event_type = serialized.get("eventType")
errors_only = serialized.get("errorsOnly")
org_slug = serialized.get("organizationSlug")
start_time = serialized.get("start")
end_time = serialized.get("end")

organization = None
if org_slug:
try:
organization = OrganizationMapping.objects.get(slug=org_slug)
except OrganizationMapping.DoesNotExist:
return Response({"detail": "Invalid organization."}, status=400)

requests: list[BufferedRequest] = []
control_filter: SentryAppRequestFilterArgs = {}
region_filter: SentryAppRequestFilterArgs = {}
control_filter["errors_only"] = region_filter["errors_only"] = errors_only
datetime_org_filter: DatetimeOrganizationFilterArgs = {
"start_time": start_time,
"end_time": end_time,
"organization": organization,
}

# If event type is installation.created or installation.deleted, we only need to fetch requests from the control buffer
if event_type == "installation.created" or event_type == "installation.deleted":
control_filter["event"] = event_type
requests.extend(
get_buffer_requests_from_control(sentry_app, control_filter, datetime_org_filter)
)
# If event type has been specified, we only need to fetch requests from region buffers
elif event_type:
region_filter["event"] = event_type
requests.extend(
get_buffer_requests_from_regions(sentry_app.id, region_filter, datetime_org_filter)
)
else:
control_filter["event"] = [
"installation.created",
"installation.deleted",
]
requests.extend(
get_buffer_requests_from_control(sentry_app, control_filter, datetime_org_filter)
)
region_filter["event"] = list(
set(EXTENDED_VALID_EVENTS)
- {
"installation.created",
"installation.deleted",
}
)
requests.extend(
get_buffer_requests_from_regions(sentry_app.id, region_filter, datetime_org_filter)
)

requests.sort(key=lambda x: parse_date(x.data.date), reverse=True)

return Response(
serialize(requests, request.user, SentryAppWebhookRequestSerializer(sentry_app))
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from __future__ import annotations

from collections.abc import Mapping, MutableMapping, Sequence
from typing import Any, NotRequired, TypedDict

from sentry.api.serializers import Serializer
from sentry.hybridcloud.services.organization_mapping import (
RpcOrganizationMapping,
organization_mapping_service,
)
from sentry.sentry_apps.api.utils.webhook_requests import BufferedRequest
from sentry.sentry_apps.models.sentry_app import SentryApp
from sentry.users.models.user import User
from sentry.users.services.user import RpcUser


class _BufferedRequestAttrs(TypedDict):
    """Per-request attributes that ``get_attrs`` computes for ``serialize``."""

    # Organization mapping looked up from the request's organization_id, or
    # None when the request has no organization_id or no mapping was found.
    organization: RpcOrganizationMapping | None


class OrganizationResponse(TypedDict):
    """Organization summary embedded in a serialized webhook request."""

    # Copied from the organization mapping's ``name``.
    name: str
    # Copied from the organization mapping's ``slug``.
    slug: str


class SentryAppWebhookRequestSerializerResponse(TypedDict):
    """Shape of one serialized buffered webhook request."""

    # The request's ``webhook_url``.
    webhookUrl: str
    # Slug of the Sentry app the serializer was constructed with.
    sentryAppSlug: str
    # The request's ``event_type``.
    eventType: str
    # The request's ``date`` string, passed through unchanged.
    date: str
    # The request's ``response_code``.
    responseCode: int
    # Only present when an organization mapping was resolved for the request.
    organization: NotRequired[OrganizationResponse]


class SentryAppWebhookRequestSerializer(Serializer):
def __init__(self, sentry_app: SentryApp) -> None:
self.sentry_app = sentry_app

def get_attrs(
self, item_list: Sequence[BufferedRequest], user: User | RpcUser, **kwargs: Any
) -> MutableMapping[BufferedRequest, _BufferedRequestAttrs]:
organization_ids = {item.data.organization_id for item in item_list}
organizations = organization_mapping_service.get_many(organization_ids=organization_ids)
organizations_by_id = {organization.id: organization for organization in organizations}

return {
item: {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also is there a reason we need this nesting?
i.e {item: { org: Org}} vs {org:Org}

Copy link
Member

@iamrajjoshi iamrajjoshi Dec 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question -
Looking at the code, the nesting structure {item: {"organization": org}} is necessary because of how Django's serializer system works. Here's why:

  1. The get_attrs() method is part of Sentry's Serializer base class pattern, where:
  • It needs to return a mapping of objects to their attributes
  • These attributes are then passed to the serialize() method as the attrs parameter
  • Each object in item_list needs its own set of attributes
    If you were to return just {org: Org}, you would lose the connection between which organization belongs to which item in the item_list, and the serialize() method wouldn't know which organization to use for each object being serialized.
    You can see this in how the data is used in the serialize() method:
def serialize(self, obj: Any, attrs: Mapping[Any, Any], user: Any, **kwargs: Any) -> Mapping[str, Any]:
    organization = attrs.get("organization")  # This comes from the nested structure
    # ... rest of the serialization logic ...

The nesting pattern {item: {attributes}} is a common pattern in Django-style serializers as it maintains the relationship between objects and their associated data throughout the serialization process.

--- from ChatGPT

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

;0 kewl

"organization": (
organizations_by_id.get(item.data.organization_id)
if item.data.organization_id
else None
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we know the shape of the data we could TypedDict this instead of Mapping[Any,Any]

}
for item in item_list
}

def serialize(
self, obj: BufferedRequest, attrs: Mapping[Any, Any], user: Any, **kwargs: Any
) -> SentryAppWebhookRequestSerializerResponse:
organization = attrs.get("organization")
response_code = obj.data.response_code

data: SentryAppWebhookRequestSerializerResponse = {
"webhookUrl": obj.data.webhook_url,
"sentryAppSlug": self.sentry_app.slug,
"eventType": obj.data.event_type,
"date": obj.data.date,
"responseCode": response_code,
}

if organization:
data["organization"] = {"name": organization.name, "slug": organization.slug}
Comment on lines +57 to +72
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also we're potentially missing the error response & request bodies from error webhook requests here

Same in the RpcModel

Copy link
Member Author

@ameliahsu ameliahsu Dec 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We decided to remove some properties for now since they're not shown on the dashboard, we can add them back later if needed

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

uh can you link me to this discussion ? We shouldn't or we should log them instead since we & customers use that data a lot as a debugging resource

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, do we have customer stories where they inspect the response we are sending rather than the dashboard?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the conversation was on Slack but #81267 (comment)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, for this customer issue we needed to see our req bodies to validate our webhooks were working. It's also a customer request in this issue. It's currently not a priority to add the UI to the dashboard but without returning at all to the frontend there is no visibility into the buffer for both customers and us.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The general guidance I've been giving @ameliahsu is: exclude it until we actually implement this feature. It should be trivial to add to the response body in the future, but I don't want to be exposing this data right now if we aren't using it. It also gives us an opportunity to analyze the overall performance of this endpoint with smaller payloads to start before adding a bunch of additional data.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

clarification q, is 'this feature' == the endpoint ? or the UI ? Either way though, I still think it's very important to expose that data for debugging. The comment 2 up, sums up the reasons and related issues/asks. Without this data being exposed debugging webhooks & buffer issues becomes incredibly hard without setting up an integration on that sentry app's org.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I missed that. Fair point, alright let's go ahead and add it then (sorry @ameliahsu)


return data
Empty file.
99 changes: 99 additions & 0 deletions src/sentry/sentry_apps/api/utils/webhook_requests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import TypedDict

from sentry.models.organizationmapping import OrganizationMapping
from sentry.sentry_apps.models.sentry_app import SentryApp
from sentry.sentry_apps.services.app_request import RpcSentryAppRequest, SentryAppRequestFilterArgs
from sentry.sentry_apps.services.app_request.serial import serialize_rpc_sentry_app_request
from sentry.sentry_apps.services.app_request.service import app_request_service
from sentry.types.region import find_all_region_names
from sentry.utils.sentry_apps import SentryAppWebhookRequestsBuffer


@dataclass
class BufferedRequest:
    """A buffered webhook request paired with an integer id.

    ``filter_requests`` builds these with ``id`` set to the request's position
    in the list it was given.
    """

    # Integer identifier; also the value returned by __hash__.
    id: int
    # The wrapped request record.
    data: RpcSentryAppRequest

    def __hash__(self) -> int:
        # Defined explicitly so instances are hashable: @dataclass generates
        # __eq__ by default, which would otherwise leave the class unhashable.
        # Equality still compares both fields, so two instances sharing an id
        # but carrying different data hash alike yet compare unequal.
        return self.id


class DatetimeOrganizationFilterArgs(TypedDict):
    """Date-window and organization criteria applied by ``filter_requests``."""

    # Inclusive lower bound for accepted request timestamps.
    start_time: datetime
    # Inclusive upper bound for accepted request timestamps.
    end_time: datetime
    # When set, only requests whose organization_id equals this mapping's
    # organization_id are kept; None disables organization filtering.
    organization: OrganizationMapping | None


def _filter_by_date(request: RpcSentryAppRequest, start: datetime, end: datetime) -> bool:
date_str = request.date
if not date_str:
return False
timestamp = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S.%f+00:00").replace(
microsecond=0, tzinfo=timezone.utc
)
return start <= timestamp <= end


def _filter_by_organization(
request: RpcSentryAppRequest, organization: OrganizationMapping | None
) -> bool:
if not organization:
return True
return request.organization_id == organization.organization_id


def filter_requests(
    unfiltered_requests: list[RpcSentryAppRequest],
    filter: DatetimeOrganizationFilterArgs,
) -> list[BufferedRequest]:
    """Keep the requests that match the date window and organization.

    Each surviving request is wrapped in a ``BufferedRequest`` whose ``id`` is
    its index in ``unfiltered_requests``. If either date bound is missing from
    ``filter`` nothing matches and an empty list is returned.
    """
    window_start = filter.get("start_time")
    window_end = filter.get("end_time")
    if not (window_start and window_end):
        return []

    organization = filter.get("organization")
    return [
        BufferedRequest(id=position, data=candidate)
        for position, candidate in enumerate(unfiltered_requests)
        if _filter_by_date(candidate, window_start, window_end)
        and _filter_by_organization(candidate, organization=organization)
    ]


def get_buffer_requests_from_control(
    sentry_app: SentryApp,
    filter: SentryAppRequestFilterArgs,
    datetime_org_filter: DatetimeOrganizationFilterArgs,
) -> list[BufferedRequest]:
    """Read the app's requests from the control buffer and filter them.

    ``filter`` supplies the optional ``event`` and ``errors_only`` arguments
    for the buffer query (defaulting to ``None`` and ``False``). The results
    are converted with ``serialize_rpc_sentry_app_request`` and then narrowed
    by ``filter_requests`` using ``datetime_org_filter``.
    """
    criteria = filter or {}
    raw_requests = SentryAppWebhookRequestsBuffer(sentry_app).get_requests(
        event=criteria.get("event"),
        errors_only=criteria.get("errors_only", False),
    )
    rpc_requests = list(map(serialize_rpc_sentry_app_request, raw_requests))
    return filter_requests(rpc_requests, datetime_org_filter)


def get_buffer_requests_from_regions(
    sentry_app_id: int,
    filter: SentryAppRequestFilterArgs,
    datetime_org_filter: DatetimeOrganizationFilterArgs,
) -> list[BufferedRequest]:
    """Gather the app's buffered requests from every region and filter them.

    Each name returned by ``find_all_region_names`` is queried in turn via
    ``app_request_service``; empty or missing results are skipped. The merged
    list is then narrowed by ``filter_requests`` using ``datetime_org_filter``.
    """
    per_region = (
        app_request_service.get_buffer_requests_for_region(
            sentry_app_id=sentry_app_id,
            region_name=name,
            filter=filter,
        )
        for name in find_all_region_names()
    )
    merged = [req for batch in per_region if batch for req in batch]
    return filter_requests(merged, datetime_org_filter)
4 changes: 2 additions & 2 deletions src/sentry/sentry_apps/services/app_request/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ class RpcSentryAppRequest(RpcModel):
date: str
response_code: int
webhook_url: str
organization_id: int
organization_id: int | None
GabeVillalobos marked this conversation as resolved.
Show resolved Hide resolved
event_type: str


class SentryAppRequestFilterArgs(TypedDict, total=False):
event: str
event: str | list[str]
errors_only: bool
Loading
Loading