fix(feedback): enforce a max message size from all sources #79326

Open: wants to merge 12 commits into master
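A minimal, hypothetical condensation of the change below: the helper name enforce_feedback_max_size is illustrative only, but the option name, its default, and the truncate/skip-spam-check behavior are taken from the diff.

# Hypothetical condensation of this PR's behavior; not code from the diff itself.
from sentry import options

def enforce_feedback_max_size(message: str) -> tuple[str, bool]:
    """Return the (possibly truncated) message and whether it exceeded the limit."""
    max_size = options.get("feedback.message.max-size")  # registered in this PR with default=4096
    too_large = len(message) > max_size
    # In create_feedback_issue, oversized messages skip the LLM spam check, are marked
    # as spam, and are truncated before being produced to the issue platform.
    return (message[:max_size] if too_large else message), too_large

The user-report path (save_userreport) applies the same cap via UserReport._meta.get_field("comments").max_length, which the new migration sets to 4096.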
2 changes: 1 addition & 1 deletion migrations_lockfile.txt
@@ -10,7 +10,7 @@ hybridcloud: 0016_add_control_cacheversion
nodestore: 0002_nodestore_no_dictfield
remote_subscriptions: 0003_drop_remote_subscription
replays: 0004_index_together
sentry: 0777_add_related_name_to_dashboard_permissions
sentry: 0778_userreport_comments_max_length
social_auth: 0002_default_auto_field
uptime: 0017_unique_on_timeout
workflow_engine: 0009_detector_type
111 changes: 70 additions & 41 deletions src/sentry/feedback/usecases/create_feedback.py
@@ -11,7 +11,7 @@
from sentry import features, options
from sentry.constants import DataCategory
from sentry.eventstore.models import Event, GroupEvent
from sentry.feedback.usecases.spam_detection import is_spam
from sentry.feedback.usecases.spam_detection import is_spam, spam_detection_enabled
from sentry.issues.grouptype import FeedbackGroup
from sentry.issues.issue_occurrence import IssueEvidence, IssueOccurrence
from sentry.issues.json_schemas import EVENT_PAYLOAD_SCHEMA, LEGACY_EVENT_PAYLOAD_SCHEMA
@@ -141,7 +141,6 @@ def fix_for_issue_platform(event_data):
# If no user email was provided specify the contact-email as the user-email.
feedback_obj = event_data.get("contexts", {}).get("feedback", {})
contact_email = feedback_obj.get("contact_email")

if not ret_event["user"].get("email", ""):
ret_event["user"]["email"] = contact_email

@@ -168,6 +167,21 @@ def fix_for_issue_platform(event_data):
return ret_event


def validate_issue_platform_event_schema(event_data):
"""
The issue platform schema validation does not run in dev atm so we have to do the validation
ourselves, or else our tests are not representative of what happens in prod.
"""
try:
jsonschema.validate(event_data, EVENT_PAYLOAD_SCHEMA)
except jsonschema.exceptions.ValidationError:
try:
jsonschema.validate(event_data, LEGACY_EVENT_PAYLOAD_SCHEMA)
except jsonschema.exceptions.ValidationError:
metrics.incr("feedback.create_feedback_issue.invalid_schema")
raise


def should_filter_feedback(event, project_id, source: FeedbackCreationSource):
# Right now all unreal error events without a feedback
# actually get sent a feedback with this message
@@ -222,26 +236,43 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
if should_filter_feedback(event, project_id, source):
return

feedback_message = event["contexts"]["feedback"]["message"]
max_msg_size = options.get("feedback.message.max-size") # Note options are cached.
project = Project.objects.get_from_cache(id=project_id)

# Spam detection.
is_message_spam = None
if features.has(
"organizations:user-feedback-spam-filter-ingest", project.organization
) and project.get_option("sentry:feedback_ai_spam_detection"):
try:
is_message_spam = is_spam(event["contexts"]["feedback"]["message"])
except Exception:
# until we have LLM error types ironed out, just catch all exceptions
logger.exception("Error checking if message is spam", extra={"project_id": project_id})
metrics.incr(
"feedback.create_feedback_issue.spam_detection",
if spam_detection_enabled(project):
if len(feedback_message) <= max_msg_size:
try:
is_message_spam = is_spam(feedback_message)
except Exception:
# until we have LLM error types ironed out, just catch all exceptions
logger.exception(
"Error checking if message is spam", extra={"project_id": project_id}
)
metrics.incr(
"feedback.create_feedback_issue.spam_detection",
tags={
"is_spam": is_message_spam,
"referrer": source.value,
"client_source": event["contexts"]["feedback"].get("source"),
},
sample_rate=1.0,
)
else:
is_message_spam = True

if len(feedback_message) > max_msg_size:
metrics.distribution(
"feedback.large_message",
len(feedback_message),
tags={
"is_spam": is_message_spam,
"entrypoint": "create_feedback_issue",
"referrer": source.value,
"client_source": event["contexts"]["feedback"].get("source"),
},
sample_rate=1.0,
)
feedback_message = feedback_message[:max_msg_size]

# Note that some of the fields below like title and subtitle
# are not used by the feedback UI, but are required.
@@ -257,7 +288,7 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
project_id=project_id,
fingerprint=issue_fingerprint, # random UUID for fingerprint so feedbacks are grouped individually
issue_title="User Feedback",
subtitle=event["contexts"]["feedback"]["message"],
subtitle=feedback_message,
resource_id=None,
evidence_data=evidence_data,
evidence_display=evidence_display,
@@ -279,6 +310,7 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
# make sure event data is valid for issue platform
validate_issue_platform_event_schema(event_fixed)

# Analytics
if not project.flags.has_feedbacks:
first_feedback_received.send_robust(project=project, sender=Project)

@@ -291,9 +323,11 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
):
first_new_feedback_received.send_robust(project=project, sender=Project)

# Send to issue platform for processing.
produce_occurrence_to_kafka(
payload_type=PayloadType.OCCURRENCE, occurrence=occurrence, event_data=event_fixed
)
# Mark as spam. We need this since IP doesn't currently support an initial status of IGNORED.
if is_message_spam:
auto_ignore_spam_feedbacks(project, issue_fingerprint)
metrics.incr(
@@ -304,6 +338,7 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
},
sample_rate=1.0,
)

track_outcome(
org_id=project.organization_id,
project_id=project_id,
@@ -317,19 +352,27 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource
)


def validate_issue_platform_event_schema(event_data):
def auto_ignore_spam_feedbacks(project, issue_fingerprint):
"""
The issue platform schema validation does not run in dev atm so we have to do the validation
ourselves, or else our tests are not representative of what happens in prod.
Marks an issue as spam with a STATUS_CHANGE kafka message. The IGNORED status allows the occurrence to skip alerts
and be picked up by frontend spam queries.
"""
try:
jsonschema.validate(event_data, EVENT_PAYLOAD_SCHEMA)
except jsonschema.exceptions.ValidationError:
try:
jsonschema.validate(event_data, LEGACY_EVENT_PAYLOAD_SCHEMA)
except jsonschema.exceptions.ValidationError:
metrics.incr("feedback.create_feedback_issue.invalid_schema")
raise
if features.has("organizations:user-feedback-spam-filter-actions", project.organization):
metrics.incr("feedback.spam-detection-actions.set-ignored")
produce_occurrence_to_kafka(
payload_type=PayloadType.STATUS_CHANGE,
status_change=StatusChangeMessage(
fingerprint=issue_fingerprint,
project_id=project.id,
new_status=GroupStatus.IGNORED, # we use ignored in the UI for the spam tab
new_substatus=GroupSubStatus.FOREVER,
),
)


###########
# Shim code
###########


class UserReportShimDict(TypedDict):
@@ -390,19 +433,5 @@ def shim_to_feedback(
metrics.incr("feedback.shim_to_feedback.failed", tags={"referrer": source.value})


def auto_ignore_spam_feedbacks(project, issue_fingerprint):
if features.has("organizations:user-feedback-spam-filter-actions", project.organization):
metrics.incr("feedback.spam-detection-actions.set-ignored")
produce_occurrence_to_kafka(
payload_type=PayloadType.STATUS_CHANGE,
status_change=StatusChangeMessage(
fingerprint=issue_fingerprint,
project_id=project.id,
new_status=GroupStatus.IGNORED, # we use ignored in the UI for the spam tab
new_substatus=GroupSubStatus.FOREVER,
),
)


def is_in_feedback_denylist(organization):
return organization.slug in options.get("feedback.organizations.slug-denylist")
8 changes: 8 additions & 0 deletions src/sentry/feedback/usecases/spam_detection.py
@@ -1,6 +1,8 @@
import logging

from sentry import features
from sentry.llm.usecases import LLMUseCase, complete_prompt
from sentry.models.project import Project
from sentry.utils import metrics

logger = logging.getLogger(__name__)
@@ -68,3 +70,9 @@ def trim_response(text):
return True, trimmed_text
else:
return False, trimmed_text


def spam_detection_enabled(project: Project) -> bool:
return features.has(
"organizations:user-feedback-spam-filter-ingest", project.organization
) and project.get_option("sentry:feedback_ai_spam_detection")
12 changes: 12 additions & 0 deletions src/sentry/ingest/userreport.py
@@ -39,6 +39,18 @@ def save_userreport(
if should_filter_user_report(report["comments"]):
return

max_comment_length = UserReport._meta.get_field("comments").max_length
if max_comment_length and len(report["comments"]) > max_comment_length:
metrics.distribution(
"feedback.large_message",
len(report["comments"]),
tags={
"entrypoint": "save_userreport",
"referrer": source.value,
},
)
report["comments"] = report["comments"][:max_comment_length]

if start_time is None:
start_time = timezone.now()

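Why the explicit slice above is needed (an assumption about Django semantics, not stated in the diff): max_length on a TextField is enforced only when form/model validation runs, not as a database constraint, so ingest paths that call save() directly must cap the value themselves. A minimal sketch of that guard, mirroring the code above:

# Assumption: TextField(max_length=...) is validation-only in Django, so the column
# stays unbounded text and direct saves need an explicit cap.
max_len = UserReport._meta.get_field("comments").max_length  # 4096 after this PR's migration
if max_len is not None and len(report["comments"]) > max_len:
    report["comments"] = report["comments"][:max_len]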
33 changes: 33 additions & 0 deletions src/sentry/migrations/0778_userreport_comments_max_length.py
@@ -0,0 +1,33 @@
# Generated by Django 5.1.1 on 2024-10-17 23:33

from django.db import migrations, models

from sentry.new_migrations.migrations import CheckedMigration


class Migration(CheckedMigration):
# This flag is used to mark that a migration shouldn't be automatically run in production.
# This should only be used for operations where it's safe to run the migration after your
# code has deployed. So this should not be used for most operations that alter the schema
# of a table.
# Here are some things that make sense to mark as post deployment:
# - Large data migrations. Typically we want these to be run manually so that they can be
# monitored and not block the deploy for a long period of time while they run.
# - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
# run this outside deployments so that we don't block them. Note that while adding an index
# is a schema change, it's completely safe to run the operation after the code has deployed.
# Once deployed, run these manually via: https://develop.sentry.dev/database-migrations/#migration-deployment

is_post_deployment = False

dependencies = [
("sentry", "0777_add_related_name_to_dashboard_permissions"),
]

operations = [
migrations.AlterField(
model_name="userreport",
name="comments",
field=models.TextField(max_length=4096),
),
]
4 changes: 3 additions & 1 deletion src/sentry/models/userreport.py
@@ -15,7 +15,9 @@ class UserReport(Model):
environment_id = BoundedBigIntegerField(null=True, db_index=True)
name = models.CharField(max_length=128)
email = models.EmailField(max_length=75)
comments = models.TextField()
comments = models.TextField(
max_length=4096
) # Keep max_length <= "feedback.message.max-size" sentry option.
date_added = models.DateTimeField(default=timezone.now, db_index=True)

class Meta:
6 changes: 6 additions & 0 deletions src/sentry/options/defaults.py
@@ -474,6 +474,12 @@
default=[],
flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
)
register(
"feedback.message.max-size",
type=Int,
default=4096,
flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
)

# Dev Toolbar Options
register(
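A small usage sketch for the feedback.message.max-size option registered above; per the "options are cached" note in create_feedback_issue, reading it per event on the ingest path is cheap:

# Usage sketch only; mirrors how create_feedback_issue reads the option.
from sentry import options

max_msg_size = options.get("feedback.message.max-size")  # default 4096
# Per the comment on UserReport.comments, that field's max_length should stay <= this value.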
8 changes: 5 additions & 3 deletions src/sentry/web/frontend/error_page_embed.py
@@ -58,14 +58,16 @@

class UserReportForm(forms.ModelForm):
name = forms.CharField(
max_length=128, widget=forms.TextInput(attrs={"placeholder": _("Jane Bloggs")})
max_length=UserReport._meta.get_field("name").max_length,
widget=forms.TextInput(attrs={"placeholder": _("Jane Bloggs")}),
)
email = forms.EmailField(
max_length=75,
max_length=UserReport._meta.get_field("email").max_length,
widget=forms.TextInput(attrs={"placeholder": _("jane@example.com"), "type": "email"}),
)
comments = forms.CharField(
widget=forms.Textarea(attrs={"placeholder": _("I clicked on 'X' and then hit 'Confirm'")})
max_length=UserReport._meta.get_field("comments").max_length,
widget=forms.Textarea(attrs={"placeholder": _("I clicked on 'X' and then hit 'Confirm'")}),
)

class Meta:
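A hedged note on the form change above: deriving the field limits from UserReport keeps the crash-report embed, model validation, and ingest truncation aligned on one number. The snippet below only illustrates that assumed invariant; it is not code from this PR.

# Illustration of the invariant this PR sets up (assumed, not asserted anywhere in the diff).
from sentry import options
from sentry.models.userreport import UserReport

assert UserReport._meta.get_field("comments").max_length == 4096
assert options.get("feedback.message.max-size") >= 4096  # the model comment asks for max_length <= option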
53 changes: 50 additions & 3 deletions tests/sentry/feedback/usecases/test_create_feedback.py
@@ -1,7 +1,7 @@
from __future__ import annotations

import time
from datetime import datetime
from datetime import UTC, datetime
from typing import Any
from unittest.mock import Mock

@@ -773,8 +773,55 @@ def test_create_feedback_spam_detection_set_status_ignored(
assert group.substatus == GroupSubStatus.FOREVER


# Unit tests for shim_to_feedback error cases. The typical behavior of this function is tested in
# test_project_user_reports, test_post_process, and test_update_user_reports.
@django_db_all
def test_create_feedback_large_message_truncated(
default_project, mock_produce_occurrence_to_kafka, set_sentry_option
):
"""Large messages are truncated before producing to kafka."""
with set_sentry_option("feedback.message.max-size", 4096):
event = mock_feedback_event(default_project.id, datetime.now(UTC))
event["contexts"]["feedback"]["message"] = "a" * 7007
create_feedback_issue(
event, default_project.id, FeedbackCreationSource.NEW_FEEDBACK_ENVELOPE
)

kwargs = mock_produce_occurrence_to_kafka.call_args[1]
assert len(kwargs["occurrence"].subtitle) == 4096


@django_db_all
def test_create_feedback_large_message_skips_spam_detection(
default_project, set_sentry_option, monkeypatch
):
"""If spam is enabled, large messages are marked as spam without making an LLM request."""
with Feature(
{
"organizations:user-feedback-spam-filter-actions": True,
"organizations:user-feedback-spam-filter-ingest": True,
"organizations:feedback-ingest": True,
}
), set_sentry_option("feedback.message.max-size", 4096):

event = mock_feedback_event(default_project.id, datetime.now(UTC))
event["contexts"]["feedback"]["message"] = "a" * 7007

mock_complete_prompt = Mock()
monkeypatch.setattr("sentry.llm.usecases.complete_prompt", mock_complete_prompt)

create_feedback_issue(
event, default_project.id, FeedbackCreationSource.NEW_FEEDBACK_ENVELOPE
)
assert mock_complete_prompt.call_count == 0

group = Group.objects.get()
assert group.status == GroupStatus.IGNORED
assert group.substatus == GroupSubStatus.FOREVER


"""
Unit tests for shim_to_feedback error cases. The typical behavior of this function is tested in
test_project_user_reports, test_post_process, and test_update_user_reports.
"""


@django_db_all