From 64292729a3e0856ce52d6db229fe7a8a621cb1b3 Mon Sep 17 00:00:00 2001 From: Andrew Liu <159852527+aliu39@users.noreply.github.com> Date: Thu, 17 Oct 2024 14:49:30 -0700 Subject: [PATCH 01/12] Truncate large msgs in create_feedback and skip spam detection. +some refactoring --- .../feedback/usecases/create_feedback.py | 105 +++++++++--------- .../feedback/usecases/spam_detection.py | 26 +++++ src/sentry/options/defaults.py | 6 + 3 files changed, 84 insertions(+), 53 deletions(-) diff --git a/src/sentry/feedback/usecases/create_feedback.py b/src/sentry/feedback/usecases/create_feedback.py index f19ba419d7c8c..3ba4efcd603c5 100644 --- a/src/sentry/feedback/usecases/create_feedback.py +++ b/src/sentry/feedback/usecases/create_feedback.py @@ -8,19 +8,20 @@ import jsonschema -from sentry import features, options +from sentry import options from sentry.constants import DataCategory from sentry.eventstore.models import Event, GroupEvent -from sentry.feedback.usecases.spam_detection import is_spam +from sentry.feedback.usecases.spam_detection import ( + auto_ignore_spam_feedbacks, + is_spam, + spam_detection_enabled, +) from sentry.issues.grouptype import FeedbackGroup from sentry.issues.issue_occurrence import IssueEvidence, IssueOccurrence from sentry.issues.json_schemas import EVENT_PAYLOAD_SCHEMA, LEGACY_EVENT_PAYLOAD_SCHEMA from sentry.issues.producer import PayloadType, produce_occurrence_to_kafka -from sentry.issues.status_change_message import StatusChangeMessage -from sentry.models.group import GroupStatus from sentry.models.project import Project from sentry.signals import first_feedback_received, first_new_feedback_received -from sentry.types.group import GroupSubStatus from sentry.utils import metrics from sentry.utils.outcomes import Outcome, track_outcome from sentry.utils.safe import get_path @@ -141,7 +142,6 @@ def fix_for_issue_platform(event_data): # If no user email was provided specify the contact-email as the user-email. feedback_obj = event_data.get("contexts", {}).get("feedback", {}) contact_email = feedback_obj.get("contact_email") - if not ret_event["user"].get("email", ""): ret_event["user"]["email"] = contact_email @@ -168,6 +168,21 @@ def fix_for_issue_platform(event_data): return ret_event +def validate_issue_platform_event_schema(event_data): + """ + The issue platform schema validation does not run in dev atm so we have to do the validation + ourselves, or else our tests are not representative of what happens in prod. + """ + try: + jsonschema.validate(event_data, EVENT_PAYLOAD_SCHEMA) + except jsonschema.exceptions.ValidationError: + try: + jsonschema.validate(event_data, LEGACY_EVENT_PAYLOAD_SCHEMA) + except jsonschema.exceptions.ValidationError: + metrics.incr("feedback.create_feedback_issue.invalid_schema") + raise + + def should_filter_feedback(event, project_id, source: FeedbackCreationSource): # Right now all unreal error events without a feedback # actually get a sent a feedback with this message @@ -222,26 +237,35 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource if should_filter_feedback(event, project_id, source): return + feedback_message = event["contexts"]["feedback"]["message"] + max_msg_size = options.get("feedback.message.max-size") # Note options are cached. project = Project.objects.get_from_cache(id=project_id) + # Spam detection. is_message_spam = None - if features.has( - "organizations:user-feedback-spam-filter-ingest", project.organization - ) and project.get_option("sentry:feedback_ai_spam_detection"): - try: - is_message_spam = is_spam(event["contexts"]["feedback"]["message"]) - except Exception: - # until we have LLM error types ironed out, just catch all exceptions - logger.exception("Error checking if message is spam", extra={"project_id": project_id}) - metrics.incr( - "feedback.create_feedback_issue.spam_detection", - tags={ - "is_spam": is_message_spam, - "referrer": source.value, - "client_source": event["contexts"]["feedback"].get("source"), - }, - sample_rate=1.0, - ) + if spam_detection_enabled(project): + if len(feedback_message) <= max_msg_size: + try: + is_message_spam = is_spam(feedback_message) + except Exception: + # until we have LLM error types ironed out, just catch all exceptions + logger.exception( + "Error checking if message is spam", extra={"project_id": project_id} + ) + metrics.incr( + "feedback.create_feedback_issue.spam_detection", + tags={ + "is_spam": is_message_spam, + "referrer": source.value, + "client_source": event["contexts"]["feedback"].get("source"), + }, + sample_rate=1.0, + ) + else: + is_message_spam = True + + if len(feedback_message) > max_msg_size: + feedback_message = feedback_message[:max_msg_size] # Note that some of the fields below like title and subtitle # are not used by the feedback UI, but are required. @@ -257,7 +281,7 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource project_id=project_id, fingerprint=issue_fingerprint, # random UUID for fingerprint so feedbacks are grouped individually issue_title="User Feedback", - subtitle=event["contexts"]["feedback"]["message"], + subtitle=feedback_message, resource_id=None, evidence_data=evidence_data, evidence_display=evidence_display, @@ -279,6 +303,7 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource # make sure event data is valid for issue platform validate_issue_platform_event_schema(event_fixed) + # Analytics if not project.flags.has_feedbacks: first_feedback_received.send_robust(project=project, sender=Project) @@ -291,9 +316,11 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource ): first_new_feedback_received.send_robust(project=project, sender=Project) + # Send to issue platform for processing. produce_occurrence_to_kafka( payload_type=PayloadType.OCCURRENCE, occurrence=occurrence, event_data=event_fixed ) + # Mark as spam with a STATUS_CHANGE kafka message. if is_message_spam: auto_ignore_spam_feedbacks(project, issue_fingerprint) metrics.incr( @@ -304,6 +331,7 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource }, sample_rate=1.0, ) + track_outcome( org_id=project.organization_id, project_id=project_id, @@ -317,21 +345,6 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource ) -def validate_issue_platform_event_schema(event_data): - """ - The issue platform schema validation does not run in dev atm so we have to do the validation - ourselves, or else our tests are not representative of what happens in prod. - """ - try: - jsonschema.validate(event_data, EVENT_PAYLOAD_SCHEMA) - except jsonschema.exceptions.ValidationError: - try: - jsonschema.validate(event_data, LEGACY_EVENT_PAYLOAD_SCHEMA) - except jsonschema.exceptions.ValidationError: - metrics.incr("feedback.create_feedback_issue.invalid_schema") - raise - - class UserReportShimDict(TypedDict): name: str email: str @@ -390,19 +403,5 @@ def shim_to_feedback( metrics.incr("feedback.shim_to_feedback.failed", tags={"referrer": source.value}) -def auto_ignore_spam_feedbacks(project, issue_fingerprint): - if features.has("organizations:user-feedback-spam-filter-actions", project.organization): - metrics.incr("feedback.spam-detection-actions.set-ignored") - produce_occurrence_to_kafka( - payload_type=PayloadType.STATUS_CHANGE, - status_change=StatusChangeMessage( - fingerprint=issue_fingerprint, - project_id=project.id, - new_status=GroupStatus.IGNORED, # we use ignored in the UI for the spam tab - new_substatus=GroupSubStatus.FOREVER, - ), - ) - - def is_in_feedback_denylist(organization): return organization.slug in options.get("feedback.organizations.slug-denylist") diff --git a/src/sentry/feedback/usecases/spam_detection.py b/src/sentry/feedback/usecases/spam_detection.py index eb00a9dab835a..5b1850de7d2b9 100644 --- a/src/sentry/feedback/usecases/spam_detection.py +++ b/src/sentry/feedback/usecases/spam_detection.py @@ -1,6 +1,12 @@ import logging +from sentry import features +from sentry.issues.producer import PayloadType, produce_occurrence_to_kafka +from sentry.issues.status_change_message import StatusChangeMessage from sentry.llm.usecases import LLMUseCase, complete_prompt +from sentry.models.group import GroupStatus +from sentry.models.project import Project +from sentry.types.group import GroupSubStatus from sentry.utils import metrics logger = logging.getLogger(__name__) @@ -68,3 +74,23 @@ def trim_response(text): return True, trimmed_text else: return False, trimmed_text + + +def spam_detection_enabled(project: Project) -> bool: + return features.has( + "organizations:user-feedback-spam-filter-ingest", project.organization + ) and project.get_option("sentry:feedback_ai_spam_detection") + + +def auto_ignore_spam_feedbacks(project, issue_fingerprint): + if features.has("organizations:user-feedback-spam-filter-actions", project.organization): + metrics.incr("feedback.spam-detection-actions.set-ignored") + produce_occurrence_to_kafka( + payload_type=PayloadType.STATUS_CHANGE, + status_change=StatusChangeMessage( + fingerprint=issue_fingerprint, + project_id=project.id, + new_status=GroupStatus.IGNORED, # we use ignored in the UI for the spam tab + new_substatus=GroupSubStatus.FOREVER, + ), + ) diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py index 18550131c90cd..4417d0c0dff13 100644 --- a/src/sentry/options/defaults.py +++ b/src/sentry/options/defaults.py @@ -474,6 +474,12 @@ default=[], flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE, ) +register( + "feedback.message.max-size", + type=Int, + default=4096, + flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE, +) # Dev Toolbar Options register( From 852f56a3ae0b7bc5b8948ab0e8df2f51d053731e Mon Sep 17 00:00:00 2001 From: Andrew Liu <159852527+aliu39@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:35:47 -0700 Subject: [PATCH 02/12] Add create_feedback tests --- .../feedback/usecases/test_create_feedback.py | 53 +++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/tests/sentry/feedback/usecases/test_create_feedback.py b/tests/sentry/feedback/usecases/test_create_feedback.py index 7dd67ab96eb86..8988e2fd50b92 100644 --- a/tests/sentry/feedback/usecases/test_create_feedback.py +++ b/tests/sentry/feedback/usecases/test_create_feedback.py @@ -1,7 +1,7 @@ from __future__ import annotations import time -from datetime import datetime +from datetime import UTC, datetime from typing import Any from unittest.mock import Mock @@ -773,8 +773,55 @@ def test_create_feedback_spam_detection_set_status_ignored( assert group.substatus == GroupSubStatus.FOREVER -# Unit tests for shim_to_feedback error cases. The typical behavior of this function is tested in -# test_project_user_reports, test_post_process, and test_update_user_reports. +@django_db_all +def test_create_feedback_large_message_truncated( + default_project, mock_produce_occurrence_to_kafka, set_sentry_option +): + """Large messages are truncated before producing to kafka.""" + with set_sentry_option("feedback.message.max-size", 4096): + event = mock_feedback_event(default_project.id, datetime.now(UTC)) + event["contexts"]["feedback"]["message"] = "a" * 7007 + create_feedback_issue( + event, default_project.id, FeedbackCreationSource.NEW_FEEDBACK_ENVELOPE + ) + + kwargs = mock_produce_occurrence_to_kafka.call_args[1] + assert len(kwargs["occurrence"].subtitle) == 4096 + + +@django_db_all +def test_create_feedback_large_message_skips_spam_detection( + default_project, set_sentry_option, monkeypatch +): + """If spam is enabled, large messages are marked as spam without making an LLM request.""" + with Feature( + { + "organizations:user-feedback-spam-filter-actions": True, + "organizations:user-feedback-spam-filter-ingest": True, + "organizations:feedback-ingest": True, + } + ), set_sentry_option("feedback.message.max-size", 4096): + + event = mock_feedback_event(default_project.id, datetime.now(UTC)) + event["contexts"]["feedback"]["message"] = "a" * 7007 + + mock_complete_prompt = Mock() + monkeypatch.setattr("sentry.llm.usecases.complete_prompt", mock_complete_prompt) + + create_feedback_issue( + event, default_project.id, FeedbackCreationSource.NEW_FEEDBACK_ENVELOPE + ) + assert mock_complete_prompt.call_count == 0 + + group = Group.objects.get() + assert group.status == GroupStatus.IGNORED + assert group.substatus == GroupSubStatus.FOREVER + + +""" +Unit tests for shim_to_feedback error cases. The typical behavior of this function is tested in +test_project_user_reports, test_post_process, and test_update_user_reports. +""" @django_db_all From dfae0ea1bff1742121fa33bd617a2bd03d0fdf7a Mon Sep 17 00:00:00 2001 From: Andrew Liu <159852527+aliu39@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:39:36 -0700 Subject: [PATCH 03/12] Add metric (with fancy tag) --- src/sentry/feedback/usecases/create_feedback.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/sentry/feedback/usecases/create_feedback.py b/src/sentry/feedback/usecases/create_feedback.py index 3ba4efcd603c5..7157a1a2d4428 100644 --- a/src/sentry/feedback/usecases/create_feedback.py +++ b/src/sentry/feedback/usecases/create_feedback.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +import math from datetime import UTC, datetime from enum import Enum from typing import Any, TypedDict @@ -265,6 +266,10 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource is_message_spam = True if len(feedback_message) > max_msg_size: + metrics.incr( + "feedback.create_feedback_issue.large_message", + tags={"nearest_pow2": 2 ** math.ceil(math.log2(len(feedback_message)))}, + ) feedback_message = feedback_message[:max_msg_size] # Note that some of the fields below like title and subtitle From 16ca14daff59b9e977e74a1abd92795e618edcec Mon Sep 17 00:00:00 2001 From: Andrew Liu <159852527+aliu39@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:40:29 -0700 Subject: [PATCH 04/12] Rename size tag --- src/sentry/feedback/usecases/create_feedback.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/feedback/usecases/create_feedback.py b/src/sentry/feedback/usecases/create_feedback.py index 7157a1a2d4428..a65f0feb28a79 100644 --- a/src/sentry/feedback/usecases/create_feedback.py +++ b/src/sentry/feedback/usecases/create_feedback.py @@ -268,7 +268,7 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource if len(feedback_message) > max_msg_size: metrics.incr( "feedback.create_feedback_issue.large_message", - tags={"nearest_pow2": 2 ** math.ceil(math.log2(len(feedback_message)))}, + tags={"pow2_size_bucket": 2 ** math.ceil(math.log2(len(feedback_message)))}, ) feedback_message = feedback_message[:max_msg_size] From 54d97c80727e44ffc5a20b8598a198343a4db8d6 Mon Sep 17 00:00:00 2001 From: Andrew Liu <159852527+aliu39@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:53:56 -0700 Subject: [PATCH 05/12] Truncate in save_userreport --- src/sentry/feedback/usecases/create_feedback.py | 8 ++++++-- src/sentry/ingest/userreport.py | 13 +++++++++++++ src/sentry/models/userreport.py | 4 +++- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/sentry/feedback/usecases/create_feedback.py b/src/sentry/feedback/usecases/create_feedback.py index a65f0feb28a79..4c55c5945124a 100644 --- a/src/sentry/feedback/usecases/create_feedback.py +++ b/src/sentry/feedback/usecases/create_feedback.py @@ -267,8 +267,12 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource if len(feedback_message) > max_msg_size: metrics.incr( - "feedback.create_feedback_issue.large_message", - tags={"pow2_size_bucket": 2 ** math.ceil(math.log2(len(feedback_message)))}, + "feedback.large_message", + tags={ + "pow2_size_bucket": 2 ** math.ceil(math.log2(len(feedback_message))), + "entrypoint": "create_feedback_issue", + "referrer": source.value, + }, ) feedback_message = feedback_message[:max_msg_size] diff --git a/src/sentry/ingest/userreport.py b/src/sentry/ingest/userreport.py index 904f66418b89d..4d0ded302f13f 100644 --- a/src/sentry/ingest/userreport.py +++ b/src/sentry/ingest/userreport.py @@ -1,6 +1,7 @@ from __future__ import annotations import logging +import math from datetime import timedelta from django.db import IntegrityError, router @@ -39,6 +40,18 @@ def save_userreport( if should_filter_user_report(report["comments"]): return + max_comment_length = UserReport._meta.get_field("comments").max_length + if len(report["comments"]) > max_comment_length: + metrics.incr( + "feedback.large_message", + tags={ + "pow2_size_bucket": 2 ** math.ceil(math.log2(len(report["comments"]))), + "entrypoint": "save_userreport", + "referrer": source.value, + }, + ) + report["comments"] = report["comments"][:max_comment_length] + if start_time is None: start_time = timezone.now() diff --git a/src/sentry/models/userreport.py b/src/sentry/models/userreport.py index 14d1b4c844f1d..8d7f1037f93e7 100644 --- a/src/sentry/models/userreport.py +++ b/src/sentry/models/userreport.py @@ -15,7 +15,9 @@ class UserReport(Model): environment_id = BoundedBigIntegerField(null=True, db_index=True) name = models.CharField(max_length=128) email = models.EmailField(max_length=75) - comments = models.TextField() + comments = models.TextField( + max_length=4096 + ) # Keep max_length <= "feedback.message.max-size" sentry option. date_added = models.DateTimeField(default=timezone.now, db_index=True) class Meta: From a325ab71f511ba35df0bbd914d3a5e2776092914 Mon Sep 17 00:00:00 2001 From: Andrew Liu <159852527+aliu39@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:54:38 -0700 Subject: [PATCH 06/12] Set length limit in crash report Form class --- src/sentry/web/frontend/error_page_embed.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/sentry/web/frontend/error_page_embed.py b/src/sentry/web/frontend/error_page_embed.py index 68c608e88303b..90905f7adbdcc 100644 --- a/src/sentry/web/frontend/error_page_embed.py +++ b/src/sentry/web/frontend/error_page_embed.py @@ -58,14 +58,16 @@ class UserReportForm(forms.ModelForm): name = forms.CharField( - max_length=128, widget=forms.TextInput(attrs={"placeholder": _("Jane Bloggs")}) + max_length=UserReport._meta.get_field("name").max_length, + widget=forms.TextInput(attrs={"placeholder": _("Jane Bloggs")}), ) email = forms.EmailField( - max_length=75, + max_length=UserReport._meta.get_field("email").max_length, widget=forms.TextInput(attrs={"placeholder": _("jane@example.com"), "type": "email"}), ) comments = forms.CharField( - widget=forms.Textarea(attrs={"placeholder": _("I clicked on 'X' and then hit 'Confirm'")}) + max_length=UserReport._meta.get_field("comments").max_length, + widget=forms.Textarea(attrs={"placeholder": _("I clicked on 'X' and then hit 'Confirm'")}), ) class Meta: From ab32abab3c7a87f8595a11d9248b1fa2d0e5d736 Mon Sep 17 00:00:00 2001 From: Andrew Liu <159852527+aliu39@users.noreply.github.com> Date: Thu, 17 Oct 2024 16:08:40 -0700 Subject: [PATCH 07/12] Add save_userreport and crash report form coverage --- tests/sentry/ingest/test_userreport.py | 36 +++++++++++++++++-- .../web/frontend/test_error_page_embed.py | 10 ++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/tests/sentry/ingest/test_userreport.py b/tests/sentry/ingest/test_userreport.py index faa483743cd8c..2ec8fe24fe342 100644 --- a/tests/sentry/ingest/test_userreport.py +++ b/tests/sentry/ingest/test_userreport.py @@ -1,4 +1,7 @@ -from sentry.feedback.usecases.create_feedback import UNREAL_FEEDBACK_UNATTENDED_MESSAGE +from sentry.feedback.usecases.create_feedback import ( + UNREAL_FEEDBACK_UNATTENDED_MESSAGE, + FeedbackCreationSource, +) from sentry.ingest.userreport import save_userreport, should_filter_user_report from sentry.models.userreport import UserReport from sentry.testutils.pytest.fixtures import django_db_all @@ -44,7 +47,7 @@ def test_save_user_report_returns_instance(set_sentry_option, default_project, m } # Call the function - result = save_userreport(default_project, report, "api") + result = save_userreport(default_project, report, FeedbackCreationSource.USER_REPORT_ENVELOPE) # Assert the result is an instance of UserReport assert isinstance(result, UserReport) @@ -61,6 +64,33 @@ def test_save_user_report_denylist(set_sentry_option, default_project, monkeypat "project_id": default_project.id, } - result = save_userreport(default_project, report, "api") + result = save_userreport(default_project, report, FeedbackCreationSource.USER_REPORT_ENVELOPE) assert result is None + + +@django_db_all +def test_save_user_report_large_message_truncated(default_project, monkeypatch): + # Mocking dependencies and setting up test data + monkeypatch.setattr("sentry.ingest.userreport.is_in_feedback_denylist", lambda org: False) + monkeypatch.setattr("sentry.ingest.userreport.should_filter_user_report", lambda message: False) + monkeypatch.setattr( + "sentry.eventstore.backend.get_event_by_id", lambda project_id, event_id: None + ) + + max_length = UserReport._meta.get_field("comments").max_length + report = { + "event_id": "123456", + "name": "Test User", + "email": "test@example.com", + "comments": "a" * (max_length + 3001), + "project_id": default_project.id, + } + + result = save_userreport(default_project, report, FeedbackCreationSource.USER_REPORT_ENVELOPE) + saved_result = UserReport.objects.get() + assert isinstance(result, UserReport) + assert result.id == saved_result.id + + assert len(result.comments) == max_length + assert len(saved_result.comments) == max_length diff --git a/tests/sentry/web/frontend/test_error_page_embed.py b/tests/sentry/web/frontend/test_error_page_embed.py index 4205a6b2a9815..0ce556af3ae42 100644 --- a/tests/sentry/web/frontend/test_error_page_embed.py +++ b/tests/sentry/web/frontend/test_error_page_embed.py @@ -191,6 +191,16 @@ def test_submission_invalid_event_id(self): ) assert resp.status_code == 400, resp.content + def test_submission_message_too_large(self): + resp = self.client.post( + self.path_with_qs, + {"name": "Jane Bloggs", "email": "jane@example.com", "comments": "a" * 9001}, + HTTP_REFERER="http://example.com", + HTTP_ACCEPT="application/json", + ) + assert resp.status_code == 400, resp.content + assert not UserReport.objects.exists() + @override_settings(ROOT_URLCONF="sentry.conf.urls") class ErrorPageEmbedEnvironmentTest(TestCase): From 6c54428df14107f0c8ff89d82ca73b3128bfb519 Mon Sep 17 00:00:00 2001 From: Andrew Liu <159852527+aliu39@users.noreply.github.com> Date: Thu, 17 Oct 2024 16:20:16 -0700 Subject: [PATCH 08/12] Fix typing --- src/sentry/ingest/userreport.py | 2 +- tests/sentry/ingest/test_userreport.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/sentry/ingest/userreport.py b/src/sentry/ingest/userreport.py index 4d0ded302f13f..1f5714e00295f 100644 --- a/src/sentry/ingest/userreport.py +++ b/src/sentry/ingest/userreport.py @@ -41,7 +41,7 @@ def save_userreport( return max_comment_length = UserReport._meta.get_field("comments").max_length - if len(report["comments"]) > max_comment_length: + if max_comment_length and len(report["comments"]) > max_comment_length: metrics.incr( "feedback.large_message", tags={ diff --git a/tests/sentry/ingest/test_userreport.py b/tests/sentry/ingest/test_userreport.py index 2ec8fe24fe342..7554f44be1c2a 100644 --- a/tests/sentry/ingest/test_userreport.py +++ b/tests/sentry/ingest/test_userreport.py @@ -79,6 +79,9 @@ def test_save_user_report_large_message_truncated(default_project, monkeypatch): ) max_length = UserReport._meta.get_field("comments").max_length + if not max_length: + assert False, "Missing max_length for UserReport comments field!" + report = { "event_id": "123456", "name": "Test User", From b517c6725859431d7c63470be4bac1c24e9590b5 Mon Sep 17 00:00:00 2001 From: Andrew Liu <159852527+aliu39@users.noreply.github.com> Date: Thu, 17 Oct 2024 16:34:06 -0700 Subject: [PATCH 09/12] Make migration --- migrations_lockfile.txt | 2 +- .../0778_userreport_comments_max_length.py | 33 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 src/sentry/migrations/0778_userreport_comments_max_length.py diff --git a/migrations_lockfile.txt b/migrations_lockfile.txt index 27f992dc08ccf..fe34f29be0e02 100644 --- a/migrations_lockfile.txt +++ b/migrations_lockfile.txt @@ -10,7 +10,7 @@ hybridcloud: 0016_add_control_cacheversion nodestore: 0002_nodestore_no_dictfield remote_subscriptions: 0003_drop_remote_subscription replays: 0004_index_together -sentry: 0777_add_related_name_to_dashboard_permissions +sentry: 0778_userreport_comments_max_length social_auth: 0002_default_auto_field uptime: 0017_unique_on_timeout workflow_engine: 0009_detector_type diff --git a/src/sentry/migrations/0778_userreport_comments_max_length.py b/src/sentry/migrations/0778_userreport_comments_max_length.py new file mode 100644 index 0000000000000..8834c8d1e2c0c --- /dev/null +++ b/src/sentry/migrations/0778_userreport_comments_max_length.py @@ -0,0 +1,33 @@ +# Generated by Django 5.1.1 on 2024-10-17 23:33 + +from django.db import migrations, models + +from sentry.new_migrations.migrations import CheckedMigration + + +class Migration(CheckedMigration): + # This flag is used to mark that a migration shouldn't be automatically run in production. + # This should only be used for operations where it's safe to run the migration after your + # code has deployed. So this should not be used for most operations that alter the schema + # of a table. + # Here are some things that make sense to mark as post deployment: + # - Large data migrations. Typically we want these to be run manually so that they can be + # monitored and not block the deploy for a long period of time while they run. + # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to + # run this outside deployments so that we don't block them. Note that while adding an index + # is a schema change, it's completely safe to run the operation after the code has deployed. + # Once deployed, run these manually via: https://develop.sentry.dev/database-migrations/#migration-deployment + + is_post_deployment = False + + dependencies = [ + ("sentry", "0777_add_related_name_to_dashboard_permissions"), + ] + + operations = [ + migrations.AlterField( + model_name="userreport", + name="comments", + field=models.TextField(max_length=4096), + ), + ] From 9af5645ce75930f85af93eac00bb6b8f75310672 Mon Sep 17 00:00:00 2001 From: Andrew Liu <159852527+aliu39@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:22:38 -0700 Subject: [PATCH 10/12] Move auto_ignore_feedback back to create_feedback to keep kafka logic in one file + rename/comment --- .../feedback/usecases/create_feedback.py | 38 +++++++++++++++---- .../feedback/usecases/spam_detection.py | 18 --------- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/src/sentry/feedback/usecases/create_feedback.py b/src/sentry/feedback/usecases/create_feedback.py index 4c55c5945124a..877880b697139 100644 --- a/src/sentry/feedback/usecases/create_feedback.py +++ b/src/sentry/feedback/usecases/create_feedback.py @@ -9,20 +9,19 @@ import jsonschema -from sentry import options +from sentry import features, options from sentry.constants import DataCategory from sentry.eventstore.models import Event, GroupEvent -from sentry.feedback.usecases.spam_detection import ( - auto_ignore_spam_feedbacks, - is_spam, - spam_detection_enabled, -) +from sentry.feedback.usecases.spam_detection import is_spam, spam_detection_enabled from sentry.issues.grouptype import FeedbackGroup from sentry.issues.issue_occurrence import IssueEvidence, IssueOccurrence from sentry.issues.json_schemas import EVENT_PAYLOAD_SCHEMA, LEGACY_EVENT_PAYLOAD_SCHEMA from sentry.issues.producer import PayloadType, produce_occurrence_to_kafka +from sentry.issues.status_change_message import StatusChangeMessage +from sentry.models.group import GroupStatus from sentry.models.project import Project from sentry.signals import first_feedback_received, first_new_feedback_received +from sentry.types.group import GroupSubStatus from sentry.utils import metrics from sentry.utils.outcomes import Outcome, track_outcome from sentry.utils.safe import get_path @@ -329,9 +328,9 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource produce_occurrence_to_kafka( payload_type=PayloadType.OCCURRENCE, occurrence=occurrence, event_data=event_fixed ) - # Mark as spam with a STATUS_CHANGE kafka message. + # Mark as spam. We need this since IP doesn't currently support an initial status of IGNORED. if is_message_spam: - auto_ignore_spam_feedbacks(project, issue_fingerprint) + set_feedback_ignored(project, issue_fingerprint) metrics.incr( "feedback.create_feedback_issue.produced_occurrence", tags={ @@ -354,6 +353,29 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource ) +def set_feedback_ignored(project, issue_fingerprint): + """ + Marks an issue as spam with a STATUS_CHANGE kafka message. The IGNORED status allows the occurrence to skip alerts + and be picked up by frontend spam queries. + """ + if features.has("organizations:user-feedback-spam-filter-actions", project.organization): + metrics.incr("feedback.spam-detection-actions.set-ignored") + produce_occurrence_to_kafka( + payload_type=PayloadType.STATUS_CHANGE, + status_change=StatusChangeMessage( + fingerprint=issue_fingerprint, + project_id=project.id, + new_status=GroupStatus.IGNORED, # we use ignored in the UI for the spam tab + new_substatus=GroupSubStatus.FOREVER, + ), + ) + + +########### +# Shim code +########### + + class UserReportShimDict(TypedDict): name: str email: str diff --git a/src/sentry/feedback/usecases/spam_detection.py b/src/sentry/feedback/usecases/spam_detection.py index 5b1850de7d2b9..e567d130fdece 100644 --- a/src/sentry/feedback/usecases/spam_detection.py +++ b/src/sentry/feedback/usecases/spam_detection.py @@ -1,12 +1,8 @@ import logging from sentry import features -from sentry.issues.producer import PayloadType, produce_occurrence_to_kafka -from sentry.issues.status_change_message import StatusChangeMessage from sentry.llm.usecases import LLMUseCase, complete_prompt -from sentry.models.group import GroupStatus from sentry.models.project import Project -from sentry.types.group import GroupSubStatus from sentry.utils import metrics logger = logging.getLogger(__name__) @@ -80,17 +76,3 @@ def spam_detection_enabled(project: Project) -> bool: return features.has( "organizations:user-feedback-spam-filter-ingest", project.organization ) and project.get_option("sentry:feedback_ai_spam_detection") - - -def auto_ignore_spam_feedbacks(project, issue_fingerprint): - if features.has("organizations:user-feedback-spam-filter-actions", project.organization): - metrics.incr("feedback.spam-detection-actions.set-ignored") - produce_occurrence_to_kafka( - payload_type=PayloadType.STATUS_CHANGE, - status_change=StatusChangeMessage( - fingerprint=issue_fingerprint, - project_id=project.id, - new_status=GroupStatus.IGNORED, # we use ignored in the UI for the spam tab - new_substatus=GroupSubStatus.FOREVER, - ), - ) From 561c38eab3f86e6cd099397530b255a22e44a979 Mon Sep 17 00:00:00 2001 From: Andrew Liu <159852527+aliu39@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:58:55 -0700 Subject: [PATCH 11/12] Revert name for auto_ignore_spam_feedbacks --- src/sentry/feedback/usecases/create_feedback.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sentry/feedback/usecases/create_feedback.py b/src/sentry/feedback/usecases/create_feedback.py index 877880b697139..2fb668861dd1a 100644 --- a/src/sentry/feedback/usecases/create_feedback.py +++ b/src/sentry/feedback/usecases/create_feedback.py @@ -330,7 +330,7 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource ) # Mark as spam. We need this since IP doesn't currently support an initial status of IGNORED. if is_message_spam: - set_feedback_ignored(project, issue_fingerprint) + auto_ignore_spam_feedbacks(project, issue_fingerprint) metrics.incr( "feedback.create_feedback_issue.produced_occurrence", tags={ @@ -353,7 +353,7 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource ) -def set_feedback_ignored(project, issue_fingerprint): +def auto_ignore_spam_feedbacks(project, issue_fingerprint): """ Marks an issue as spam with a STATUS_CHANGE kafka message. The IGNORED status allows the occurrence to skip alerts and be picked up by frontend spam queries. From bf7b53a55237b28afc5df0aeb93506f6145522f3 Mon Sep 17 00:00:00 2001 From: Andrew Liu <159852527+aliu39@users.noreply.github.com> Date: Fri, 18 Oct 2024 12:03:34 -0700 Subject: [PATCH 12/12] Use metrics.distribution --- src/sentry/feedback/usecases/create_feedback.py | 5 ++--- src/sentry/ingest/userreport.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/sentry/feedback/usecases/create_feedback.py b/src/sentry/feedback/usecases/create_feedback.py index 2fb668861dd1a..57fed65cd7e0e 100644 --- a/src/sentry/feedback/usecases/create_feedback.py +++ b/src/sentry/feedback/usecases/create_feedback.py @@ -1,7 +1,6 @@ from __future__ import annotations import logging -import math from datetime import UTC, datetime from enum import Enum from typing import Any, TypedDict @@ -265,10 +264,10 @@ def create_feedback_issue(event, project_id: int, source: FeedbackCreationSource is_message_spam = True if len(feedback_message) > max_msg_size: - metrics.incr( + metrics.distribution( "feedback.large_message", + len(feedback_message), tags={ - "pow2_size_bucket": 2 ** math.ceil(math.log2(len(feedback_message))), "entrypoint": "create_feedback_issue", "referrer": source.value, }, diff --git a/src/sentry/ingest/userreport.py b/src/sentry/ingest/userreport.py index 1f5714e00295f..bac931c70b405 100644 --- a/src/sentry/ingest/userreport.py +++ b/src/sentry/ingest/userreport.py @@ -1,7 +1,6 @@ from __future__ import annotations import logging -import math from datetime import timedelta from django.db import IntegrityError, router @@ -42,10 +41,10 @@ def save_userreport( max_comment_length = UserReport._meta.get_field("comments").max_length if max_comment_length and len(report["comments"]) > max_comment_length: - metrics.incr( + metrics.distribution( "feedback.large_message", + len(report["comments"]), tags={ - "pow2_size_bucket": 2 ** math.ceil(math.log2(len(report["comments"]))), "entrypoint": "save_userreport", "referrer": source.value, },