Skip to content

Commit

Permalink
feat(grouping): Store metadata for new grouphashes (#76245)
Browse files Browse the repository at this point in the history
As part of the rollout of the new grouphash metadata table, this creates a `GroupHashMetadata` row for every new `GroupHash` we create. It's gated by a killswitch and a feature flag, and doesn't do much yet - the only data in the table so far is a creation timestamp - but this should give us a sense of whether or not there's going to be any hit to ingest DB performance.
  • Loading branch information
lobsterkatie authored Sep 3, 2024
1 parent edd7bc3 commit f78e1bc
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 7 deletions.
10 changes: 3 additions & 7 deletions src/sentry/event_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
find_existing_grouphash,
find_existing_grouphash_new,
get_hash_values,
get_or_create_grouphashes,
maybe_run_background_grouping,
maybe_run_secondary_grouping,
run_primary_grouping,
Expand Down Expand Up @@ -1414,9 +1415,7 @@ def _save_aggregate(
and not primary_hashes.hierarchical_hashes
)

flat_grouphashes = [
GroupHash.objects.get_or_create(project=project, hash=hash)[0] for hash in hashes.hashes
]
flat_grouphashes = get_or_create_grouphashes(project, hashes)

# The root_hierarchical_hash is the least specific hash within the tree, so
# typically hierarchical_hashes[0], unless a hash `n` has been split in
Expand Down Expand Up @@ -1782,10 +1781,7 @@ def get_hashes_and_grouphashes(
grouping_config, hashes = hash_calculation_function(project, job, metric_tags)

if extract_hashes(hashes):
grouphashes = [
GroupHash.objects.get_or_create(project=project, hash=hash)[0]
for hash in extract_hashes(hashes)
]
grouphashes = get_or_create_grouphashes(project, hashes)

existing_grouphash = find_existing_grouphash_new(grouphashes)

Expand Down
2 changes: 2 additions & 0 deletions src/sentry/features/temporary.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ def register_temporary_features(manager: FeatureManager):
manager.add("organizations:gitlab-disable-on-broken", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
# Enable only calculating a secondary hash when needed
manager.add("organizations:grouping-suppress-unnecessary-secondary-hash", OrganizationFeature, FeatureHandlerStrategy.INTERNAL, api_expose=False)
# Allow creating `GroupHashMetadata` records
manager.add("organizations:grouphash-metadata-creation", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
# Allows an org to have a larger set of project ownership rules per project
manager.add("organizations:higher-ownership-limit", OrganizationFeature, FeatureHandlerStrategy.INTERNAL, api_expose=False)
# Enable increased issue_owners rate limit for auto-assignment
Expand Down
25 changes: 25 additions & 0 deletions src/sentry/grouping/ingest/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import sentry_sdk

from sentry import features, options
from sentry.exceptions import HashDiscarded
from sentry.features.rollout import in_random_rollout
from sentry.grouping.api import (
Expand All @@ -27,6 +28,7 @@
from sentry.grouping.ingest.utils import extract_hashes
from sentry.grouping.result import CalculatedHashes
from sentry.models.grouphash import GroupHash
from sentry.models.grouphashmetadata import GroupHashMetadata
from sentry.models.project import Project
from sentry.utils import metrics
from sentry.utils.metrics import MutableTags
Expand Down Expand Up @@ -334,3 +336,26 @@ def get_hash_values(
)

return (primary_hashes, secondary_hashes, all_hashes)


def get_or_create_grouphashes(
    project: Project, calculated_hashes: CalculatedHashes
) -> list[GroupHash]:
    """
    Look up or create a `GroupHash` record for each of the given hash values.

    For grouphashes which are newly created - and provided both the killswitch option
    and the organization's feature flag allow it - also create an accompanying
    `GroupHashMetadata` record. Returns the `GroupHash` records in the same order as
    the incoming hash values.
    """
    grouphashes = []

    # The killswitch/feature-flag check is loop-invariant but not free, so evaluate it
    # lazily (at most once), and only if we actually create a new grouphash.
    metadata_creation_allowed: bool | None = None

    for hash_value in extract_hashes(calculated_hashes):
        grouphash, created = GroupHash.objects.get_or_create(project=project, hash=hash_value)

        # TODO: Do we want to expand this to backfill metadata for existing grouphashes? If we do,
        # we'll have to override the metadata creation date for them.
        if created:
            if metadata_creation_allowed is None:
                metadata_creation_allowed = options.get(
                    "grouping.grouphash_metadata.ingestion_writes_enabled"
                ) and features.has(
                    "organizations:grouphash-metadata-creation", project.organization
                )
            if metadata_creation_allowed:
                # For now, this just creates a record with a creation timestamp
                GroupHashMetadata.objects.create(grouphash=grouphash)

        grouphashes.append(grouphash)

    return grouphashes
7 changes: 7 additions & 0 deletions src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -2669,3 +2669,10 @@
default=10000,
flags=FLAG_AUTOMATOR_MODIFIABLE,
)

# Killswitch for creating `GroupHashMetadata` records during event ingestion
# (checked alongside the `organizations:grouphash-metadata-creation` feature flag)
register(
    "grouping.grouphash_metadata.ingestion_writes_enabled",
    type=Bool,
    default=True,
    flags=FLAG_AUTOMATOR_MODIFIABLE,
)
38 changes: 38 additions & 0 deletions tests/sentry/event_manager/test_event_manager_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,13 @@
from sentry.grouping.result import CalculatedHashes
from sentry.models.auditlogentry import AuditLogEntry
from sentry.models.group import Group
from sentry.models.grouphash import GroupHash
from sentry.models.grouphashmetadata import GroupHashMetadata
from sentry.projectoptions.defaults import DEFAULT_GROUPING_CONFIG
from sentry.testutils.cases import TestCase
from sentry.testutils.helpers import Feature
from sentry.testutils.helpers.eventprocessing import save_new_event
from sentry.testutils.helpers.options import override_options
from sentry.testutils.pytest.fixtures import django_db_all
from sentry.testutils.silo import assume_test_silo_mode_of
from sentry.testutils.skips import requires_snuba
Expand Down Expand Up @@ -165,6 +169,40 @@ def test_auto_updates_grouping_config(self):
)
assert actual_expiry == expected_expiry or actual_expiry == expected_expiry - 1

def test_creates_grouphash_metadata_when_appropriate(self):

# The killswitch is obeyed
with override_options({"grouping.grouphash_metadata.ingestion_writes_enabled": False}):
event1 = save_new_event({"message": "Dogs are great!"}, self.project)
grouphash = GroupHash.objects.filter(
project=self.project, hash=event1.get_primary_hash()
).first()
assert grouphash and grouphash.metadata is None

# The feature flag is obeyed
with Feature({"organizations:grouphash-metadata-creation": False}):
event2 = save_new_event({"message": "Sit! Good dog!"}, self.project)
grouphash = GroupHash.objects.filter(
project=self.project, hash=event2.get_primary_hash()
).first()
assert grouphash and grouphash.metadata is None

with Feature({"organizations:grouphash-metadata-creation": True}):
# New hashes get metadata
event3 = save_new_event({"message": "Adopt, don't shop"}, self.project)
grouphash = GroupHash.objects.filter(
project=self.project, hash=event3.get_primary_hash()
).first()
assert grouphash and isinstance(grouphash.metadata, GroupHashMetadata)

# For now, existing hashes aren't backfiled when new events are assigned to them
event4 = save_new_event({"message": "Dogs are great!"}, self.project)
assert event4.get_primary_hash() == event1.get_primary_hash()
grouphash = GroupHash.objects.filter(
project=self.project, hash=event4.get_primary_hash()
).first()
assert grouphash and grouphash.metadata is None


class PlaceholderTitleTest(TestCase):
"""
Expand Down

0 comments on commit f78e1bc

Please sign in to comment.