Skip to content

Commit

Permalink
Merge pull request #54 from fbinz/master
Browse files Browse the repository at this point in the history
Add support for regular expressions in SCRUBBER_REQUIRED_FIELD_MODEL_WHITELIST
  • Loading branch information
lociii committed Jun 4, 2024
2 parents b72bc91 + a14cd0e commit d7faa4f
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 3 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,8 @@ and booleans (usually) can't contain sensitive personal data. These fields will

Whitelists a list of models which will not be checked during `scrub_validation` and when
activating the strict mode. Defaults to the non-privacy-related Django base models.
Items can be either full model names (e.g. `auth.Group`) or compiled regular expression patterns that must match
the entire model name (e.g. `re.compile(r"auth\..*")` to whitelist all auth models).

(default: ['auth.Group', 'auth.Permission', 'contenttypes.ContentType', 'sessions.Session', 'sites.Site',
'django_scrubber.FakeData'])
Expand Down
17 changes: 16 additions & 1 deletion django_scrubber/services/validator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import re
from typing import Union

from django.apps import apps

from django_scrubber import settings_with_fallback
Expand All @@ -8,6 +11,15 @@ class ScrubberValidatorService:
Service to validate if all text-based fields are being scrubbed within your project and dependencies.
"""

@staticmethod
def check_pattern(pattern: Union[str, re.Pattern], value):
if isinstance(pattern, str):
return pattern == value
elif isinstance(pattern, re.Pattern):
return pattern.fullmatch(value)
else:
raise ValueError("Invalid pattern type")

def process(self) -> dict:
from django_scrubber.management.commands.scrub_data import _get_model_scrubbers

Expand All @@ -24,7 +36,10 @@ def process(self) -> dict:
for model in model_list:

# Check if model is whitelisted
if model._meta.label in model_whitelist:
if any(
self.check_pattern(pattern, model._meta.label)
for pattern in model_whitelist
):
continue

text_based_fields = []
Expand Down
13 changes: 13 additions & 0 deletions tests/services/test_validator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

try:
from unittest import mock
except ImportError:
Expand Down Expand Up @@ -62,3 +64,14 @@ def test_process_scrubber_required_field_type_variable_used(self):
result = service.process()

self.assertEqual(len(result), 0)

@override_settings(
    SCRUBBER_REQUIRED_FIELD_MODEL_WHITELIST=[re.compile("auth.*")],
)
def test_process_scrubber_required_field_model_whitelist_regex(self):
    """Models whose label matches a whitelisted regex must not show up in the validation result."""
    reported_models = tuple(ScrubberValidatorService().process().keys())

    # Both labels are fully matched by the "auth.*" pattern configured above.
    for whitelisted_label in ('auth.User', 'auth.Permission'):
        self.assertNotIn(whitelisted_label, reported_models)
4 changes: 2 additions & 2 deletions tests/test_scrubbers.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,15 @@ def test_faker_scrubber_with_provider_arguments(self):
data.refresh_from_db()

# The EAN Faker will by default emit ean13, so this would fail if the parameter was ignored
self.assertEquals(8, len(data.ean8))
self.assertEqual(8, len(data.ean8))

# Add a new scrubber for ean13
with self.settings(DEBUG=True, SCRUBBER_GLOBAL_SCRUBBERS={'ean8': scrubbers.Faker('ean', length=13)}):
call_command('scrub_data')
data.refresh_from_db()

# make sure it doesn't reuse the ean with length=8 scrubber
self.assertEquals(13, len(data.ean8))
self.assertEqual(13, len(data.ean8))

def test_faker_scrubber_datefield(self):
"""
Expand Down

0 comments on commit d7faa4f

Please sign in to comment.