Skip to content

Commit 87b05e7

Browse files
authored
Merge pull request #8 from gabrielweiz/feature/mask-sensitive-data-log
adding settings options with regex to be able to mask sensitive data
2 parents c24abca + 19e1c6c commit 87b05e7

File tree

3 files changed

+104
-0
lines changed

3 files changed

+104
-0
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ When you run your spider you will see a log like below when spider is closing:
2727

2828
* `SETTINGS_LOGGING_REGEX` - Add a regular expression to only show some settings - for example `SETTINGS_LOGGING_REGEX = "SPIDERMON"` will show settings with SPIDERMON in their name.
2929
* `SETTINGS_LOGGING_INDENT` - Add indentation to make log more human-readable.
30+
* `MASKED_SENSITIVE_SETTINGS_ENABLED` - Default is `True`. When settings logging is enabled, the values of settings that may be sensitive (e.g. passwords, API keys) are masked. For example, `AWS_SECRET_ACCESS_KEY` will have its value shown as `**********`.
3031

3132
## Advanced
3233

src/scrapy_settings_log/__init__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@
99

1010
logger = logging.getLogger(__name__)
1111

12+
# Default patterns for setting names whose values are masked in the log.
# Each pattern is applied to the setting name with ``re.match``.
#
# The inline ``(?i)`` (ignore-case) flag must be the very first thing in a
# pattern: a global flag placed anywhere else is deprecated and is rejected
# with ``re.error`` on Python 3.11+. Raw strings keep ``\W`` from being
# treated as an (invalid) string escape sequence.
DEFAULT_REGEXES = [
    # apikey and variations, e.g. shub_apikey or SC_APIKEY
    r"(?i).*(api[\W_]*key).*",
    # AWS_SECRET_ACCESS_KEY and variations
    r"(?i).*(AWS[\W_]*(SECRET[\W_]*)?(ACCESS)?[\W_]*(KEY|ACCESS[\W_]*KEY))",
    # the word "password"
    r"(?i).*([\W_]*password[\W_]*).*",
]
17+
1218

1319
def prepare_for_json_serialization(obj):
1420
"""Prepare the obj recursively for JSON serialization.
@@ -51,6 +57,11 @@ def spider_closed(self, spider):
5157
regex = settings.get("SETTINGS_LOGGING_REGEX")
5258
if regex is not None:
5359
settings = {k: v for k, v in settings.items() if re.search(regex, k)}
60+
if spider.settings.getbool("MASKED_SENSITIVE_SETTINGS_ENABLED", True):
61+
regex_list = spider.settings.getlist("MASKED_SENSITIVE_SETTINGS_REGEX_LIST", DEFAULT_REGEXES)
62+
for reg in regex_list:
63+
updated_settings = {k: '**********' if v else v for k, v in settings.items() if re.match(reg, k)}
64+
settings = {**settings, **updated_settings}
5465

5566
self.output_settings(settings, spider)
5667

tests/test_code.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,95 @@ class CustomClass:
103103
logger.spider_closed(spider)
104104

105105
assert '{"DUMMY_CUSTOM_CLASS": {"CustomClass": "/foo/bar"}}' in caplog.text
106+
107+
108+
def test_log_all_should_not_return_apikey_value_by_default(caplog):
    """API-key style settings are logged masked when no masking option is set."""
    mock_spider = MockSpider(
        {
            "SETTINGS_LOGGING_ENABLED": True,
            "SHUB_APIKEY": 'apikey_value1',
            "shub_apikey": 'apikey_value2',
            "api_key": 'apikey_value3',
        }
    )
    extension = SpiderSettingsLogging()

    with caplog.at_level(logging.INFO):
        extension.spider_closed(mock_spider)

    logged = caplog.text
    # Every key variant must appear with the mask in place of its value.
    for masked_entry in (
        '"SHUB_APIKEY": "**********"',
        '"shub_apikey": "**********"',
        '"api_key": "**********"',
    ):
        assert masked_entry in logged
    # None of the raw values may leak into the log.
    assert 'apikey_value' not in logged
125+
126+
127+
def test_log_all_should_return_apikey_value_if_MASKED_SENSITIVE_SETTINGS_ENABLED_is_false(caplog):
    """With MASKED_SENSITIVE_SETTINGS_ENABLED set to False the raw value is logged."""
    mock_spider = MockSpider(
        {
            "SETTINGS_LOGGING_ENABLED": True,
            "APIKEY": 'apikey_value',
            "MASKED_SENSITIVE_SETTINGS_ENABLED": False,
        }
    )
    extension = SpiderSettingsLogging()

    with caplog.at_level(logging.INFO):
        extension.spider_closed(mock_spider)

    logged = caplog.text
    assert '"APIKEY": "apikey_value"' in logged
140+
141+
142+
def test_log_all_should_not_return_aws_secret_key_value_by_default(caplog):
    """AWS key style settings are logged masked when no masking option is set."""
    mock_spider = MockSpider(
        {
            "SETTINGS_LOGGING_ENABLED": True,
            "AWS_SECRET_ACCESS_KEY": 'secret_value1',
            "aws_secret_access_key": 'secret_value2',
            "aws_access_key": 'secret_value2',
            "AWS_SECRET_KEY": 'secret_value2',
            "aws_secret_key": 'secret_value2',
        }
    )
    extension = SpiderSettingsLogging()

    with caplog.at_level(logging.INFO):
        extension.spider_closed(mock_spider)

    logged = caplog.text
    # Every key variant must appear with the mask in place of its value.
    for masked_entry in (
        '"AWS_SECRET_ACCESS_KEY": "**********"',
        '"aws_secret_access_key": "**********"',
        '"aws_access_key": "**********"',
        '"AWS_SECRET_KEY": "**********"',
        '"aws_secret_key": "**********"',
    ):
        assert masked_entry in logged
    # None of the raw values may leak into the log.
    assert 'secret_value' not in logged
163+
164+
165+
def test_log_all_should_not_return_password_value_by_default(caplog):
    """Settings with "password" in their name are logged masked by default."""
    mock_spider = MockSpider(
        {
            "SETTINGS_LOGGING_ENABLED": True,
            "test_password": 'secret_value1',
            "PASSWORD_TEST": 'secret_value2',
        }
    )
    extension = SpiderSettingsLogging()

    with caplog.at_level(logging.INFO):
        extension.spider_closed(mock_spider)

    logged = caplog.text
    # Both the lower-case and upper-case names must show the mask.
    for masked_entry in (
        '"test_password": "**********"',
        '"PASSWORD_TEST": "**********"',
    ):
        assert masked_entry in logged
    # None of the raw values may leak into the log.
    assert 'secret_value' not in logged
180+
181+
182+
def test_log_all_should_return_only_the_custom_regex_data_masked_if_MASKED_SENSITIVE_SETTINGS_REGEX_LIST_configured(caplog):
    """A custom MASKED_SENSITIVE_SETTINGS_REGEX_LIST masks only names it matches."""
    mock_spider = MockSpider(
        {
            "SETTINGS_LOGGING_ENABLED": True,
            "MASKED_SENSITIVE_SETTINGS_REGEX_LIST": ["apppppppikey"],
            "APIKEY": 'apikey_value1',
            "apppppppikey": 'some_random_value',
        }
    )
    extension = SpiderSettingsLogging()

    with caplog.at_level(logging.INFO):
        extension.spider_closed(mock_spider)

    logged = caplog.text
    # APIKEY is not covered by the custom list, so its value is logged as-is.
    assert 'apikey_value1' in logged
    # The setting named by the custom pattern is masked and its value hidden.
    assert '"apppppppikey": "**********"' in logged
    assert 'some_random_value' not in logged

0 commit comments

Comments
 (0)