@@ -159,38 +159,77 @@ def __redact_request_dictionary(
159
159
def __redact_string (
160
160
self ,
161
161
input_string : Union [str , bytes ],
162
- key : str ,
162
+ trait_key : str ,
163
163
) -> Union [str , bytes ]:
164
- """
164
+ r """
165
165
Redacts characters in a string between a starting index and ending tag.
166
166
Replaces the identified characters with '********' regardless of the original length.
167
+
168
+ This function employs the following regular expressions explained below
169
+
170
+ Regex 1 ("quoted")
171
+ This is designed to match the pattern TRAIT('value') by matching the TRAIT name:
172
+ {trait_key.upper()}, a variable (potentially zero) amount of spaces ( +){{0,}}, then the
173
+ ('value') portion which must start and end with (' and '), but can conceivably contain
174
+ any characters, but a negative lookbehind is used to look for any unescaped single quotes
175
+ .*?(?<!\') that would indicate the matching of (' and ') is otherwise a coincidence.
176
+
177
+ Regex 2 ("nested")
178
+ This is designed to match the pattern TRAIT( subtrait1(value) subtrait2(value)) by
179
+ matching the TRAIT name: {trait_key.upper()}, a variable (potentially zero) amount of
180
+ spaces ( +){{0,}}, then the ( subtrait1(value) subtrait2(value)) portion which must
181
+ start and end with ( and ), but must also contain a nested set of opened parentheses
182
+ rather than a direct sequence of them. The pattern \([^)]*\( looks for these nested
183
+ open parentheses, as a sequence would have a ) character between them. Then the expression
184
+ allows any non-newline characters: .* and the "end pattern" of ) and ) separated by a
185
+ variable (potentially zero) amount of whitespace.
186
+
187
+ If neither of these two patterns is found for a supplied trait_key, then it is assumed
188
+ this trait is set with the default pattern below.
189
+
190
+ Regex 3 ("default")
191
+ This is designed to match the pattern TRAIT(value) by matching the TRAIT name:
192
+ {trait_key.upper()}, a variable (potentially zero) amount of spaces ( +){{0,}}, then the
193
+ (value) portion which must start and end with ( and ), but can conceivably contain any
194
+ characters, but a negative lookbehind is used to look for any escape \ character .*?(?<!\\)
195
+ that would indicate the matching of the ( and ) is otherwise a coincidence.
196
+
197
+ In all replacement expressions, the variable amounts of whitespace are captured so that
198
+ they can be preserved by this redaction operation. This is indicated by the \1 in the
199
+ replacement string.
200
+
167
201
"""
202
+ # Regex 1 ("quoted") - {trait_key.upper()}( +){{0,}}\(\'.*?(?<!\')\'\)
203
+ # Regex 2 ("nested") - {trait_key.upper()}( +){{0,}}\([^)]*\(.*\)( +){{0,}}\)
204
+ # Regex 3 ("default") - {trait_key.upper()}( +){{0,}}\(.*?(?<!\\)\)
168
205
asterisks = "********"
169
206
is_bytes = False
170
207
if isinstance (input_string , bytes ):
171
208
input_string = input_string .decode ("cp1047" )
172
209
is_bytes = True
173
- quoted = re .search (rf"{ key .upper ()} ( +){{0,}}\(\'.*?(?<!\')\'\)" , input_string )
210
+ quoted = re .search (
211
+ rf"{ trait_key .upper ()} ( +){{0,}}\(\'.*?(?<!\')\'\)" , input_string
212
+ )
174
213
nested = re .search (
175
- rf"{ key .upper ()} ( +){{0,}}\([^)]*\(.*\)( +){{0,}}\)" , input_string
214
+ rf"{ trait_key .upper ()} ( +){{0,}}\([^)]*\(.*\)( +){{0,}}\)" , input_string
176
215
)
177
216
if quoted is not None :
178
217
input_string = re .sub (
179
- rf"{ key .upper ()} ( +){{0,}}\(\'.*?(?<!\')\'\)" ,
180
- rf"{ key .upper ()} \1('{ asterisks } ')" ,
218
+ rf"{ trait_key .upper ()} ( +){{0,}}\(\'.*?(?<!\')\'\)" ,
219
+ rf"{ trait_key .upper ()} \1('{ asterisks } ')" ,
181
220
input_string ,
182
221
)
183
222
else :
184
223
if nested is not None :
185
224
input_string = re .sub (
186
- rf"{ key .upper ()} ( +){{0,}}\([^)]*\(.*\)( +){{0,}}\)" ,
187
- rf"{ key .upper ()} \1({ asterisks } )" ,
225
+ rf"{ trait_key .upper ()} ( +){{0,}}\([^)]*\(.*\)( +){{0,}}\)" ,
226
+ rf"{ trait_key .upper ()} \1({ asterisks } )" ,
188
227
input_string ,
189
228
)
190
229
else :
191
230
input_string = re .sub (
192
- rf"{ key .upper ()} ( +){{0,}}\(.*?(?<!\\)\)" ,
193
- rf"{ key .upper ()} \1({ asterisks } )" ,
231
+ rf"{ trait_key .upper ()} ( +){{0,}}\(.*?(?<!\\)\)" ,
232
+ rf"{ trait_key .upper ()} \1({ asterisks } )" ,
194
233
input_string ,
195
234
)
196
235
if is_bytes :
@@ -202,11 +241,20 @@ def redact_request_xml(
202
241
xml_string : Union [str , bytes ],
203
242
secret_traits : dict ,
204
243
) -> Union [str , bytes ]:
205
- """
244
+ r """
206
245
Redact a list of specific secret traits in a request xml string or bytes object.
207
246
Based on the following xml pattern:
208
247
'<xmltag attribute="any">xml value</xmltag>'
209
248
This function also accounts for any number of arbitrary xml attributes.
249
+
250
+ This function employs the following regular expression:
251
+ {xml_key}(.*)>.*<\/{xml_key} - Designed to match the above pattern by starting and ending
252
+ with the xmltag string as shown, but the starting tag allows for any characters between
253
+ "xmltag" and the > character to allow for the attribute specification shown above. This
254
+ results in the starting of the xml as {xml_key}(.*)> and the ending as <\/{xml_key}.
255
+ The miscellaneous characters are "captured" as a variable and preserved by the
256
+ substitution operation through the use of the \1 supplied in the replacement string.
257
+ Between these tags, any non-newline characters are allowed using the .* expression.
210
258
"""
211
259
is_bytes = False
212
260
if isinstance (xml_string , bytes ):
0 commit comments