From d2bb97a477fbf9cf5b9a449cae69474e5a237ffb Mon Sep 17 00:00:00 2001 From: Jim King Date: Sat, 12 Sep 2020 10:14:23 -0400 Subject: [PATCH] ResultHandlingFlag for control of results --- CHANGELOG.md | 5 +- interposer/__init__.py | 2 + interposer/interposer.py | 195 +++++++++++++++++++++++++++++---------- tests/interposer_test.py | 165 ++++++++++++++++++++++++++++++--- 4 files changed, 302 insertions(+), 65 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7495277..556e6d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Breaking +- Signatures of most of the cleanup methods have changed. - `wrap` no longer requires (or accepts) `as_method` for wrapping class instantiations. - `wrap` raises WrappingError if something is not wrappable. @@ -21,9 +22,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added support for wrapping object instantiations from class definitions. - Added support for builtins. - Added logging control for wrapping and calling. +- Added support for conditionally replacing original return value with cleaned one. +- Added support for conditionally not recording a call. - Consolidated determination of whether something is `wrappable`. - Fixed wrapping of property results. -- Fixed unnecessary wrapping of python primitives like `str`. +- Fixed incorrect wrapping of python primitives like `str`. - Fixed call order parameter storage could be modified after call. ## [0.6.1] - 2019-09-04 diff --git a/interposer/__init__.py b/interposer/__init__.py index 580c392..245ebc4 100644 --- a/interposer/__init__.py +++ b/interposer/__init__.py @@ -10,6 +10,7 @@ "InterposerError", "Mode", "PlaybackError", + "ResultHandlingFlag", "ScopedInterposer", "WrappingError", ] @@ -23,6 +24,7 @@ Interposer, InterposerEncoder, Mode, + ResultHandlingFlag, ScopedInterposer, ) from .testcase import InterposedTestCase diff --git a/interposer/interposer.py b/interposer/interposer.py index 1c4f5af..c596ff9 100644 --- a/interposer/interposer.py +++ b/interposer/interposer.py @@ -9,10 +9,12 @@ import types from contextlib import AbstractContextManager +from contextlib import contextmanager from copy import deepcopy from datetime import datetime from enum import auto from enum import Enum +from enum import Flag from hashlib import sha1 from pathlib import Path from pprint import pformat @@ -36,6 +38,19 @@ class Mode(Enum): Recording = auto() +class ResultHandlingFlag(Flag): + """ + Specifies result handling behavior. + """ + + # causes the result to be recorded + RECORD = auto() + + # causes the cleaned result to be returned to the caller + # normally the original result is returned to the caller + REPLACE = auto() + + class InterposerEncoder(json.JSONEncoder): """ Handles conversion of commonly used types not normally convertible @@ -210,35 +225,65 @@ def wrappable(self, thing) -> bool: ) return result - def cleanup_exception_pre(self, ex): + @contextmanager + def cleanup_exception_pre(self, params: Dict, ex: Exception) -> None: """ When an exception is going to be recorded, this intercept allows the exception to be changed. This is necessary for any exception that - cannot be pickled. + cannot be pickled or contains sensitive information. + + This is a context manager so that the content going into the recording + can be different than what is returned. - Common ways to deal with pickling errors here are: + Common ways to deal with pickling errors are: - Set one of the properties to None - Return a doppleganger class (looks like, smells like, but does not derive from the original). + + Arguments: + params: the original call parameters + ex: the original exception + + Yields: + - the (possibly) modified exception """ - return ex + yield ex - def cleanup_exception_post(self, ex): + def cleanup_exception_post(self, params: Dict, ex: Exception) -> Exception: """ Modify an exception during playback before it is thrown. + + Returns: + the object to be raised as the result """ return ex - def cleanup_parameters_pre(self, params): + def cleanup_parameters_pre(self, params: Dict) -> Dict: """ - Allows the data in the parameters (this uniquely identifies a request) - to be modified. This is useful in wiping out any credentials or other - sensitive information. When replaying in tests, if you set these bits - to the same value, the recorded playback will match. + Allows the data used to uniquely identify the call to be scrubbed + of sensitive content. A common technique is to replace a secret + stored in the params with a well-known string, and then use that + well-known string at playback time as the secret that is passed + in. + + Note that the args and kwargs in params are the actual call + args and kwargs. If you need to make changes you should return + a copy with the modification so you are not modifying anything + in the caller. + + For example if a class takes a password, during recording you + would pass in the actual password and modify it here to be "PASSWORD". + Then during playback if you pass in the password "PASSWORD", the + call signature will match what was recorded. + + Note that the entire params structure is recorded in a call log in + the recording database - not just a hash of the params. FIXME: we + could instead encode the ordinal call number into the params and + hash it, and rely on the log output to diagnose misalignment. """ return params - def cleanup_parameters_post(self, params): + def cleanup_parameters_post(self, params: Dict) -> Dict: """ Modify parameters during playback before they are hashed to locate a recording. This usually does the same thing as @@ -246,28 +291,63 @@ def cleanup_parameters_post(self, params): """ return params - def cleanup_result_pre(self, params, result): + @contextmanager + def cleanup_result_pre( + self, params: Dict, result: object, flags: ResultHandlingFlag + ) -> None: """ - Some return values cannot be pickled. This interceptor allows you to - rewrite the result so that it can be. Sometimes this means removing - a property (setting it to None), sometimes it means replacing the - result with something else entirely (a doppleganger with the same - methods and properties as the original, but isn't derived from it). + This cleanup allows you to modify the result before it is written. - Common ways to deal with pickling errors here are: - - Set one of the properties to None - - Return a doppleganger class (looks like, smells like, but does not - derive from the original). + This is implemented as a context manager so that the content that gets + recorded can be different than the original return value. + There are four patterns generally found when handling results: + + 1. The result contains no sensitive data and can be pickled. + 2. The result cannot be pickled. + 3. The result contains sensitive data that should not be recorded. + 4. The result is a generator. + + The most common pickle-incompatible result is a local class + definition that gets returned. Pickle cannot reconstitute such + a class because it is not in the global namespace. In this case + you can provide a stand-in that behaves the same way as the original. + This stand-in gets recorded and used during playback, however the + original result continues to be used during the remainder of the + recording. This means it is possible that recording will succeed + but playback will fail if the stand-in is not accurate in behavior. + + For results with sensitive data, you should redact that data + before yielding then replace it before returning. The recording + will contain, and playback will have a redacted object, but the + remainder of the recording run will use the original result with the + secret intact. + + For generator results, the process of recording drains the generator, + so it is recommended that you convert the generator to a suitable + container and drain the contents, and then tell the context to replace + the original result with the cleaned-up result. + + FIXME: + The params are given to help identify the call, but we should + be passing in the complete context of the call. + + Yields: + a tuple of: + - the result to be recorded and used during subsequent playback + - result handling flags """ - return result + yield result, flags - def cleanup_result_post(self, result): + def cleanup_result_post(self, result: object) -> object: """ Modify the return value during playback before it is returned. + + Returns: + the object to be returned as the result """ return result - def clear_for_execution(self, params) -> None: + def clear_for_execution(self, params: Dict) -> None: """ Called before any method is actually executed. This can be used to implement a mechanism that ensures only certain methods are called. @@ -295,44 +375,41 @@ def _playback(self, params: dict) -> object: Returns: Whatever object was stored """ - new_params = self.cleanup_parameters_post(params) prefix = sha1( # nosec - json.dumps(new_params, sort_keys=True, cls=self.encoder).encode() + json.dumps(params, sort_keys=True, cls=self.encoder).encode() ).hexdigest() result_key = f"{prefix}.results" # Check the call order - if not an exact match something changed. if self.call_order: - channel = new_params.get("channel") + channel = params.get("channel") if channel: index = self.call_order[channel].get("call_index", 0) calls = self.call_order[channel]["calls"] if len(calls) <= index: raise PlaybackError("Not enough calls recorded to satisfy.") - if new_params != calls[index]: + if params != calls[index]: msg = f"Call #{index} is different than what was recorded. " msg += "Please re-record and/or resolve non-idempotent (random) behavior. " - msg += f"This call: {new_params}; Recorded call: {calls[index]}" + msg += f"This call: {params}; Recorded call: {calls[index]}" raise PlaybackError(msg) self.call_order[channel]["call_index"] = index + 1 # record the call in the playback_call_order list - if new_params["channel"] in self.playback_call_order: - self.playback_call_order[new_params["channel"]]["calls"].append( - new_params - ) + if params["channel"] in self.playback_call_order: + self.playback_call_order[params["channel"]]["calls"].append(params) else: - self.playback_call_order[new_params["channel"]] = {} - self.playback_call_order[new_params["channel"]]["calls"] = [new_params] + self.playback_call_order[params["channel"]] = {} + self.playback_call_order[params["channel"]]["calls"] = [params] located = self.tape.get(result_key) if not located: - raise PlaybackError(f"No calls for params {new_params} were ever recorded.") + raise PlaybackError(f"No calls for params {params} were ever recorded.") index = self.playback_index.get(result_key, 0) if len(located) <= index: raise PlaybackError( - f"Call #{index} for params {new_params} was never recorded." + f"Call #{index} for params {params} was never recorded." ) recorded = located[index] self.playback_index[result_key] = index + 1 @@ -349,7 +426,7 @@ def _playback(self, params: dict) -> object: "result", "playback", f"playing back RESULT for {result_key} call #{index} " - f"for params {new_params} hash={prefix} " + f"for params {params} hash={prefix} " f"type={(result.__class__.__name__ if result is not None else 'None')}: " f"{pformat(result)}", ) @@ -359,9 +436,9 @@ def _playback(self, params: dict) -> object: "except", "playback", f"playing back EXCEPTION for {result_key} call #{index} " - f"for params {new_params} hash={prefix}: {str(exception)}", + f"for params {params} hash={prefix}: {str(exception)}", ) - raise self.cleanup_exception_post(exception) + raise self.cleanup_exception_post(params, exception) def _record(self, params: dict, result: object, exception: object = None): """ @@ -472,7 +549,9 @@ def __call__(self, *args, **kwargs): ] = self._self_channel if self._self_interposer.mode == Mode.Playback: self._self_interposer.clear_for_execution(params) - result = self._self_interposer._playback(params) + result = self._self_interposer._playback( + self._self_interposer.cleanup_parameters_post(params) + ) if isinstance(self.__wrapped__, type): # instantiating an object from a class definition requires # us to wrap the result so that we can capture the rest of @@ -487,11 +566,24 @@ def __call__(self, *args, **kwargs): self._log( "call", f"calling {self.__wrapped__} and recording result" ) - result = self._self_interposer.cleanup_result_pre( - params, super().__call__(*args, **kwargs) - ) - params = self._self_interposer.cleanup_parameters_pre(params) - self._self_interposer._record(params, result) + result = super().__call__(*args, **kwargs) + with self._self_interposer.cleanup_result_pre( + params, result, ResultHandlingFlag.RECORD + ) as (scrubbed_result, flags): + if flags & ResultHandlingFlag.RECORD: + self._self_interposer._record( + self._self_interposer.cleanup_parameters_pre(params), + scrubbed_result, + ) + if flags & ResultHandlingFlag.REPLACE: + self._log( + "call", + ( + f"cleanup {self._self_interposer.cleanup_parameters_pre} is replacing the " + f"original result of type {type(result)} with type {type(scrubbed_result)}" + ), + ) + result = scrubbed_result if isinstance(self.__wrapped__, type): # instantiating an object from a class definition requires # us to wrap the result so that we can capture the rest of @@ -501,9 +593,14 @@ def __call__(self, *args, **kwargs): ) return result except Exception as ex: - params = self._self_interposer.cleanup_parameters_pre(params) - ex = self._self_interposer.cleanup_exception_pre(ex) - self._self_interposer._record(params, None, exception=ex) + with self._self_interposer.cleanup_exception_pre( + params, ex + ) as scrubbed_ex: + self._self_interposer._record( + self._self_interposer.cleanup_parameters_pre(params), + None, + exception=scrubbed_ex, + ) raise ex else: # not a class definition, method, or function diff --git a/tests/interposer_test.py b/tests/interposer_test.py index 118ccf7..90b36a7 100644 --- a/tests/interposer_test.py +++ b/tests/interposer_test.py @@ -6,17 +6,22 @@ import logging import shutil import tempfile +import types import unittest import uuid +from contextlib import contextmanager +from copy import deepcopy from datetime import datetime from enum import Enum from pathlib import Path from typing import Dict +from typing import Generator from interposer import Interposer from interposer import Mode from interposer import PlaybackError +from interposer import ResultHandlingFlag from interposer import ScopedInterposer from interposer.interposer import _InterposerWrapper @@ -41,10 +46,17 @@ class MyVerySpecificError(RuntimeError): pass -class SomeClass(object): +def numberGenerator(n): + if n < 20: + number = 0 + while number < n: + yield number + number += 1 + else: + return - throw_exception = True +class SomeClass(object): def __init__(self, result: object, secret: str = None): """ Store a result for say_hi. If this derives from Exception then @@ -53,6 +65,9 @@ def __init__(self, result: object, secret: str = None): self.result = result self.auth = {"secret": secret} + self.throw_exception = True + self.throw_if_range_called = False + def say_hi(self, greeting: str = "hello", second: object = None) -> str: """ Returns the result stored in the initializer. @@ -68,6 +83,16 @@ def give_up(self): if self.throw_exception: raise MyVerySpecificError("ouchies") + def generated_numbers(self, n: int) -> Generator[int, None, None]: + """ + Returns a generator. These are harder to deal with + because they only generate their contents once, so we + have to extract the contents before we record and return. + """ + if self.throw_if_range_called: + raise NotImplementedError("should have satisfied result from recording!") + return numberGenerator(5) + @property def get_complex_stuff(self): """ @@ -86,22 +111,76 @@ class SomeClassSecretRemoverInterposer(Interposer): 2. By removing it from any result. """ - def cleanup_parameters_pre(self, params) -> Dict: + def cleanup_parameters_pre(self, params: Dict) -> Dict: """ Remove the secret from the parameters used to initialize it in the recording. """ if "secret" in params["kwargs"]: - params["kwargs"]["secret"] = "REDACTED_SECRET" # nosec - return params - - def cleanup_result_pre(self, params, result) -> object: + result = deepcopy(params) + result["kwargs"]["secret"] = "REDACTED_SECRET" # nosec + return super().cleanup_parameters_pre(result) + else: + return params + + @contextmanager + def cleanup_result_pre( + self, params: Dict, result: object, flags: ResultHandlingFlag + ) -> None: """ Remove the secret from the initialized class in the recording. """ if "secret" in params["kwargs"]: + original_secret = result.__dict__["auth"]["secret"] result.__dict__["auth"]["secret"] = "REDACTED_SECRET" # nosec - return result + # this is good form in case there are stacked interposers + with super().cleanup_result_pre(params, result, flags=flags) as ( + scrubbed_result, + flags, + ): + yield scrubbed_result, flags + result.__dict__["auth"]["secret"] = original_secret + else: + yield result, flags + + +class SomeClassGeneratorInterposer(Interposer): + """ + Tests handling generators by overwriting the original result. + """ + + @contextmanager + def cleanup_result_pre( + self, params: Dict, result: object, flags: ResultHandlingFlag + ) -> None: + if params["method"] == "generated_numbers": + assert isinstance(result, types.GeneratorType) + new_result = list(result) + flags |= ResultHandlingFlag.REPLACE + # this is good form in case there are stacked interposers + with super().cleanup_result_pre(params, new_result, flags=flags) as ( + scrubbed_result, + flags, + ): + yield scrubbed_result, flags + + +class SomeClassRecordNothingInterposer(Interposer): + """ + Not very practical but here we simply state that no recording should be done. + """ + + @contextmanager + def cleanup_result_pre( + self, params: Dict, result: object, flags: ResultHandlingFlag + ) -> None: + flags ^= ResultHandlingFlag.RECORD + # this is good form in case there are stacked interposers + with super().cleanup_result_pre(params, result, flags=flags) as ( + scrubbed_result, + flags, + ): + yield scrubbed_result, flags class InterposerTest(unittest.TestCase): @@ -110,7 +189,6 @@ class InterposerTest(unittest.TestCase): def setUp(self): self.datadir = Path(tempfile.mkdtemp()) - SomeClass.throw_exception = True def tearDown(self): shutil.rmtree(str(self.datadir)) @@ -163,6 +241,27 @@ def test_ok_additional_types(self): assert wt.say_hi(second=MyEnum.FOO) == "hello True MyEnum.FOO" with self.assertRaises(PlaybackError): wt.say_hi(second="foobar") # never called during recording + wt.throw_if_range_called = True + + def test_generators(self): + """ + Tests how we handle generators. + """ + uut = SomeClassGeneratorInterposer(self.datadir / "recording", Mode.Recording) + uut.open() + wt = uut.wrap(SomeClass(True)) + # although it would normally return a generator, in order to record + # it we drain the generator to a list, record it, and return the list + # based on how SomeClassGeneratorInterposer is configured + assert wt.generated_numbers(5) == [0, 1, 2, 3, 4] + uut.close() + + with ScopedInterposer(self.datadir / "recording", Mode.Playback) as uut: + wt = uut.wrap(SomeClass(False)) + wt.throw_if_range_called = True + # if the method is actually called it will now throw + # instead we prove it is satisfying the result from the recording + assert wt.generated_numbers(5) == [0, 1, 2, 3, 4] def test_wrappable(self): """ @@ -219,7 +318,7 @@ def test_good_class_wrapping(self): with self.assertRaises(MyVerySpecificError) as re: wt.give_up() self.assertRegex("ouchies", str(re.exception)) - SomeClass.throw_exception = False + wt.throw_exception = False self.assertIsNone(wt.give_up()) with ScopedInterposer(self.datadir / "recording", Mode.Playback) as uut: @@ -229,10 +328,12 @@ def test_good_class_wrapping(self): self.assertIn("say_hi", dir(wt)) # but we are playing back so it returns what was recorded self.assertEqual(wt.say_hi(), "hello True") - # SomeClass is currently set not to throw, but we're playing back - # so we replay the exception - with self.assertRaises(MyVerySpecificError) as re: + # prove we replay exceptions + wt.throw_exception = False # but the original call threw + with self.assertRaises(MyVerySpecificError): wt.give_up() + wt.throw_exception = True # but the original call did not + self.assertIsNone(wt.give_up()) def test_property_handling(self): """ @@ -330,6 +431,25 @@ def test_playback_cannot_replay(self): # was never called a second time wt.say_hi() + def test_result_flags_disable_recording(self): + """ + Tests that recording flags can stop recording. + + Not intended to be an example, just to exercise the record flag turned off. + """ + uut = SomeClassRecordNothingInterposer( + self.datadir / "recording", Mode.Recording + ) + uut.open() + wt = uut.wrap(SomeClass) + wt(True) + uut.close() + + with ScopedInterposer(self.datadir / "recording", Mode.Playback) as uut: + wt = uut.wrap(SomeClass) + with self.assertRaises(PlaybackError): + wt(True) + def test_recording_contains_no_secret(self): """ Check that if we properly implement cleanup_parameters_pre, any secret @@ -342,7 +462,11 @@ def test_recording_contains_no_secret(self): ) uut.open() secret = str(uuid.uuid4()) - wt = uut.wrap(SomeClass) # wraps the class definition + # wrap the class definition so that we record the __init__ behavior + wt = uut.wrap(SomeClass) + # by instantiating an object from the wrapped class definition, we + # end up storing "t" in the database (pickled), and during playback + # when "t" is instantiated is is provided entirely from the recording. t = wt(True, secret=secret) self.assertEqual(t.say_hi(), "hello True") uut.close() @@ -350,6 +474,17 @@ def test_recording_contains_no_secret(self): with (self.datadir / "recording").open("rb") as fp: data = fp.read() assert ( - "REDACTED_SECRET".encode() in data + "REDACTED_SECRET".encode() in data # nosec ), "did not find redacted secret in data file" assert secret.encode() not in data, "found original secret in data file" + + with ScopedInterposer(self.datadir / "recording", Mode.Playback) as uut2: + wt = uut2.wrap(SomeClass) + with self.assertRaises(PlaybackError): + # due to the current interposer design, we store the argument lists + # in the database so we have to scrub away the secret in the params + # which means on playback runs we must provide the redacted version + # in order to match + wt(True, secret=secret) + t = wt(True, secret="REDACTED_SECRET") # nosec + assert t.say_hi() == "hello True"