From 314ed8311c3a00390683d5c6dd8a47b676d1dfec Mon Sep 17 00:00:00 2001 From: Emily Cheng Date: Tue, 19 Nov 2024 16:13:58 -0800 Subject: [PATCH] Docs, lint, reorganize changelog properly --- CHANGELOG.md | 20 +++---- ...data-quality-rule-definition-guidelines.md | 56 ++++++++++++------- nacc_form_validator/errors.py | 6 +- nacc_form_validator/nacc_validator.py | 10 ++-- 4 files changed, 55 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f47f503..6561bc3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,36 +4,36 @@ Documentation of release versions of `nacc-form-validator` ## 0.4.0 -* Updates `_validate_compute_gds` to remove partial GDS score calculation -* Updates `_check_subschema_valid` to accept an optional record parameter, defaults to the document - used for rules that require the previous record -* Updates `compare_with` to support the `abs` operator -* Updates `compatibility` error message to be more verbose -* Refactors the tests to be more modularized so that they're more manageable -* Refactors logic for `compare_values` by moving it to its own utility method and allows comparing to null values * Adds `get_previous_record` method to grab previous record from Datastore, which can grab the previous record or the previous record where a specific field is non-empty - * Updates `_validate_temporalrules` to also support the `ignore_empty` parameter * Adds support for comparing against the previous record in `compare_with` * Adds new rule `compare_age` to handle rules that need to compare ages relative to a date * Adds custom operator `count_exact` to `json_logic.py` +* Refactors the tests to be more modularized so that they're more manageable +* Refactors logic for `compare_values` by moving it to its own utility method and allows comparing to null values +* Updates `_check_subschema_valid` to accept an optional record parameter, defaults to the document - used for rules that require the previous record +* Updates `_validate_compute_gds` to remove partial GDS score calculation +* Updates `_validate_compare_with` to support the `abs` operator +* Updates `_validate_compatibility` error message to be more verbose +* Updates `_validate_temporalrules` to also support `ignore_empty` and `swap_order` parameters * Fixes issue where `datastore` was not being set for the temp validator in `_check_subschema_valid`, causing nested conditions with previous records to not evaluate correctly ## 0.3.0 +* Adds `_check_with_rxnorm` function to check whether a given Drug ID is valid RXCUI code * Updates `_validate_compare_with` to allow adjustments to be another field, and for base values to be hardcoded values * Updates json_logic `less` function to handle None * Updates `_validate_temporalrules` to iterate on multiple fields for `previous` and `current` clauses, remove `orderby` attribute * Updates `_check_with_gds` function to `_validate_compute_gds` and update GDS score validation -* Adds `_check_with_rxnorm` function to check whether a given Drug ID is valid RXCUI code * Fixes a bug in min/max validation wrt current_year ## 0.2.0 +* Adds documentation/user guides and testing +* Adds this CHANGELOG * Updates `_validate_compatibility` to iterate on multiple fields for `then` and `else` clauses, similar to how the `if` clause was handling it * Updates `cast_record` to set any missing fields to `None` * Updates `pants_version` in `pants.toml` to `2.22.0` (latest) to support building on macOS * Renamed `validator` to `nacc_form_validator` for a more unique namespace, and imported `QualityCheck` into `__init__.py` for easier access -* Adds documentation/user guides and testing -* Adds this CHANGELOG ## 0.1.1 and older diff --git a/docs/data-quality-rule-definition-guidelines.md b/docs/data-quality-rule-definition-guidelines.md index ac1dfab..b228241 100644 --- a/docs/data-quality-rule-definition-guidelines.md +++ b/docs/data-quality-rule-definition-guidelines.md @@ -854,12 +854,17 @@ The validator also has custom operators in addition to the ones provided by json ### temporalrules Used to specify the list of checks to be performed against the previous visit for the same participant. -* Each constraint specifies `previous` and `current` attributes. If conditions specified under `previous` subschema satisfied by the previous visit record, then the current visit record must satisfy the conditions specified under `current` subschema. -* Each `previous/current` attribute can have several fields which need to be satisifed, with the optional `*_op` attribute specifying the boolean operation in which to compare the different fields. For example, if `prev_op = or`, then as long as _any_ of the fields satsify their schema, the `current` attribute will be evaluated. The default `*_op` is `and`. -* Each constraint also allows for an optional `ignore_empty` option, which can take a string or list of strings denoting fields that cannot be empty. When grabbing the previous record, the validator will grab the first previous record where all specified fields are non-empty. + +Each constraint specifies `previous` and `current` attributes. If conditions specified under the `previous` subschema are satisfied by the previous visit record, then the current visit record must satisfy the conditions specified under `current` subschema. + +Each constraint also has optional fields that can be set: + +* Each `previous/current` attribute can have several fields which need to be satisifed, so an optional `*_op` attribute can be used to specify the boolean operation in which to compare the different fields. For example, if `prev_op = or`, then as long as _any_ of the fields satsify their schema, the `current` attribute will be evaluated. The default `*_op` is `and`. +* `ignore_empty`: Takes a string or list of strings denoting fields that cannot be empty. When grabbing the previous record, the validator will grab the first previous record where _all_ specified fields are non-empty. * If no record is found that satisfies all fields being non-empty, the validator will _ignore_ the check (e.g. pass through validation without errors) +* `swap_order`: If set to True, it will swap the order of operations, evluating the `current` subschema first, then the `previous` subschema -*To validate `temporalrules`, the validator should have a `Datastore` instance which will be used to retrieve the previous visit record(s) for the participant.* +> **NOTE**: To validate `temporalrules`, the validator should have a `Datastore` instance which will be used to retrieve the previous visit record(s) for the participant. The rule definition for `temporalrules` should follow the following format: @@ -867,25 +872,34 @@ The rule definition for `temporalrules` should follow the following format: { "": { "temporalrules": [ - { - "previous": { - "": "subschema to be satisfied for the previous record" - }, - "current": { - "": "subschema to be satisfied for the current record" - } + { + "previous": { + "": "subschema to be satisfied for the previous record" }, - { - "ignore_empty": ["", ""], - "prev_op": "or", - "previous": { - "": "subschema to be satisfied for the previous record", - "": "subschema to be satisfied for the previous record" - }, - "current": { - "": "subschema to be satisfied for the current record" - } + "current": { + "": "subschema to be satisfied for the current record, will be evaluated if the previous subschema is satisfied" + } + }, { + "ignore_empty": ["", ""], + "prev_op": "or", + "previous": { + "": "subschema to be satisfied for the previous record", + "": "subschema to be satisfied for the previous record" + }, + "current": { + "": "subschema to be satisfied for the current record, will be evaluated if either of the previous subschemas are satisfied" } + }, + { + "swap_order": true, + "previous": { + "": "subschema to be satisfied for the previous record, will be evaluated if the current subschema is satisfied" + }, + "current": { + "": "subschema to be satisfied for the current record" + } + } + ] } } diff --git a/nacc_form_validator/errors.py b/nacc_form_validator/errors.py index c9f9d95..6b49304 100644 --- a/nacc_form_validator/errors.py +++ b/nacc_form_validator/errors.py @@ -89,7 +89,8 @@ def __set_custom_error_codes(self): 0x1009: "{1} for if {2} else {3} - compatibility rule no: {0}", 0x2000: - "{1} in current visit for if {2} in previous visit then {3} in current visit - temporal rule no: {0}", + "{1} in current visit for if {2} in previous visit then {3} " + + "in current visit - temporal rule no: {0}", 0x2001: "primary key variable {0} not set in current visit data", 0x2002: @@ -121,7 +122,8 @@ def __set_custom_error_codes(self): 0x3003: "Error in comparing {0} to age at {1} ({2}): {3}", 0x3004: - "{1} in previous visit for if {2} in current visit then {3} in previous visit - temporal rule no: {0}", + "{1} in previous visit for if {2} in current visit then {3} " + + "in previous visit - temporal rule no: {0}", } self.messages.update(custom_errors) diff --git a/nacc_form_validator/nacc_validator.py b/nacc_form_validator/nacc_validator.py index 8cdebca..885ffed 100755 --- a/nacc_form_validator/nacc_validator.py +++ b/nacc_form_validator/nacc_validator.py @@ -736,8 +736,9 @@ def _validate_temporalrules(self, temporalrules: List[Mapping], field: str, if not swap_order: # Check if conditions for the previous visit is satisfied - valid, _ = self._check_subschema_valid( - prev_conds, prev_operator, record=prev_ins) + valid, _ = self._check_subschema_valid(prev_conds, + prev_operator, + record=prev_ins) # If not satisfied, continue to next rule if not valid: @@ -757,8 +758,9 @@ def _validate_temporalrules(self, temporalrules: List[Mapping], field: str, continue # if satisfied, validate previous visit - valid, errors = self._check_subschema_valid( - prev_conds, prev_operator, record=prev_ins) + valid, errors = self._check_subschema_valid(prev_conds, + prev_operator, + record=prev_ins) # Cross visit validation failed - report errors if not valid and errors: