From 3503471e70e1080ce292b1a39655e2cb5009ae50 Mon Sep 17 00:00:00 2001 From: Chandima H Date: Tue, 10 Dec 2024 15:30:01 +0000 Subject: [PATCH 1/6] Add adcid validation --- nacc_form_validator/datastore.py | 14 +++++++ nacc_form_validator/errors.py | 47 +++++------------------ nacc_form_validator/nacc_validator.py | 54 +++++++++++++++++++++------ nacc_form_validator/quality_check.py | 40 ++++++++++++++++++++ 4 files changed, 106 insertions(+), 49 deletions(-) diff --git a/nacc_form_validator/datastore.py b/nacc_form_validator/datastore.py index a5e2587..3bb0a87 100755 --- a/nacc_form_validator/datastore.py +++ b/nacc_form_validator/datastore.py @@ -84,3 +84,17 @@ def is_valid_rxcui(self, drugid: int) -> bool: bool: True if provided drug ID is valid, else False """ return False + + @abstractmethod + def is_valid_adcid(self, adcid: int, own: bool) -> bool: + """Abstract method to check whether a given ADCID is valid. Override + this method to implement ADCID validation. + + Args: + adcid: provided ADCID + own: whether to check own ADCID or another center's ADCID + + Returns: + bool: True if provided ADCID is valid, else False + """ + return False diff --git a/nacc_form_validator/errors.py b/nacc_form_validator/errors.py index 8589840..5b194b0 100644 --- a/nacc_form_validator/errors.py +++ b/nacc_form_validator/errors.py @@ -9,8 +9,11 @@ ValidationError, ) +from nacc_form_validator.quality_check import SchemaDefs # pylint: disable=(too-few-public-methods) + + class ErrorDefs: """Class to define custom errors.""" @@ -43,6 +46,8 @@ class ErrorDefs: COMPARE_AGE = ErrorDefinition(0x3002, 'compare_age') COMPARE_AGE_INVALID_COMPARISON = ErrorDefinition(0x3003, 'compare_age') TEMPORAL_SWAPPED = ErrorDefinition(0x3004, 'temporalrules') + ADCID_NOT_MATCH = ErrorDefinition(0x3005, "function") + ADCID_NOT_VALID = ErrorDefinition(0x3006, "function") class CustomErrorHandler(BasicErrorHandler): @@ -124,6 +129,10 @@ def __set_custom_error_codes(self): 0x3004: "{1} for if {3} in current visit then {2} " + "in previous visit - temporal rule no: {0}", + 0x3005: + "Provided ADCID {0} is not matching to your center's ADCID", + 0x3006: + "Provided ADCID {0} is not in the valid list of ADCIDs", } self.messages.update(custom_errors) @@ -145,41 +154,3 @@ def _format_message(self, field: str, error: ValidationError): return field + ": " + error_msg return super()._format_message(field, error) - - -class SchemaDefs: - """Class to store schema attribute labels.""" - - TYPE = "type" - OP = "op" - IF_OP = "if_op" - THEN_OP = "then_op" - ELSE_OP = "else_op" - IF = "if" - THEN = "then" - ELSE = "else" - META = "meta" - ERRMSG = "errmsg" - ORDERBY = "orderby" - CONSTRAINTS = "constraints" - PREV_OP = "prev_op" - CURR_OP = "curr_op" - CURRENT = "current" - PREVIOUS = "previous" - CRR_DATE = "current_date" - CRR_YEAR = "current_year" - CRR_MONTH = "current_month" - CRR_DAY = "current_day" - PREV_RECORD = "previous_record" - FORMULA = "formula" - INDEX = "index" - FORMATTING = "formatting" - COMPARATOR = "comparator" - BASE = "base" - ADJUST = "adjustment" - IGNORE_EMPTY = "ignore_empty" - BIRTH_MONTH = 'birth_month' - BIRTH_DAY = 'birth_day' - BIRTH_YEAR = 'birth_year' - COMPARE_TO = "compare_to" - SWAP_ORDER = "swap_order" diff --git a/nacc_form_validator/nacc_validator.py b/nacc_form_validator/nacc_validator.py index cf23ea2..396cbab 100755 --- a/nacc_form_validator/nacc_validator.py +++ b/nacc_form_validator/nacc_validator.py @@ -10,12 +10,9 @@ from nacc_form_validator import utils from nacc_form_validator.datastore import Datastore -from nacc_form_validator.errors import ( - CustomErrorHandler, - ErrorDefs, - SchemaDefs, -) +from nacc_form_validator.errors import CustomErrorHandler, ErrorDefs from nacc_form_validator.json_logic import jsonLogic +from nacc_form_validator.quality_check import SchemaDefs log = logging.getLogger(__name__) @@ -800,11 +797,12 @@ def _validate_logic(self, logic: Dict[str, Any], field: str, except ValueError as error: self._error(field, ErrorDefs.FORMULA, str(error)) - def _validate_function(self, function: str, field: str, value: object): + def _validate_function(self, function: Dict[str, Any], field: str, + value: object): """Validate using a custom defined function. Args: - function: Function name + function: Dict specifying function name and arguments field: Variable name value: Variable value @@ -812,14 +810,23 @@ def _validate_function(self, function: str, field: str, value: object): Cerberus uses it to validate the schema definition. The rule's arguments are validated against this schema: - {'type': 'string', 'empty': False} + { + 'type': 'dict', + 'schema': { + 'name': {'type': 'string', 'required': True, 'empty': False}, + 'args': {'type': 'dict', 'required': False} + } + } """ - func = getattr(self, function, None) + function_name = '_' + \ + function.get(SchemaDefs.FUNCTION_NAME, 'undefined') + func = getattr(self, function_name, None) if func and callable(func): - func(value) + kwargs = getattr(self, function.get(SchemaDefs.FUNCTION_ARGS), {}) + func(field, value, **kwargs) else: - err_msg = f"{function} not defined in the validator module" + err_msg = f"{function_name} not defined in the validator module" self.__add_system_error(field, err_msg) raise ValidationException(err_msg) @@ -997,6 +1004,9 @@ def _check_with_rxnorm(self, field: str, value: Optional[int]): Args: field: Variable name value: Variable value + + Raises: + ValidationException: If Datastore not set """ # No need to validate if blank or 0 (No RXCUI code available) @@ -1106,3 +1116,25 @@ def _validate_compare_age(self, comparison: Dict[str, Any], field: str, except TypeError as error: self._error(field, ErrorDefs.COMPARE_AGE_INVALID_COMPARISON, compare_field, field, age, str(error)) + + def _check_adcid(self, field: str, value: int, own: bool = True): + """Check whether a given ADCID is valid. + + Args: + field: name of ADCID field + value: ADCID value + own (optional): whether to check own ADCID or another center's ADCID. + + Raises: + ValidationException: If Datastore not set + """ + + if not self.datastore: + err_msg = "Datastore not set, cannot validate ADCID" + self.__add_system_error(field, err_msg) + raise ValidationException(err_msg) + + if not self.datastore.is_valid_adcid(value, own): + self._error( + field, ErrorDefs.ADCID_NOT_MATCH + if own else ErrorDefs.ADCID_NOT_VALID, value) diff --git a/nacc_form_validator/quality_check.py b/nacc_form_validator/quality_check.py index ffa3274..d5bd8e6 100755 --- a/nacc_form_validator/quality_check.py +++ b/nacc_form_validator/quality_check.py @@ -13,6 +13,46 @@ ) +class SchemaDefs: + """Class to store schema attribute labels.""" + + TYPE = "type" + OP = "op" + IF_OP = "if_op" + THEN_OP = "then_op" + ELSE_OP = "else_op" + IF = "if" + THEN = "then" + ELSE = "else" + META = "meta" + ERRMSG = "errmsg" + ORDERBY = "orderby" + CONSTRAINTS = "constraints" + PREV_OP = "prev_op" + CURR_OP = "curr_op" + CURRENT = "current" + PREVIOUS = "previous" + CRR_DATE = "current_date" + CRR_YEAR = "current_year" + CRR_MONTH = "current_month" + CRR_DAY = "current_day" + PREV_RECORD = "previous_record" + FORMULA = "formula" + INDEX = "index" + FORMATTING = "formatting" + COMPARATOR = "comparator" + BASE = "base" + ADJUST = "adjustment" + IGNORE_EMPTY = "ignore_empty" + BIRTH_MONTH = 'birth_month' + BIRTH_DAY = 'birth_day' + BIRTH_YEAR = 'birth_year' + COMPARE_TO = "compare_to" + SWAP_ORDER = "swap_order" + FUNCTION_NAME = 'name' + FUNCTION_ARGS = 'args' + + class QualityCheckException(Exception): """Raised if something goes wrong while loading rule definitions.""" From 26aa463dab3f4cd8a9cfc55539a8ffa59b321dca Mon Sep 17 00:00:00 2001 From: Chandima H Date: Tue, 10 Dec 2024 16:04:08 +0000 Subject: [PATCH 2/6] Move schema keys to a separate class --- nacc_form_validator/errors.py | 2 +- nacc_form_validator/keys.py | 41 +++++++++++++++++++++++ nacc_form_validator/nacc_validator.py | 4 +-- nacc_form_validator/quality_check.py | 47 ++------------------------- 4 files changed, 46 insertions(+), 48 deletions(-) create mode 100644 nacc_form_validator/keys.py diff --git a/nacc_form_validator/errors.py b/nacc_form_validator/errors.py index 5b194b0..cd40287 100644 --- a/nacc_form_validator/errors.py +++ b/nacc_form_validator/errors.py @@ -9,7 +9,7 @@ ValidationError, ) -from nacc_form_validator.quality_check import SchemaDefs +from nacc_form_validator.keys import SchemaDefs # pylint: disable=(too-few-public-methods) diff --git a/nacc_form_validator/keys.py b/nacc_form_validator/keys.py new file mode 100644 index 0000000..899544b --- /dev/null +++ b/nacc_form_validator/keys.py @@ -0,0 +1,41 @@ +"""Module for commonly used keys.""" + + +class SchemaDefs: + """Class to store JSON schema attribute labels.""" + + TYPE = "type" + OP = "op" + IF_OP = "if_op" + THEN_OP = "then_op" + ELSE_OP = "else_op" + IF = "if" + THEN = "then" + ELSE = "else" + META = "meta" + ERRMSG = "errmsg" + ORDERBY = "orderby" + CONSTRAINTS = "constraints" + PREV_OP = "prev_op" + CURR_OP = "curr_op" + CURRENT = "current" + PREVIOUS = "previous" + CRR_DATE = "current_date" + CRR_YEAR = "current_year" + CRR_MONTH = "current_month" + CRR_DAY = "current_day" + PREV_RECORD = "previous_record" + FORMULA = "formula" + INDEX = "index" + FORMATTING = "formatting" + COMPARATOR = "comparator" + BASE = "base" + ADJUST = "adjustment" + IGNORE_EMPTY = "ignore_empty" + BIRTH_MONTH = 'birth_month' + BIRTH_DAY = 'birth_day' + BIRTH_YEAR = 'birth_year' + COMPARE_TO = "compare_to" + SWAP_ORDER = "swap_order" + FUNCTION_NAME = 'name' + FUNCTION_ARGS = 'args' diff --git a/nacc_form_validator/nacc_validator.py b/nacc_form_validator/nacc_validator.py index 396cbab..4a45948 100755 --- a/nacc_form_validator/nacc_validator.py +++ b/nacc_form_validator/nacc_validator.py @@ -12,7 +12,7 @@ from nacc_form_validator.datastore import Datastore from nacc_form_validator.errors import CustomErrorHandler, ErrorDefs from nacc_form_validator.json_logic import jsonLogic -from nacc_form_validator.quality_check import SchemaDefs +from nacc_form_validator.keys import SchemaDefs log = logging.getLogger(__name__) @@ -823,7 +823,7 @@ def _validate_function(self, function: Dict[str, Any], field: str, function.get(SchemaDefs.FUNCTION_NAME, 'undefined') func = getattr(self, function_name, None) if func and callable(func): - kwargs = getattr(self, function.get(SchemaDefs.FUNCTION_ARGS), {}) + kwargs = function.get(SchemaDefs.FUNCTION_ARGS, {}) func(field, value, **kwargs) else: err_msg = f"{function_name} not defined in the validator module" diff --git a/nacc_form_validator/quality_check.py b/nacc_form_validator/quality_check.py index d5bd8e6..2357912 100755 --- a/nacc_form_validator/quality_check.py +++ b/nacc_form_validator/quality_check.py @@ -6,51 +6,8 @@ from cerberus.schema import SchemaError from nacc_form_validator.datastore import Datastore -from nacc_form_validator.nacc_validator import ( - CustomErrorHandler, - NACCValidator, - ValidationException, -) - - -class SchemaDefs: - """Class to store schema attribute labels.""" - - TYPE = "type" - OP = "op" - IF_OP = "if_op" - THEN_OP = "then_op" - ELSE_OP = "else_op" - IF = "if" - THEN = "then" - ELSE = "else" - META = "meta" - ERRMSG = "errmsg" - ORDERBY = "orderby" - CONSTRAINTS = "constraints" - PREV_OP = "prev_op" - CURR_OP = "curr_op" - CURRENT = "current" - PREVIOUS = "previous" - CRR_DATE = "current_date" - CRR_YEAR = "current_year" - CRR_MONTH = "current_month" - CRR_DAY = "current_day" - PREV_RECORD = "previous_record" - FORMULA = "formula" - INDEX = "index" - FORMATTING = "formatting" - COMPARATOR = "comparator" - BASE = "base" - ADJUST = "adjustment" - IGNORE_EMPTY = "ignore_empty" - BIRTH_MONTH = 'birth_month' - BIRTH_DAY = 'birth_day' - BIRTH_YEAR = 'birth_year' - COMPARE_TO = "compare_to" - SWAP_ORDER = "swap_order" - FUNCTION_NAME = 'name' - FUNCTION_ARGS = 'args' +from nacc_form_validator.errors import CustomErrorHandler +from nacc_form_validator.nacc_validator import NACCValidator, ValidationException class QualityCheckException(Exception): From 694ab25504c648541acced85098470de43b220e6 Mon Sep 17 00:00:00 2001 From: Chandima H Date: Tue, 10 Dec 2024 16:08:35 +0000 Subject: [PATCH 3/6] Add ADCID tests --- docs/validate_csv_records.py | 12 ++- nacc_form_validator/__init__.py | 2 +- tests/conftest.py | 6 +- tests/test_nacc_validator_datastore.py | 124 ++++++++++++++++++++----- 4 files changed, 114 insertions(+), 30 deletions(-) diff --git a/docs/validate_csv_records.py b/docs/validate_csv_records.py index 82fa366..d0b4f10 100644 --- a/docs/validate_csv_records.py +++ b/docs/validate_csv_records.py @@ -10,7 +10,8 @@ import logging from pathlib import Path -from nacc_form_validator import QualityCheck + +from nacc_form_validator.quality_check import QualityCheck logging.basicConfig(level=logging.DEBUG) log = logging.getLogger(__name__) @@ -38,9 +39,11 @@ log.info(f"strict mode::\t{not args.disable_strict}") if not args.rules_json.is_file(): - raise FileNotFoundError(f"Cannot find specified rules JSON: {args.rules_json}") + raise FileNotFoundError( + f"Cannot find specified rules JSON: {args.rules_json}") if not args.input_records_csv.is_file(): - raise FileNotFoundError(f"Cannot find specified input records CSV: {args.input_records_csv}") + raise FileNotFoundError( + f"Cannot find specified input records CSV: {args.input_records_csv}") """ Instantiate the quality check object from rules JSON. This script assumes no datastore, and therefor @@ -70,7 +73,8 @@ errors['row'] = i all_errors.append(errors) error_headers.update(set(errors.keys())) - log.warning(f"Row {i} in the input records CSV failed validation") + log.warning( + f"Row {i} in the input records CSV failed validation") """ Convert all_errors and error_headers to a "csv-like" dict for writing/printing out diff --git a/nacc_form_validator/__init__.py b/nacc_form_validator/__init__.py index a690755..8b13789 100644 --- a/nacc_form_validator/__init__.py +++ b/nacc_form_validator/__init__.py @@ -1 +1 @@ -from .quality_check import QualityCheck # noqa: F401 + diff --git a/tests/conftest.py b/tests/conftest.py index d145715..d23930d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,8 +3,8 @@ """ import pytest -from dateutil import parser -from nacc_form_validator.nacc_validator import NACCValidator, CustomErrorHandler +from nacc_form_validator.nacc_validator import NACCValidator +from nacc_form_validator. errors import CustomErrorHandler @pytest.fixture(scope="session") @@ -16,6 +16,7 @@ def _create_nacc_validator(schema: dict[str, object]) -> NACCValidator: error_handler=CustomErrorHandler(schema)) return _create_nacc_validator + @pytest.fixture def nv(create_nacc_validator): """ Returns a dummy QC with all data kinds of data types/rules to use for general testing """ @@ -49,6 +50,7 @@ def nv(create_nacc_validator): return create_nacc_validator(schema) + @pytest.fixture(scope="session") def date_constraint(): """ MM/DD/YYYY or YYYY/MM/DD """ diff --git a/tests/test_nacc_validator_datastore.py b/tests/test_nacc_validator_datastore.py index a0a4c99..b7931bd 100644 --- a/tests/test_nacc_validator_datastore.py +++ b/tests/test_nacc_validator_datastore.py @@ -5,8 +5,10 @@ import copy import pytest -from nacc_form_validator.nacc_validator import NACCValidator, CustomErrorHandler + from nacc_form_validator.datastore import Datastore +from nacc_form_validator.errors import CustomErrorHandler +from nacc_form_validator.nacc_validator import NACCValidator class CustomDatastore(Datastore): @@ -37,6 +39,10 @@ def __init__(self, pk_field: str, orderby: str) -> None: # dummy rxcui mapping for testing self.__valid_rxcui = list(range(50)) + # dummy ADCIDs for testing + self.__adcid = 0 + self.__valid_adcids = [0, 2, 5, 8, 10] + super().__init__(pk_field, orderby) def get_previous_record(self, current_record: dict[str, str]) -> dict[str, str] | None: @@ -86,6 +92,13 @@ def is_valid_rxcui(self, drugid: int) -> bool: """ For RXCUI testing """ return drugid in self.__valid_rxcui + def is_valid_adcid(self, adcid: int, own: bool) -> bool: + """ For ADCID testing """ + if own: + return adcid == self.__adcid + + return adcid in self.__valid_adcids + def create_nacc_validator_with_ds(schema: dict[str, object], pk_field: str, orderby: str) -> NACCValidator: """ Creates a generic NACCValidtor with the above CustomDataStore """ @@ -97,6 +110,7 @@ def create_nacc_validator_with_ds(schema: dict[str, object], pk_field: str, orde nv.datastore = CustomDatastore(pk_field, orderby) return nv + @pytest.fixture def schema(): return { @@ -118,29 +132,38 @@ def schema(): } } + def test_temporal_check(schema): """ Temporal test check - this is basically a more involved version of the example provided in docs/index.md """ nv = create_nacc_validator_with_ds(schema, 'patient_id', 'visit_num') - assert nv.validate({'patient_id': 'PatientID1','visit_num': 4, 'taxes': 1}) + assert nv.validate({'patient_id': 'PatientID1', + 'visit_num': 4, 'taxes': 1}) - assert not nv.validate({'patient_id': 'PatientID1', 'visit_num': 4, 'taxes': 8}) + assert not nv.validate( + {'patient_id': 'PatientID1', 'visit_num': 4, 'taxes': 8}) assert nv.errors == {'taxes': [ "('taxes', ['unallowed value 8']) for if {'taxes': {'allowed': [0]}} in previous visit then {'taxes': {'forbidden': [8]}} in current visit - temporal rule no: 0"]} + def test_temporal_check_swap_order(schema): """ Test temporal check when the order of evaluation is swapped """ schema['taxes']['temporalrules'][0]['swap_order'] = True nv = create_nacc_validator_with_ds(schema, 'patient_id', 'visit_num') - assert nv.validate({'patient_id': 'PatientID1','visit_num': 4, 'taxes': 1}) - assert nv.validate({'patient_id': 'PatientID1', 'visit_num': 4, 'taxes': 8}) # since 8 fails the current condition, validation skipped + assert nv.validate({'patient_id': 'PatientID1', + 'visit_num': 4, 'taxes': 1}) + # since 8 fails the current condition, validation skipped + assert nv.validate({'patient_id': 'PatientID1', + 'visit_num': 4, 'taxes': 8}) nv.reset_record_cache() - assert not nv.validate({'patient_id': 'PatientID1', 'visit_num': 2, 'taxes': 1}) + assert not nv.validate( + {'patient_id': 'PatientID1', 'visit_num': 2, 'taxes': 1}) assert nv.errors == {'taxes': [ "('taxes', ['unallowed value 8']) for if {'taxes': {'forbidden': [8]}} in current visit then {'taxes': {'allowed': [0]}} in previous visit - temporal rule no: 0"]} + def test_temporal_check_no_prev_visit(schema): """ Temporal test check when there are no previous visits (e.g. before visit 0) """ nv = create_nacc_validator_with_ds(schema, 'patient_id', 'visit_num') @@ -150,6 +173,7 @@ def test_temporal_check_no_prev_visit(schema): assert nv.errors == {'taxes': [ 'failed to retrieve the previous visit, cannot proceed with validation']} + def test_temporal_check_previous_nonempty(): """ Temporal check where previous record is nonempty """ schema = { @@ -173,11 +197,14 @@ def test_temporal_check_previous_nonempty(): } } nv = create_nacc_validator_with_ds(schema, 'patient_id', 'visit_num') - assert nv.validate({'patient_id': 'PatientID1', 'visit_num': 4, 'birthmo': 6}) + assert nv.validate({'patient_id': 'PatientID1', + 'visit_num': 4, 'birthmo': 6}) # if ignore_empty is set and we cannot find a record, pass through the validation nv.reset_record_cache() - assert nv.validate({'patient_id': 'PatientID1', 'visit_num': 2, 'birthmo': 6}) + assert nv.validate({'patient_id': 'PatientID1', + 'visit_num': 2, 'birthmo': 6}) + def test_compare_with_previous_record(): """ Test compare_with previous record """ @@ -195,13 +222,18 @@ def test_compare_with_previous_record(): } nv = create_nacc_validator_with_ds(schema, 'patient_id', 'visit_num') - assert nv.validate({'patient_id': 'PatientID1', 'visit_num': 4, 'birthyr': 1950}) + assert nv.validate({'patient_id': 'PatientID1', + 'visit_num': 4, 'birthyr': 1950}) - assert not nv.validate({'patient_id': 'PatientID1', 'visit_num': 4, 'birthyr': 2000}) - assert nv.errors == {'birthyr': ["input value doesn't satisfy the condition birthyr == birthyr (previous record)"]} + assert not nv.validate( + {'patient_id': 'PatientID1', 'visit_num': 4, 'birthyr': 2000}) + assert nv.errors == {'birthyr': [ + "input value doesn't satisfy the condition birthyr == birthyr (previous record)"]} nv.reset_record_cache() - assert nv.validate({'patient_id': 'PatientID1', 'visit_num': 2, 'birthyr': 1950}) + assert nv.validate({'patient_id': 'PatientID1', + 'visit_num': 2, 'birthyr': 1950}) + def test_compare_with_previous_nonempty_record(): """ Test compare_with previous nonempty record """ @@ -220,11 +252,14 @@ def test_compare_with_previous_nonempty_record(): } nv = create_nacc_validator_with_ds(schema, 'patient_id', 'visit_num') - assert nv.validate({'patient_id': 'PatientID1', 'visit_num': 4, 'birthmo': 6}) + assert nv.validate({'patient_id': 'PatientID1', + 'visit_num': 4, 'birthmo': 6}) # since ignore_empty = True, this will skip over validation in the previous record not found nv.reset_record_cache() - assert nv.validate({'patient_id': 'PatientID1', 'visit_num': 2, 'birthmo': 6}) + assert nv.validate({'patient_id': 'PatientID1', + 'visit_num': 2, 'birthmo': 6}) + def test_compare_with_previous_nonempty_record_not_allowed(): """ Test compare_with previous nonempty record but not ignoring empty """ @@ -242,12 +277,16 @@ def test_compare_with_previous_nonempty_record_not_allowed(): } nv = create_nacc_validator_with_ds(schema, 'patient_id', 'visit_num') - assert nv.validate({'patient_id': 'PatientID1', 'visit_num': 4, 'birthmo': 6}) + assert nv.validate({'patient_id': 'PatientID1', + 'visit_num': 4, 'birthmo': 6}) # since ignore_empty = False, this is not allowed nv.reset_record_cache() - assert not nv.validate({'patient_id': 'PatientID1', 'visit_num': 2, 'birthmo': 6}) - assert nv.errors == {'birthmo': ['failed to retrieve record for previous visit, cannot proceed with validation birthmo == birthmo (previous record)']} + assert not nv.validate( + {'patient_id': 'PatientID1', 'visit_num': 2, 'birthmo': 6}) + assert nv.errors == {'birthmo': [ + 'failed to retrieve record for previous visit, cannot proceed with validation birthmo == birthmo (previous record)']} + def test_compare_with_previous_different_variable(): """ Test compare_with previous record and a different variable name. @@ -268,13 +307,18 @@ def test_compare_with_previous_different_variable(): } nv = create_nacc_validator_with_ds(schema, 'patient_id', 'visit_num') - assert nv.validate({'patient_id': 'PatientID1', 'visit_num': 4, 'birthyear': 1950}) + assert nv.validate({'patient_id': 'PatientID1', + 'visit_num': 4, 'birthyear': 1950}) - assert not nv.validate({'patient_id': 'PatientID1', 'visit_num': 4, 'birthyear': 2000}) - assert nv.errors == {'birthyear': ["input value doesn't satisfy the condition birthyear == birthyr (previous record)"]} + assert not nv.validate( + {'patient_id': 'PatientID1', 'visit_num': 4, 'birthyear': 2000}) + assert nv.errors == {'birthyear': [ + "input value doesn't satisfy the condition birthyear == birthyr (previous record)"]} nv.reset_record_cache() - assert nv.validate({'patient_id': 'PatientID1', 'visit_num': 2, 'birthyear': 1950}) + assert nv.validate({'patient_id': 'PatientID1', + 'visit_num': 2, 'birthyear': 1950}) + def test_temporal_check_with_nested_compare_with_previous_record(): """ Test when compare_with previous_record nested inside a temporalrules """ @@ -301,15 +345,18 @@ def test_temporal_check_with_nested_compare_with_previous_record(): } } nv = create_nacc_validator_with_ds(schema, 'patient_id', 'visit_num') - assert nv.validate({'patient_id': 'PatientID1', 'visit_num': 4, 'birthyr': 1950}) + assert nv.validate({'patient_id': 'PatientID1', + 'visit_num': 4, 'birthyr': 1950}) - assert not nv.validate({'patient_id': 'PatientID1', 'visit_num': 4, 'birthyr': 1951}) + assert not nv.validate( + {'patient_id': 'PatientID1', 'visit_num': 4, 'birthyr': 1951}) assert nv.errors == {'birthyr': [ '(\'birthyr\', ["input value doesn\'t satisfy the condition birthyr == birthyr (previous record)"]) for ' + 'if {\'birthyr\': {\'forbidden\': [-1]}} in previous visit ' + 'then {\'birthyr\': {\'compare_with\': {\'comparator\': \'==\', \'base\': \'birthyr\', \'previous_record\': True}}} in current visit ' + '- temporal rule no: 0']} + def test_check_with_rxnorm(): """ Test checking drugID is a valid RXCUI """ schema = { @@ -328,3 +375,34 @@ def test_check_with_rxnorm(): assert nv.errors == {'drug': ['Drug ID -1 is not a valid RXCUI']} assert not nv.validate({"drug": 100}) assert nv.errors == {'drug': ['Drug ID 100 is not a valid RXCUI']} + + +def test_check_adcid(): + """ Test checking provided ADCID is valid """ + + schema = { + "adcid": { + "type": "integer", + "function": { + "name": "check_adcid" + } + }, + "oldadcid": { + "type": "integer", + "function": { + "name": "check_adcid", + "args": {"own": False} + } + } + } + + nv = create_nacc_validator_with_ds(schema, 'patient_id', 'visit_num') + + assert nv.validate({"adcid": 0}) + assert nv.validate({"oldadcid": 10}) + assert not nv.validate({"adcid": 1}) + assert nv.errors == { + 'adcid': ["Provided ADCID 1 is not matching to your center's ADCID"]} + assert not nv.validate({"oldadcid": 20}) + assert nv.errors == { + 'oldadcid': ["Provided ADCID 20 is not in the valid list of ADCIDs"]} From c55164b0cb1702e08aece757757920ee717c1e81 Mon Sep 17 00:00:00 2001 From: Chandima H Date: Tue, 10 Dec 2024 16:25:10 +0000 Subject: [PATCH 4/6] Update changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7b54e5..0f1a7aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ Documentation of release versions of `nacc-form-validator` ## 0.4.0 +* Adds `_check_adcid` method to validate a provided ADCID against current list of ADCIDs. (Actual validation should be implemented by overriding the `is_valid_adcid` method in Datastore class) * Adds `get_previous_record` method to grab previous record from Datastore, which can grab the previous record or the previous record where a specific field is non-empty * Adds support for comparing against the previous record in `compare_with` * Adds new rule `compare_age` to handle rules that need to compare ages relative to a date @@ -19,7 +20,7 @@ Documentation of release versions of `nacc-form-validator` ## 0.3.0 -* Adds `_check_with_rxnorm` function to check whether a given Drug ID is valid RXCUI code +* Adds `_check_with_rxnorm` function to check whether a given Drug ID is valid RXCUI code. (Actual validation should be implemented by overriding the `is_valid_rxcui` method in Datastore class) * Updates `_validate_compare_with` to allow adjustments to be another field, and for base values to be hardcoded values * Updates json_logic `less` function to handle None * Updates `_validate_temporalrules` to iterate on multiple fields for `previous` and `current` clauses, remove `orderby` attribute From 0bc288210836ae21e06438f6926455d0899894f4 Mon Sep 17 00:00:00 2001 From: Emily Cheng Date: Tue, 10 Dec 2024 09:32:15 -0800 Subject: [PATCH 5/6] Add docs --- ...data-quality-rule-definition-guidelines.md | 74 ++++++++++++++++++- nacc_form_validator/errors.py | 2 +- tests/test_nacc_validator_datastore.py | 2 +- tests/test_rules_cerberus.py | 24 ++++++ 4 files changed, 99 insertions(+), 3 deletions(-) diff --git a/docs/data-quality-rule-definition-guidelines.md b/docs/data-quality-rule-definition-guidelines.md index 28c61bd..3fd6421 100644 --- a/docs/data-quality-rule-definition-guidelines.md +++ b/docs/data-quality-rule-definition-guidelines.md @@ -12,6 +12,7 @@ - [compatibility](#compatibility) - [logic](#logic) - [temporalrules](#temporalrules) + - [check\_adcid](#check_adcid) - [compute\_gds](#compute_gds) - [rxnorm](#rxnorm) @@ -916,6 +917,77 @@ If field `taxes` (difficulty with taxes, business, and other papers) is 0 (norma +### check_adcid + +Used to validate an ADCID is valid. + +This function falls under the `function` rule which in turn calls the custom `check_adcid` rule in the NACCValidator. The rule definition should be in the following format: + +```json +{ + "": { + "function": { + "name": "check_adcid", + "args": {"own": ""} + } + } +} +``` + +> **NOTE**: To validate `check_adcid`, the validator should have a `Datastore` instance which implements the `is_valid_adcid` function which will check if the given ADCID value is valid + +**Example:** + +The `adcid` must match the center's own ADCID, whereas `oldadcid` should be a valid ADCID but _not_ match its own. Which ADCIDs are valid is defined by the `Datastore` instance. + + + + + + + + + + + + +
YAML Rule DefinitionJSON Rule DefinitionWhen Validating
+
adcid:
+  type: integer
+  function:
+    name: check_adcid
+oldadcid:
+  type: integer
+  function:
+    name: check_adcid
+    args:
+      - own: false
+
+
+
{
+    "adcid": {
+        "type": "integer",
+        "function": {
+            "name": "check_adcid"
+        }
+    },
+    "oldadcid": {
+        "type": "integer",
+        "function": {
+            "name": "check_adcid",
+            "args": {"own": False}
+    }
+}
+
+
+
# assume the center's own ADCID is 0, and ADCIDs 0-5 inclusive are valid
+
+{"adcid": 0, "oldadcid": 5}   # passes
+{"adcid": 2, "oldadcid": 5}   # fails
+{"adcid": 0, "oldadcid": 9}   # fails
+
+
+ ### compute_gds Custom rule defined to validate the Geriatric Depression Scale (GDS) score computation. Only be used for validating the `gds` field in UDS Form B6. @@ -934,7 +1006,7 @@ The rule definition for `compute_gds` should follow the following format: Custom rule defined to check whether a given Drug ID is valid RXCUI code. -This function uses the `check_with` rule from Cerberus. Rule definition should be in the following format: +This function uses the `check_with` rule from Cerberus. The rule definition should be in the following format: ```json { diff --git a/nacc_form_validator/errors.py b/nacc_form_validator/errors.py index cd40287..e9ae08f 100644 --- a/nacc_form_validator/errors.py +++ b/nacc_form_validator/errors.py @@ -130,7 +130,7 @@ def __set_custom_error_codes(self): "{1} for if {3} in current visit then {2} " + "in previous visit - temporal rule no: {0}", 0x3005: - "Provided ADCID {0} is not matching to your center's ADCID", + "Provided ADCID {0} does not match your center's ADCID", 0x3006: "Provided ADCID {0} is not in the valid list of ADCIDs", } diff --git a/tests/test_nacc_validator_datastore.py b/tests/test_nacc_validator_datastore.py index b7931bd..eb8a00f 100644 --- a/tests/test_nacc_validator_datastore.py +++ b/tests/test_nacc_validator_datastore.py @@ -402,7 +402,7 @@ def test_check_adcid(): assert nv.validate({"oldadcid": 10}) assert not nv.validate({"adcid": 1}) assert nv.errors == { - 'adcid': ["Provided ADCID 1 is not matching to your center's ADCID"]} + 'adcid': ["Provided ADCID 1 does not match your center's ADCID"]} assert not nv.validate({"oldadcid": 20}) assert nv.errors == { 'oldadcid': ["Provided ADCID 20 is not in the valid list of ADCIDs"]} diff --git a/tests/test_rules_cerberus.py b/tests/test_rules_cerberus.py index 957bb3e..28de3c9 100644 --- a/tests/test_rules_cerberus.py +++ b/tests/test_rules_cerberus.py @@ -136,3 +136,27 @@ def test_date_format(date_constraint, create_nacc_validator): assert nv.errors == {'frmdate': [f"value does not match regex '{date_constraint}'"]} assert not nv.validate({'frmdate': 'hello world'}) assert nv.errors == {'frmdate': [f"value does not match regex '{date_constraint}'"]} + + +def test_allowed(create_nacc_validator): + """ Test allowed with strings. """ + schema = { + "testvar": { + "allowed": [ + 1, + 'hello' + ] + } + } + + nv = create_nacc_validator(schema) + + assert nv.validate({'testvar': 1}) + assert nv.validate({'testvar': 'hello'}) + + assert not nv.validate({'testvar': 2}) + assert nv.errors == {'testvar': ['unallowed value 2']} + assert not nv.validate({'testvar': '1'}) + assert nv.errors == {'testvar': ['unallowed value 1']} + assert not nv.validate({'testvar': 'hell0'}) + assert nv.errors == {'testvar': ['unallowed value hell0']} From cf9aa55ee943caf5280bc30c7cc30e90e5926916 Mon Sep 17 00:00:00 2001 From: Chandima H Date: Tue, 10 Dec 2024 13:24:58 -0500 Subject: [PATCH 6/6] update data-quality-rule-definition-guidelines.md --- docs/data-quality-rule-definition-guidelines.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/data-quality-rule-definition-guidelines.md b/docs/data-quality-rule-definition-guidelines.md index 3fd6421..45ad02e 100644 --- a/docs/data-quality-rule-definition-guidelines.md +++ b/docs/data-quality-rule-definition-guidelines.md @@ -919,26 +919,26 @@ If field `taxes` (difficulty with taxes, business, and other papers) is 0 (norma ### check_adcid -Used to validate an ADCID is valid. +Used to check whether a specified ADCID is valid. -This function falls under the `function` rule which in turn calls the custom `check_adcid` rule in the NACCValidator. The rule definition should be in the following format: +This validation is implemented using the `function` rule with custom `check_adcid` function in the NACCValidator. The rule definition should be in the following format: ```json { "": { "function": { "name": "check_adcid", - "args": {"own": ""} + "args": {"own": ""} } } } ``` -> **NOTE**: To validate `check_adcid`, the validator should have a `Datastore` instance which implements the `is_valid_adcid` function which will check if the given ADCID value is valid +> **NOTE**: To validate `check_adcid`, the validator should have a `Datastore` instance which implements the `is_valid_adcid` function (which should have access to center's ADCID and the list of current ADCIDs). **Example:** -The `adcid` must match the center's own ADCID, whereas `oldadcid` should be a valid ADCID but _not_ match its own. Which ADCIDs are valid is defined by the `Datastore` instance. +The `adcid` must match the center's own ADCID, whereas `oldadcid` should be a valid ADCID in the current ADCIDs list. @@ -957,7 +957,7 @@ oldadcid: function: name: check_adcid args: - - own: false + own: false