From 5216c9f6e29222e5159cc30e9b74ebe30cd7a672 Mon Sep 17 00:00:00 2001
From: "Ed (ODSC)"
Date: Wed, 2 Oct 2024 14:35:58 +0100
Subject: [PATCH] libcovebods: Fixup statistics for BODS 0.4

- run_tasks: register StatisticsStatementsRecordStatus in both task lists.
- checks: check_statement_first_pass now also requires a non-empty
  statementDate before it looks at the record series.
- statistics: read the entity type from recordDetails.entityType.type and
  the identifiers from recordDetails.identifiers; limit the
  replaces-statements-missing statistic to schema versions up to 0.3; add
  StatisticsStatementsRecordStatus for schema versions 0.4 and above.
- tests: add tests/test_stat_counts_0_4.py covering the 0.4 statistics.
---
Review notes (text here is ignored by git am):
- Line breaks and indentation were rebuilt from a whitespace-collapsed copy
  of this patch. Context-line indentation is a best-effort reconstruction;
  use `git apply --ignore-whitespace` if a hunk is rejected.
- In StatisticsStatementsRecordStatus.check_statement_first_pass the
  `self.records[...] = ...` assignment is placed so that it runs for every
  valid statement (its indentation was not recoverable) - please confirm.
- The `index` lines are carried over unchanged and no longer describe the
  post-image blobs.

 libcovebods/run_tasks.py        |  2 +
 libcovebods/tasks/checks.py     |  2 +-
 libcovebods/tasks/statistics.py | 69 +++++++++++++++++++++++++++++----
 tests/test_stat_counts_0_4.py   | 37 ++++++++++++++++++
 4 files changed, 101 insertions(+), 9 deletions(-)
 create mode 100644 tests/test_stat_counts_0_4.py

diff --git a/libcovebods/run_tasks.py b/libcovebods/run_tasks.py
index 6bd774e..82b8334 100644
--- a/libcovebods/run_tasks.py
+++ b/libcovebods/run_tasks.py
@@ -42,6 +42,7 @@
     libcovebods.tasks.statistics.StatisticOwnershipOrControlInterestDirectOrIndirect,
     libcovebods.tasks.statistics.StatisticOwnershipOrControlWithAtLeastOneInterestBeneficial,
     libcovebods.tasks.statistics.StatisticDeclarationSubjects,
+    libcovebods.tasks.statistics.StatisticsStatementsRecordStatus,
     libcovebods.tasks.peps.PEPForSchema02Only,
     libcovebods.tasks.peps.PEPForSchema03AndAbove,
 ]
@@ -75,6 +76,7 @@
     libcovebods.tasks.statistics.StatisticOwnershipOrControlInterestDirectOrIndirect,
     libcovebods.tasks.statistics.StatisticOwnershipOrControlWithAtLeastOneInterestBeneficial,
     libcovebods.tasks.statistics.StatisticDeclarationSubjects,
+    libcovebods.tasks.statistics.StatisticsStatementsRecordStatus,
     libcovebods.tasks.peps.PEPForSchema02Only,
     libcovebods.tasks.peps.PEPForSchema03AndAbove,
 ]
diff --git a/libcovebods/tasks/checks.py b/libcovebods/tasks/checks.py
index 9ed332d..2d35942 100644
--- a/libcovebods/tasks/checks.py
+++ b/libcovebods/tasks/checks.py
@@ -1321,7 +1321,7 @@ def does_apply_to_schema(lib_cove_bods_config, schema_object) -> bool:
         return schema_object.is_schema_version_equal_to_or_greater_than("0.4")
 
     def check_statement_first_pass(self, statement):
-        if "recordId" in statement and "statementDate" in statement:
+        if "recordId" in statement and statement.get("statementDate"):
             record_status = statement.get("recordStatus")
             record_type = statement.get("recordType")
             if statement["recordId"] in self._series:
diff --git a/libcovebods/tasks/statistics.py b/libcovebods/tasks/statistics.py
index d2c8a35..657666d 100644
--- a/libcovebods/tasks/statistics.py
+++ b/libcovebods/tasks/statistics.py
@@ -91,19 +91,21 @@ def check_entity_statement_first_pass(self, statement):
             "recordDetails" in statement
             and isinstance(statement["recordDetails"], dict)
             and "entityType" in statement["recordDetails"]
-            and isinstance(statement["recordDetails"]["entityType"], str)
-            and statement["recordDetails"]["entityType"]
+            and isinstance(statement["recordDetails"]["entityType"], dict)
+            and "type" in statement["recordDetails"]["entityType"]
+            and isinstance(statement["recordDetails"]["entityType"]["type"], str)
+            and statement["recordDetails"]["entityType"]["type"]
             in self.count_entity_statements_types
         ):
             self.count_entity_statements_types[
-                statement["recordDetails"]["entityType"]
+                statement["recordDetails"]["entityType"]["type"]
             ] += 1
-            if "identifiers" in statement and isinstance(
-                statement["identifiers"], list
+            if "identifiers" in statement["recordDetails"] and isinstance(
+                statement["recordDetails"]["identifiers"], list
             ):
                 has_ids = False
                 has_ids_with_id_and_scheme = False
-                for identifier in statement["identifiers"]:
+                for identifier in statement["recordDetails"]["identifiers"]:
                     if isinstance(identifier, dict):
                         has_ids = True
                         if (
@@ -118,11 +120,11 @@ def check_entity_statement_first_pass(self, statement):
 
                 if has_ids:
                     self.count_entity_statements_types_with_any_identifier[
-                        statement["recordDetails"]["entityType"]
+                        statement["recordDetails"]["entityType"]["type"]
                     ] += 1
                     if has_ids_with_id_and_scheme:
                         self.count_entity_statements_types_with_any_identifier_with_id_and_scheme[
-                            statement["recordDetails"]["entityType"]
+                            statement["recordDetails"]["entityType"]["type"]
                         ] += 1
 
     def get_statistics(self):
@@ -386,6 +388,10 @@ def get_statistics(self):
 class StatisticsCurrentOwnershipOrControlStatementsAndReplacesStatementsMissing(
     AdditionalCheck
 ):
+    @staticmethod
+    def does_apply_to_schema(lib_cove_bods_config, schema_object) -> bool:
+        return schema_object.is_schema_version_equal_to_or_less_than("0.3")
+
     def __init__(self, lib_cove_bods_config, schema_object):
         super().__init__(lib_cove_bods_config, schema_object)
         self.count_replaces_statements_missing = 0
@@ -421,6 +427,53 @@ def get_statistics(self):
         }
         return data
 
+
+class StatisticsStatementsRecordStatus(AdditionalCheck):
+    """Count non-closed records and records first seen without a "new" status.
+
+    Only applies to schema versions 0.4 and above.
+    """
+
+    @staticmethod
+    def does_apply_to_schema(lib_cove_bods_config, schema_object) -> bool:
+        return schema_object.is_schema_version_equal_to_or_greater_than("0.4")
+
+    def __init__(self, lib_cove_bods_config, schema_object):
+        super().__init__(lib_cove_bods_config, schema_object)
+        # recordId -> recordStatus of the most recent statement seen.
+        self.records = {}
+        # recordId -> statementId of the first statement seen, recorded only
+        # when that first statement's recordStatus was not "new".
+        self.missing_new_records = {}
+        self.current_records_count = 0
+        self.missing_new_records_count = 0
+
+    def check_statement_first_pass(self, statement):
+        record_id = statement.get("recordId")
+        record_status = statement.get("recordStatus")
+        if not (
+            isinstance(record_id, str)
+            and isinstance(record_status, str)
+            and record_status in ("new", "updated", "closed")
+        ):
+            return
+        if record_id not in self.records and record_status != "new":
+            self.missing_new_records[record_id] = statement.get("statementId")
+        self.records[record_id] = record_status
+
+    def final_checks(self):
+        # Assign (rather than increment) so a repeated call cannot double count.
+        self.current_records_count = sum(
+            1 for status in self.records.values() if status != "closed"
+        )
+        self.missing_new_records_count = len(self.missing_new_records)
+
+    def get_statistics(self):
+        return {
+            "count_records_current": self.current_records_count,
+            "count_new_records_missing": self.missing_new_records_count,
+        }
+
 
 class StatisticAddress(AdditionalCheck):
     def __init__(self, lib_cove_bods_config, schema_object):
diff --git a/tests/test_stat_counts_0_4.py b/tests/test_stat_counts_0_4.py
new file mode 100644
index 0000000..87c07ea
--- /dev/null
+++ b/tests/test_stat_counts_0_4.py
@@ -0,0 +1,37 @@
+import os
+import tempfile
+
+from tests.api import bods_json_output
+
+
+def test_sample_mode_valid_1():
+
+    cove_temp_folder = tempfile.mkdtemp(
+        prefix="lib-cove-bods-tests-", dir=tempfile.gettempdir()
+    )
+    json_filename = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)),
+        "fixtures",
+        "0.4",
+        "sample_300_statements.json",
+    )
+
+    results = bods_json_output(cove_temp_folder, json_filename, sample_mode=True)
+
+    assert results["schema_version"] == "0.4"
+
+    assert results['statistics']['count_entity_statements'] == 50
+    assert results['statistics']['count_entity_statements_types']['registeredEntity'] == 50
+    assert results['statistics']['count_entity_statements_types_with_any_identifier']['registeredEntity'] == 50
+    assert results['statistics']['count_entity_statements_types_with_any_identifier_with_id_and_scheme']['registeredEntity'] == 50
+    assert results['statistics']['count_person_statements'] == 50
+    assert results['statistics']['count_person_statements_types']['knownPerson'] == 50
+    assert results['statistics']['count_ownership_or_control_statement'] == 50
+    assert results['statistics']['count_ownership_or_control_statement_interested_party'] == 50
+    assert results['statistics']['count_ownership_or_control_statement_interest_statement_types']['shareholding'] == 50
+    assert results['statistics']['count_ownership_or_control_statement_by_year'][2020] == 50
+    assert results['statistics']['count_ownership_or_control_statement_subject_by_year'][2020] == 50
+    assert results['statistics']['count_ownership_or_control_statement_interested_party_by_year'][2020] == 50
+    assert results['statistics']['count_declaration_subjects'] == 50
+    assert results['statistics']['count_records_current'] == 150
+    assert results['statistics']['count_new_records_missing'] == 0