PRMP-928 Add user login data and search to report (#439)
---------
Co-authored-by: Rio Knightley <rio.knightley2@nhs.net>
Co-authored-by: NogaNHS <noga.sasson1@nhs.net>
Co-authored-by: bethany-kish-nhs <beth.kish1@nhs.net>
steph-torres-nhs authored Oct 4, 2024
1 parent 3b3a36a commit ac3b5ea
Showing 10 changed files with 171 additions and 47 deletions.
1 change: 1 addition & 0 deletions lambdas/models/statistics.py
@@ -56,6 +56,7 @@ class OrganisationData(StatisticData):
daily_count_viewed: int = 0
daily_count_downloaded: int = 0
daily_count_deleted: int = 0
daily_count_searched: int = 0


class ApplicationData(StatisticData):
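This hunk gives OrganisationData a daily_count_searched counter alongside the existing daily counters. Below is a minimal standalone sketch of the shape; the dataclass is illustrative only and does not reproduce the real StatisticData base class, which is outside this diff.

from dataclasses import dataclass


@dataclass
class OrganisationDataSketch:
    # Illustrative stand-in for OrganisationData; the real class inherits
    # from StatisticData, which is not shown in this hunk.
    daily_count_viewed: int = 0
    daily_count_downloaded: int = 0
    daily_count_deleted: int = 0
    daily_count_searched: int = 0  # new counter introduced by this commit


print(OrganisationDataSketch(daily_count_searched=30))
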
34 changes: 26 additions & 8 deletions lambdas/services/data_collection_service.py
@@ -20,6 +20,7 @@
CloudwatchLogsQueryParams,
LloydGeorgeRecordsDeleted,
LloydGeorgeRecordsDownloaded,
LloydGeorgeRecordsSearched,
LloydGeorgeRecordsStored,
LloydGeorgeRecordsViewed,
UniqueActiveUserIds,
@@ -68,11 +69,14 @@ def collect_all_data_and_write_to_dynamodb(self):
def collect_all_data(self) -> list[StatisticData]:
dynamodb_scan_result = self.scan_dynamodb_tables()
s3_list_objects_result = self.get_all_s3_files_info()
record_store_data = []

record_store_data = self.get_record_store_data(
dynamodb_scan_result, s3_list_objects_result
)
if dynamodb_scan_result:
record_store_data = self.get_record_store_data(
dynamodb_scan_result, s3_list_objects_result
)
organisation_data = self.get_organisation_data(dynamodb_scan_result)

application_data = self.get_application_data()

return record_store_data + organisation_data + application_data
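The reshaped collect_all_data above defaults record_store_data to an empty list and only computes the record store summary when the DynamoDB scan returned rows. A minimal sketch of that guard, using hypothetical helpers in place of the service's real methods:

def summarise_record_store(scan_result, s3_list_objects_result):
    # Hypothetical stand-in for get_record_store_data.
    return [{"total_records": len(scan_result), "files_in_s3": len(s3_list_objects_result)}]


def summarise_organisations(scan_result):
    # Hypothetical stand-in for get_organisation_data.
    return [{"patients": len(scan_result)}]


def collect_all_data_sketch(dynamodb_scan_result, s3_list_objects_result):
    record_store_data = []
    # Skip the record store summary entirely when the scan found nothing.
    if dynamodb_scan_result:
        record_store_data = summarise_record_store(
            dynamodb_scan_result, s3_list_objects_result
        )
    organisation_data = summarise_organisations(dynamodb_scan_result)
    return record_store_data + organisation_data


print(collect_all_data_sketch([], []))
print(collect_all_data_sketch([{"ID": "doc-1"}], [{"Key": "doc-1.pdf"}]))
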
@@ -126,6 +130,7 @@ def get_record_store_data(
dynamodb_scan_result: list[dict],
s3_list_objects_result: list[dict],
) -> list[RecordStoreData]:

total_number_of_records = self.get_total_number_of_records(dynamodb_scan_result)

total_and_average_file_sizes = (
@@ -159,6 +164,7 @@ def get_record_store_data(
def get_organisation_data(
self, dynamodb_scan_result: list[dict]
) -> list[OrganisationData]:

number_of_patients = self.get_number_of_patients(dynamodb_scan_result)
average_records_per_patient = self.get_average_number_of_files_per_patient(
dynamodb_scan_result
@@ -171,6 +177,9 @@
LloydGeorgeRecordsDeleted
)
daily_count_stored = self.get_cloud_watch_query_result(LloydGeorgeRecordsStored)
daily_count_searched = self.get_cloud_watch_query_result(
LloydGeorgeRecordsSearched
)

joined_query_result = self.join_results_by_ods_code(
[
@@ -180,11 +189,15 @@
daily_count_downloaded,
daily_count_deleted,
daily_count_stored,
daily_count_searched,
]
)

organisation_data_for_all_ods_code = [
OrganisationData(date=self.today_date, **organisation_data_properties)
OrganisationData(
date=self.today_date,
**organisation_data_properties,
)
for organisation_data_properties in joined_query_result
]

@@ -208,11 +221,14 @@ def get_active_user_list(self) -> dict[str, list]:
)
user_ids_per_ods_code = defaultdict(list)
for entry in query_result:
ods_code = entry.get("ods_code")
user_id = entry.get("user_id")
ods_code = entry.get("ods_code", "")
user_id = entry.get("user_id", "")
user_role = entry.get("user_role", "No role description")
role_code = entry.get("role_code", "No role code")
hashed_user_id = hashlib.sha256(bytes(user_id, "utf8")).hexdigest()
user_ids_per_ods_code[ods_code].append(hashed_user_id)

user_ids_per_ods_code[ods_code].append(
hashed_user_id + " - " + user_role + " - " + role_code
)
return user_ids_per_ods_code
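
This get_active_user_list change is what carries the new login data into the report: each active user id is hashed, and the user's role description and role code are appended to the hashed value. A self-contained sketch of that formatting, with illustrative input values:

import hashlib
from collections import defaultdict


def build_active_user_list(query_result):
    user_ids_per_ods_code = defaultdict(list)
    for entry in query_result:
        ods_code = entry.get("ods_code", "")
        user_id = entry.get("user_id", "")
        user_role = entry.get("user_role", "No role description")
        role_code = entry.get("role_code", "No role code")
        # Hash the raw user id, then append the role info as a readable suffix.
        hashed_user_id = hashlib.sha256(bytes(user_id, "utf8")).hexdigest()
        user_ids_per_ods_code[ods_code].append(
            hashed_user_id + " - " + user_role + " - " + role_code
        )
    return user_ids_per_ods_code


example = build_active_user_list(
    [{"ods_code": "H81109", "user_id": "example-id", "user_role": "GP_ADMIN", "role_code": "RO76"}]
)
print(example["H81109"])  # ['<sha256 of "example-id"> - GP_ADMIN - RO76']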

def get_cloud_watch_query_result(
Expand Down Expand Up @@ -309,6 +325,8 @@ def get_average_number_of_files_per_patient(
self,
dynamodb_scan_result: list[dict],
) -> list[dict]:
if not dynamodb_scan_result:
return []
dynamodb_df = pl.DataFrame(dynamodb_scan_result)

count_records = pl.len().alias("number_of_records")
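get_average_number_of_files_per_patient now returns early when the scan result is empty, before the data reaches polars. A short sketch of the same guard, assuming a recent polars release; the ods_code column is a placeholder rather than the table's real grouping attribute:

import polars as pl


def average_files_per_patient_sketch(dynamodb_scan_result):
    # Mirror of the new guard: avoid building a schema-less DataFrame from no rows.
    if not dynamodb_scan_result:
        return []
    dynamodb_df = pl.DataFrame(dynamodb_scan_result)
    counted = dynamodb_df.group_by("ods_code").agg(pl.len().alias("number_of_records"))
    return counted.to_dicts()


print(average_files_per_patient_sketch([]))  # []
print(average_files_per_patient_sketch([{"ods_code": "H81109"}, {"ods_code": "H81109"}]))
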
2 changes: 2 additions & 0 deletions lambdas/services/login_service.py
@@ -22,6 +22,7 @@
TooManyOrgsException,
)
from utils.lambda_exceptions import LoginException
from utils.request_context import request_context

logger = LoggingService(__name__)

@@ -240,6 +241,7 @@ def issue_auth_token(
"ndr_session_id": session_id,
"nhs_user_id": user_id,
}
request_context.authorization = ndr_token_content

authorisation_token = jwt.encode(
ndr_token_content, private_key, algorithm="RS256"
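login_service.py now stores the freshly built token claims on request_context before the JWT is signed, making the logged-in user's session details available to code running later in the same request. The real utils.request_context module is not part of this diff, so the sketch below uses a plain namespace object as a stand-in:

from types import SimpleNamespace

# Stand-in for utils.request_context; the real module is not shown in this diff.
request_context = SimpleNamespace(authorization=None)


def issue_auth_token_sketch(session_id, user_id):
    ndr_token_content = {
        "ndr_session_id": session_id,
        "nhs_user_id": user_id,
    }
    # Mirror of the new line: keep the claims on the request context so later
    # code in the same request can see which user and session are acting.
    request_context.authorization = ndr_token_content
    return ndr_token_content


issue_auth_token_sketch("session-123", "user-456")
print(request_context.authorization)
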
21 changes: 14 additions & 7 deletions lambdas/services/statistical_report_service.py
@@ -154,14 +154,21 @@ def summarise_application_data(

df = self.load_data_to_polars(application_data)

count_unique_ids = (
pl.concat_list("active_user_ids_hashed")
.flatten()
.unique()
.len()
.alias("active_users_count")
summarised_data = df.group_by("ods_code").agg(
[
pl.concat_list("active_user_ids_hashed")
.flatten()
.unique()
.apply(lambda col: str(col.sort().to_list()))
.alias("unique_active_user_ids_hashed"),
pl.concat_list("active_user_ids_hashed")
.flatten()
.unique()
.len()
.alias("active_users_count"),
]
)
summarised_data = df.group_by("ods_code").agg(count_unique_ids)

return summarised_data

def join_dataframes_by_ods_code(
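summarise_application_data now aggregates two values per ODS code: the sorted unique hashed user ids (with their role suffixes) and the count of unique active users. A simplified, self-contained polars illustration of the idea follows; it keeps the unique ids as a sorted list column instead of converting them to a string, uses plain string columns rather than the exact concat_list/apply chain in the diff, and assumes a recent polars release where DataFrame.group_by is available:

import polars as pl

df = pl.DataFrame(
    {
        "ods_code": ["H81109", "H81109", "Y12345"],
        "active_user_ids_hashed": [
            "abc123 - GP_ADMIN - RO76",
            "def456 - GP_CLINICAL - RO76",
            "abc123 - PCSE - ",
        ],
    }
)

summarised = df.group_by("ods_code").agg(
    # Unique hashed ids per ODS code, collected into a sorted list per group.
    pl.col("active_user_ids_hashed").unique().sort().alias("unique_active_user_ids_hashed"),
    # How many distinct users were active for that ODS code.
    pl.col("active_user_ids_hashed").n_unique().alias("active_users_count"),
)
print(summarised)
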
14 changes: 10 additions & 4 deletions lambdas/tests/unit/helpers/data/statistic/mock_collected_data.py
@@ -6,8 +6,9 @@
TOTAL_FILE_SIZE_FOR_Y12345,
)
from tests.unit.helpers.data.statistic.mock_logs_query_results import (
HASHED_USER_ID_1,
HASHED_USER_ID_2,
HASHED_USER_ID_1_WITH_ADMIN_ROLE,
HASHED_USER_ID_1_WITH_PCSE_ROLE,
HASHED_USER_ID_2_WITH_CLINICAL_ROLE,
)

TODAY_DATE = datetime.today().strftime("%Y%m%d")
@@ -45,6 +46,7 @@
daily_count_viewed=40,
daily_count_downloaded=20,
daily_count_deleted=2,
daily_count_searched=30,
),
OrganisationData(
statistic_id="mock_uuid",
@@ -56,6 +58,7 @@
daily_count_viewed=20,
daily_count_downloaded=10,
daily_count_deleted=1,
daily_count_searched=50,
),
]

@@ -64,13 +67,16 @@
statistic_id="mock_uuid",
date=TODAY_DATE,
ods_code="H81109",
active_user_ids_hashed=[HASHED_USER_ID_1, HASHED_USER_ID_2],
active_user_ids_hashed=[
HASHED_USER_ID_1_WITH_ADMIN_ROLE,
HASHED_USER_ID_2_WITH_CLINICAL_ROLE,
],
),
ApplicationData(
statistic_id="mock_uuid",
date=TODAY_DATE,
ods_code="Y12345",
active_user_ids_hashed=[HASHED_USER_ID_1],
active_user_ids_hashed=[HASHED_USER_ID_1_WITH_PCSE_ROLE],
),
]

lambdas/tests/unit/helpers/data/statistic/mock_logs_query_results.py
@@ -2,19 +2,24 @@
USER_ID_2 = "9E7F1235-3DF1-4822-AFFB-C4FCC88C2690"
HASHED_USER_ID_1 = "3192b6cf7ef953cf1a1f0945a83b55ab2cb8bae95cac6548ae5412aaa4c67677"
HASHED_USER_ID_2 = "a89d1cb4ac0776e45131c65a69e8b1a48026e9b497c94409e480588418a016e4"
HASHED_USER_ID_1_WITH_ADMIN_ROLE = f"{HASHED_USER_ID_1} - GP_ADMIN - RO76"
HASHED_USER_ID_1_WITH_PCSE_ROLE = f"{HASHED_USER_ID_1} - PCSE - "
HASHED_USER_ID_2_WITH_CLINICAL_ROLE = f"{HASHED_USER_ID_2} - GP_CLINICAL - RO76"


MOCK_UNIQUE_ACTIVE_USER_IDS = [
{
"ods_code": "Y12345",
"user_id": USER_ID_1,
},
{"ods_code": "Y12345", "user_id": USER_ID_1, "role_code": "", "user_role": "PCSE"},
{
"ods_code": "H81109",
"user_id": USER_ID_1,
"role_code": "RO76",
"user_role": "GP_ADMIN",
},
{
"ods_code": "H81109",
"user_id": USER_ID_2,
"role_code": "RO76",
"user_role": "GP_CLINICAL",
},
]

@@ -63,6 +68,18 @@
},
]

MOCK_PATIENT_SEARCHED = [
{
"ods_code": "Y12345",
"daily_count_searched": "50",
},
{
"ods_code": "H81109",
"daily_count_searched": "30",
},
]


MOCK_RESPONSE_QUERY_IN_PROGRESS = {"status": "Running"}

MOCK_RESPONSE_QUERY_FAILED = {"status": "Failed"}
46 changes: 38 additions & 8 deletions lambdas/tests/unit/helpers/data/statistic/mock_statistic_data.py
@@ -124,6 +124,7 @@
"weekly_count_viewed": 35,
"weekly_count_downloaded": 4,
"weekly_count_deleted": 1,
"weekly_count_searched": 0,
"average_records_per_patient": 4.5,
"number_of_patients": 4,
},
Expand All @@ -133,6 +134,7 @@
"weekly_count_viewed": 15 + 30,
"weekly_count_downloaded": 1 + 5,
"weekly_count_deleted": 1 + 1,
"weekly_count_searched": 0,
"average_records_per_patient": (3.51 + 2.78) / 2,
"number_of_patients": 10,
},
@@ -202,14 +204,6 @@
"cf1af742e351ce63d8ed275d4bec8d8f",
],
)

EXPECTED_SUMMARY_APPLICATION_DATA = pl.DataFrame(
[
{"ods_code": "Z56789", "active_users_count": 1},
{"ods_code": "Y12345", "active_users_count": 3},
],
)

SERIALISED_APPLICATION_DATA = [
{
"Date": "20240510",
@@ -239,6 +233,30 @@
},
]

EXPECTED_SUMMARY_APPLICATION_DATA = pl.DataFrame(
[
{
"ods_code": "Z56789",
"active_users_count": 1,
"unique_active_user_ids_hashed": str(
[str(SERIALISED_APPLICATION_DATA[0]["ActiveUserIdsHashed"][0])]
),
},
{
"ods_code": "Y12345",
"active_users_count": 3,
"unique_active_user_ids_hashed": str(
[
str(SERIALISED_APPLICATION_DATA[1]["ActiveUserIdsHashed"][0]),
str(SERIALISED_APPLICATION_DATA[1]["ActiveUserIdsHashed"][1]),
str(SERIALISED_APPLICATION_DATA[2]["ActiveUserIdsHashed"][1]),
]
),
},
],
)


ALL_MOCKED_STATISTIC_DATA = (
[MOCK_RECORD_STORE_DATA_1, MOCK_RECORD_STORE_DATA_2, MOCK_RECORD_STORE_DATA_3],
[MOCK_ORGANISATION_DATA_1, MOCK_ORGANISATION_DATA_2, MOCK_ORGANISATION_DATA_3],
@@ -274,8 +292,12 @@
"Number of patients": 4,
"Total number of records": 18,
"Total size of records in megabytes": 1.75,
"Unique active user ids hashed": str(
[str(SERIALISED_APPLICATION_DATA[0]["ActiveUserIdsHashed"][0])]
),
"Weekly count deleted": 1,
"Weekly count downloaded": 4,
"Weekly count searched": 0,
"Weekly count stored": 0,
"Weekly count viewed": 35,
},
@@ -289,8 +311,16 @@
"Number of patients": 10,
"Total number of records": 20,
"Total size of records in megabytes": 2.34,
"Unique active user ids hashed": str(
[
str(SERIALISED_APPLICATION_DATA[1]["ActiveUserIdsHashed"][0]),
str(SERIALISED_APPLICATION_DATA[1]["ActiveUserIdsHashed"][1]),
str(SERIALISED_APPLICATION_DATA[2]["ActiveUserIdsHashed"][1]),
]
),
"Weekly count deleted": 1 + 1,
"Weekly count downloaded": 1 + 5,
"Weekly count searched": 0,
"Weekly count stored": 0 + 2,
"Weekly count viewed": 15 + 30,
},
18 changes: 12 additions & 6 deletions lambdas/tests/unit/services/test_data_collection_service.py
@@ -33,18 +33,21 @@
build_mock_results,
)
from tests.unit.helpers.data.statistic.mock_logs_query_results import (
HASHED_USER_ID_1,
HASHED_USER_ID_2,
HASHED_USER_ID_1_WITH_ADMIN_ROLE,
HASHED_USER_ID_1_WITH_PCSE_ROLE,
HASHED_USER_ID_2_WITH_CLINICAL_ROLE,
MOCK_LG_DELETED,
MOCK_LG_DOWNLOADED,
MOCK_LG_STORED,
MOCK_LG_VIEWED,
MOCK_PATIENT_SEARCHED,
MOCK_UNIQUE_ACTIVE_USER_IDS,
)
from utils.cloudwatch_logs_query import (
CloudwatchLogsQueryParams,
LloydGeorgeRecordsDeleted,
LloydGeorgeRecordsDownloaded,
LloydGeorgeRecordsSearched,
LloydGeorgeRecordsStored,
LloydGeorgeRecordsViewed,
UniqueActiveUserIds,
@@ -99,6 +102,8 @@ def mock_implementation(query_params: CloudwatchLogsQueryParams, **_kwargs):
return MOCK_LG_STORED
elif query_params == UniqueActiveUserIds:
return MOCK_UNIQUE_ACTIVE_USER_IDS
elif query_params == LloydGeorgeRecordsSearched:
return MOCK_PATIENT_SEARCHED

patched_instance = mocker.patch(
"services.data_collection_service.CloudwatchService",
@@ -168,11 +173,12 @@ def test_collect_all_data_and_write_to_dynamodb(mock_service, mocker):


def test_collect_all_data(mock_service, mock_uuid):
actual = mock_service.collect_all_data()
expected = unordered(
MOCK_RECORD_STORE_DATA + MOCK_ORGANISATION_DATA + MOCK_APPLICATION_DATA
)

actual = mock_service.collect_all_data()

assert actual == expected


@@ -250,10 +256,10 @@ def test_get_active_user_list(set_env, mock_query_logs):
service = DataCollectionService()
expected = {
"H81109": [
HASHED_USER_ID_1,
HASHED_USER_ID_2,
HASHED_USER_ID_1_WITH_ADMIN_ROLE,
HASHED_USER_ID_2_WITH_CLINICAL_ROLE,
],
"Y12345": [HASHED_USER_ID_1],
"Y12345": [HASHED_USER_ID_1_WITH_PCSE_ROLE],
}
actual = service.get_active_user_list()

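The test fixture above extends the CloudwatchService mock so that a LloydGeorgeRecordsSearched query returns MOCK_PATIENT_SEARCHED. For reference, a minimal self-contained version of that dispatch-by-query-parameter mocking pattern, written with unittest.mock instead of pytest-mock; the sentinel objects and the query_logs method name are placeholders, not the real classes or API:

from unittest.mock import MagicMock

# Placeholder sentinels standing in for the real CloudwatchLogsQueryParams objects.
LloydGeorgeRecordsSearched = object()
UniqueActiveUserIds = object()

MOCK_PATIENT_SEARCHED = [{"ods_code": "Y12345", "daily_count_searched": "50"}]
MOCK_UNIQUE_ACTIVE_USER_IDS = [{"ods_code": "Y12345", "user_id": "example-id"}]


def mock_implementation(query_params, **_kwargs):
    # Return a different canned result depending on which query was requested.
    if query_params is LloydGeorgeRecordsSearched:
        return MOCK_PATIENT_SEARCHED
    if query_params is UniqueActiveUserIds:
        return MOCK_UNIQUE_ACTIVE_USER_IDS
    return []


cloudwatch_service = MagicMock()
cloudwatch_service.query_logs.side_effect = mock_implementation

print(cloudwatch_service.query_logs(LloydGeorgeRecordsSearched))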