diff --git a/hakai_ckan_records_checks/__main__.py b/hakai_ckan_records_checks/__main__.py index cd5d72ea..ee20c1e9 100644 --- a/hakai_ckan_records_checks/__main__.py +++ b/hakai_ckan_records_checks/__main__.py @@ -4,11 +4,11 @@ from pathlib import Path import click -import numpy as np import pandas as pd from jinja2 import Environment, FileSystemLoader from loguru import logger from tqdm import tqdm +import plotly.express as px from hakai_ckan_records_checks import hakai from hakai_ckan_records_checks.ckan import CKAN @@ -126,10 +126,23 @@ def main(ckan_url, api_key, output, max_workers, log_level, cache): logger.info(f"Saving results to: '{output}'") results["catalog_summary"] = format_summary(results["catalog_summary"]) + # Combine summary and issues + combined_issues = results['test_results'].merge(results['catalog_summary'], left_on='record_id', right_on='id').drop(columns=['id']) + standardized_issues = combined_issues.copy() + standardized_issues['message'] = standardized_issues['message'].str.replace('resources\[[0-9]+\]','resources[...]',regex=True) + + # Generate figures + pie_chart = px.pie(standardized_issues, names='message',title='Hakai Records Issues Distribution') + pie_chart.update_traces(textposition='inside') + pie_chart.update_layout(uniformtext_minsize=12, uniformtext_mode='hide') + pie_chart_html = pie_chart.to_html(full_html=False) + # save results Path(output).mkdir(parents=True, exist_ok=True) environment.get_template("index.html.jinja").stream( catalog_summary=format_summary(results["catalog_summary"]), + issues_pie_chart=pie_chart_html, + issues_table = combined_issues, time=pd.Timestamp.utcnow(), ckan_url=ckan_url, ).dump(f"{output}/index.html") @@ -143,6 +156,9 @@ def main(ckan_url, api_key, output, max_workers, log_level, cache): time=pd.Timestamp.utcnow(), ).dump(f"{output}/issues/{record_id}.html") + # save results + results['catalog_summary'].to_csv(f"{output}/catalog_summary.csv", index=False) + results['test_results'].to_csv(f"{output}/test_results.csv", index=False) if __name__ == "__main__": main() diff --git a/hakai_ckan_records_checks/hakai.py b/hakai_ckan_records_checks/hakai.py index f2b86a4f..e3465300 100644 --- a/hakai_ckan_records_checks/hakai.py +++ b/hakai_ckan_records_checks/hakai.py @@ -55,20 +55,20 @@ def _test(condition, level, message): # Review publisher - # Review ressources - for index, ressource in enumerate(record["resources"]): - _test(ressource["name"] != "", "ERROR", "Empty ressource name") - _test(ressource["url"] != "", "ERROR", "Empty ressource url") - _test(ressource["format"] != "", "ERROR", "Empty ressource format") + # Review resources + for index, resource in enumerate(record["resources"]): + _test(resource["name"] != "", "ERROR", "Empty resource name") + _test(resource["url"] != "", "ERROR", "Empty resource url") + _test(resource["format"] != "", "ERROR", "Empty resource format") _test( - ressource["format"] in ["HTML"], + resource["format"] in ["HTML","ERDDAP","OBIS"], "ERROR", - f"Invalid ressource format: resources[{index}].format={ressource['format']}", + f"Invalid resource format: resources[{index}].format={resource['format']}", ) _test( - requests.get(ressource["url"]).status_code == 200, + requests.get(resource["url"]).status_code == 200, "ERROR", - f"Invalid ressources[{index}].url.status_code={requests.get(ressource['url']).status_code}", + f"Invalid resources[{index}].url.status_code={requests.get(resource['url']).status_code}", ) # test spatial diff --git a/hakai_ckan_records_checks/templates/index.html.jinja b/hakai_ckan_records_checks/templates/index.html.jinja index eafc6b4a..d55510fa 100644 --- a/hakai_ckan_records_checks/templates/index.html.jinja +++ b/hakai_ckan_records_checks/templates/index.html.jinja @@ -8,6 +8,7 @@ integrity="sha256-p4NxAoJBhIIN+hmNHrzRCf9tD/miZyoHS5obTRR9BMY=" crossorigin=""/> +
This page present a summary of the different record hosted on the CKAN Catalogue.
- -This page present a summary of:
+
Records hosted on the catalogue
+
Issues detected in some records
+