diff --git a/poetry.lock b/poetry.lock index 530b95c9e..f7c5329a6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -599,6 +599,17 @@ files = [ {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, ] +[[package]] +name = "distro" +version = "1.9.0" +description = "Distro - an OS platform information API" +optional = false +python-versions = ">=3.6" +files = [ + {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, + {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, +] + [[package]] name = "django" version = "4.2.7" @@ -621,7 +632,7 @@ bcrypt = ["bcrypt"] [[package]] name = "django-ansible-base" -version = "2024.9.12.0.dev4+ga44d6bf" +version = "2024.9.17.0.dev9+gc8fbc1e" description = "A Django app used by ansible services" optional = false python-versions = ">=3.9" @@ -629,20 +640,20 @@ files = [] develop = false [package.dependencies] -channels = {version = "*", optional = true, markers = "extra == \"channel-auth\""} +channels = {version = "*", optional = true, markers = "extra == \"channel_auth\""} cryptography = "*" Django = ">=4.2.5,<4.3.0" django-crum = "*" -django-redis = {version = "*", optional = true, markers = "extra == \"redis-client\""} +django-redis = {version = "*", optional = true, markers = "extra == \"redis_client\""} django-split-settings = "*" djangorestframework = "*" inflection = "*" -redis = {version = "*", optional = true, markers = "extra == \"redis-client\""} +redis = {version = "*", optional = true, markers = "extra == \"redis_client\""} [package.extras] -all = ["channels", "cryptography", "django-auth-ldap", "django-oauth-toolkit (<2.4.0)", "django-redis", "drf-spectacular", "pyjwt", "pyrad", "pytest", "pytest-django", "python-ldap", "python3-saml", "redis", "requests", "social-auth-app-django", "tabulate", "tacacs_plus", "xmlsec (==1.3.13)"] +all = ["channels", "cryptography", "django-auth-ldap", "django-oauth-toolkit (<2.4.0)", "django-redis", "drf-spectacular", "pyjwt", "pyrad", "pytest", "pytest-django", "python-ldap", "python3-saml", "redis", "requests", "social-auth-app-django (==5.4.1)", "tabulate", "tacacs-plus", "xmlsec (==1.3.13)"] api-documentation = ["drf-spectacular"] -authentication = ["django-auth-ldap", "pyrad", "python-ldap", "python3-saml", "social-auth-app-django", "tabulate", "tacacs_plus", "xmlsec (==1.3.13)"] +authentication = ["django-auth-ldap", "pyrad", "python-ldap", "python3-saml", "social-auth-app-django (==5.4.1)", "tabulate", "tacacs-plus", "xmlsec (==1.3.13)"] channel-auth = ["channels"] jwt-consumer = ["pyjwt", "requests"] oauth2-provider = ["django-oauth-toolkit (<2.4.0)"] @@ -653,7 +664,7 @@ testing = ["cryptography", "pytest", "pytest-django"] type = "git" url = "https://github.com/ansible/django-ansible-base.git" reference = "devel" -resolved_reference = "a44d6bfab27ffb33a2bd04604b0cb88c7dd7762d" +resolved_reference = "c8fbc1e345d4908cc97eaae20771238a5dd35aad" [[package]] name = "django-crum" @@ -1094,6 +1105,21 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "insights-analytics-collector" +version = "0.3.2" +description = "Collector Package for Insights for AAP" +optional = false +python-versions = "*" +files = [ + {file = "insights-analytics-collector-0.3.2.tar.gz", hash = 
"sha256:c8464e2f602b01e42574bdad85ed4c8afca9d613c8c886fcbe72f592899f520b"}, + {file = "insights_analytics_collector-0.3.2-py3-none-any.whl", hash = "sha256:0e88b938d05df83ced969fd0ee29e8452745240ae622fde75aded97a56ee1cf8"}, +] + +[package.dependencies] +django = "*" +requests = "*" + [[package]] name = "ipython" version = "8.17.2" @@ -2795,4 +2821,4 @@ dev = ["psycopg-binary"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.13" -content-hash = "78e2f602f1583537aae38421cf838d9d494b32d34c1e0877e22d2891753d614c" +content-hash = "730133e85efbdd484d267aeee8d34d057c4b12a4aefe0f282522a179a2e21179" diff --git a/pyproject.toml b/pyproject.toml index 17c099989..bc0c9ad86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,8 @@ psycopg = "^3.1.17" xxhash = "*" pyjwt = { version = "*", extras = ["crypto"] } ecdsa = "*" +insights-analytics-collector = "^0.3.2" +distro = "^1.9.0" [tool.poetry.group.test.dependencies] pytest = "*" diff --git a/src/aap_eda/analytics/analytics_collectors.py b/src/aap_eda/analytics/analytics_collectors.py new file mode 100644 index 000000000..cd9fe7fbe --- /dev/null +++ b/src/aap_eda/analytics/analytics_collectors.py @@ -0,0 +1,360 @@ +import os +import platform +from datetime import datetime + +import distro +from ansible_base.resource_registry.models.service_identifier import service_id +from django.conf import settings +from django.db.models import Manager, Q +from insights_analytics_collector import CsvFileSplitter, register + +from aap_eda.analytics.collector import AnalyticsCollector +from aap_eda.core import models +from aap_eda.utils import get_eda_version + + +@register( + "config", + "1.0", + description="General platform configuration.", + config=True, +) +def config(**kwargs) -> dict: + install_type = "traditional" + if os.environ.get("container") == "oci": + install_type = "openshift" + elif "KUBERNETES_SERVICE_PORT" in os.environ: + install_type = "k8s" + return { + "install_uuid": service_id(), + "platform": { + "system": platform.system(), + "dist": distro.linux_distribution(), + "release": platform.release(), + "type": install_type, + }, + # skip license related info so far + "eda_log_level": settings.APP_LOG_LEVEL, + "eda_version": get_eda_version(), + "eda_deployment_type": settings.DEPLOYMENT_TYPE, + } + + +@register( + "activations_table", + "1.0", + format="csv", + description="Data on activations", +) +def activations_table( + since: datetime, full_path: str, until: datetime, **kwargs +): + query = _get_query(models.Activation.objects, since, until) + + return _copy_table("activations", query, full_path) + + +@register( + "audit_action_table", + "1.0", + format="csv", + description="Data on audit_actions", +) +def audit_actions_table( + since: datetime, full_path: str, until: datetime, **kwargs +): + audit_actions = _get_audit_action_qs(since, until) + + if not bool(audit_actions): + return + + audit_action_query = ( + f"COPY ({audit_actions.query}) TO STDOUT WITH CSV HEADER" + ) + + return _copy_table("audit_actions", audit_action_query, full_path) + + +@register( + "audit_event_table", + "1.0", + format="csv", + description="Data on audit_events", +) +def audit_events_table( + since: datetime, full_path: str, until: datetime, **kwargs +): + audit_actions = _get_audit_action_qs(since, until) + if not bool(audit_actions): + return + + audit_event_query = _get_audit_event_query(audit_actions) + if not bool(audit_event_query): + return + + return _copy_table("audit_events", audit_event_query, full_path) + + +@register( + 
"audit_rule_table", + "1.0", + format="csv", + description="Data on audit_rules", +) +def audit_rules_table( + since: datetime, full_path: str, until: datetime, **kwargs +): + audit_rules = _get_audit_rule_qs(since, until) + if not bool(audit_rules): + return + + audit_rule_query = f"COPY ({audit_rules.query}) TO STDOUT WITH CSV HEADER" + + return _copy_table("audit_rules", audit_rule_query, full_path) + + +@register( + "eda_credential_table", + "1.0", + format="csv", + description="Data on eda_credentials", +) +def eda_credentials_table( + since: datetime, full_path: str, until: datetime, **kwargs +): + query = _get_query(models.EdaCredential.objects, since, until) + + return _copy_table("eda_credentials", query, full_path) + + +@register( + "credential_type_table", + "1.0", + format="csv", + description="Data on credential_types", +) +def credential_types_table( + since: datetime, full_path: str, until: datetime, **kwargs +): + query = _get_query(models.CredentialType.objects, since, until) + + return _copy_table("credential_types", query, full_path) + + +@register( + "decision_environment_table", + "1.0", + format="csv", + description="Data on decision_environments", +) +def decision_environments_table( + since: datetime, full_path: str, until: datetime, **kwargs +): + query = _get_query(models.DecisionEnvironment.objects, since, until) + return _copy_table("decision_environments", query, full_path) + + +@register( + "event_stream_table", + "1.0", + format="csv", + description="Data on event_streams", +) +def event_streams_table( + since: datetime, full_path: str, until: datetime, **kwargs +): + query = _get_query(models.EventStream.objects, since, until) + return _copy_table("event_streams", query, full_path) + + +@register( + "project_table", + "1.0", + format="csv", + description="Data on projects", +) +def projects_table(since: datetime, full_path: str, until: datetime, **kwargs): + query = _get_query(models.Project.objects, since, until) + return _copy_table("projects", query, full_path) + + +@register( + "rulebook_table", + "1.0", + format="csv", + description="Data on rulebooks", +) +def rulebooks_table( + since: datetime, full_path: str, until: datetime, **kwargs +): + query = _get_query(models.Rulebook.objects, since, until) + return _copy_table("rulebooks", query, full_path) + + +@register( + "rulebook_process_table", + "1.0", + format="csv", + description="Data on rulebook_processes", +) +def rulebook_processes_table( + since: datetime, full_path: str, until: datetime, **kwargs +): + args = {"started_at": True} + query = _get_query(models.RulebookProcess.objects, since, until, **args) + return _copy_table("rulebook_processes", query, full_path) + + +@register( + "organization_table", + "1.0", + format="csv", + description="Data on organizations", +) +def organizations_table( + since: datetime, full_path: str, until: datetime, **kwargs +): + args = {"created": True} + query = _get_query(models.Organization.objects, since, until, **args) + return _copy_table("organizations", query, full_path) + + +@register( + "team_table", + "1.0", + format="csv", + description="Data on teams", +) +def teams_table(since: datetime, full_path: str, until: datetime, **kwargs): + args = {"created": True} + query = _get_query(models.Team.objects, since, until, **args) + + return _copy_table("teams", query, full_path) + + +def _datetime_format(dt: datetime) -> str: + """Convert datetime object to string.""" + if dt.microsecond == 0: + iso_format = dt.strftime("%Y-%m-%d %H:%M:%S%z") + else: + 
iso_format = dt.strftime("%Y-%m-%d %H:%M:%S.%f%z")
+
+    return iso_format[:-2] + ":" + iso_format[-2:]
+
+
+def _get_query(
+    objects: Manager, since: datetime, until: datetime, **kwargs
+) -> str:
+    """Construct sql query with datetime params."""
+    if kwargs.get("started_at"):
+        qs = (
+            objects.filter(
+                Q(started_at__gt=since, started_at__lte=until)
+                | Q(updated_at__gt=since, updated_at__lte=until)
+            )
+            .order_by("id")
+            .distinct()
+        )
+    elif kwargs.get("created"):
+        qs = (
+            objects.filter(
+                Q(created__gt=since, created__lte=until)
+                | Q(modified__gt=since, modified__lte=until)
+            )
+            .order_by("id")
+            .distinct()
+        )
+    else:
+        qs = (
+            objects.filter(
+                Q(created_at__gt=since, created_at__lte=until)
+                | Q(modified_at__gt=since, modified_at__lte=until)
+            )
+            .order_by("id")
+            .distinct()
+        )
+
+    query = (
+        str(qs.query)
+        .replace(_datetime_format(since), f"'{since.isoformat()}'")
+        .replace(_datetime_format(until), f"'{until.isoformat()}'")
+    )
+
+    return f"COPY ({query}) TO STDOUT WITH CSV HEADER"
+
+
+def _get_audit_event_query(actions: list[models.AuditAction]):
+    events = models.AuditEvent.objects.none()
+    for action in actions:
+        events |= action.audit_events.all()
+
+    if not bool(events):
+        return
+
+    query = str(events.distinct().query)
+
+    for action in actions:
+        query = query.replace(str(action.id), f"'{action.id}'")
+
+    return f"COPY ({query}) TO STDOUT WITH CSV HEADER"
+
+
+def _get_audit_rule_qs(since: datetime, until: datetime):
+    activation_instance_ids = (
+        models.RulebookProcess.objects.filter(
+            Q(
+                started_at__gt=since.isoformat(),
+                started_at__lte=until.isoformat(),
+            )
+            | Q(
+                updated_at__gt=since.isoformat(),
+                updated_at__lte=until.isoformat(),
+            )
+        )
+        .values_list("id", flat=True)
+        .distinct()
+    )
+
+    if len(activation_instance_ids) == 0:
+        return models.AuditRule.objects.none()
+
+    if len(activation_instance_ids) == 1:
+        audit_rules = models.AuditRule.objects.filter(
+            activation_instance_id=activation_instance_ids[0]
+        ).order_by("id")
+    else:
+        audit_rules = models.AuditRule.objects.filter(
+            activation_instance_id__in=tuple(activation_instance_ids)
+        ).order_by("id")
+
+    return audit_rules
+
+
+def _get_audit_action_qs(since: datetime, until: datetime):
+    audit_rules = _get_audit_rule_qs(since, until)
+    audit_rule_ids = audit_rules.values_list("id", flat=True).distinct()
+
+    if len(audit_rule_ids) == 0:
+        return models.AuditAction.objects.none()
+
+    if len(audit_rule_ids) == 1:
+        audit_actions = models.AuditAction.objects.filter(
+            audit_rule_id=audit_rule_ids[0],
+        ).order_by("id")
+    else:
+        audit_actions = models.AuditAction.objects.filter(
+            audit_rule_id__in=tuple(audit_rule_ids)
+        ).order_by("id")
+
+    return audit_actions
+
+
+def _copy_table(table, query, path):
+    file_path = os.path.join(path, table + "_table.csv")
+    file = CsvFileSplitter(filespec=file_path)
+    with AnalyticsCollector.db_connection().cursor() as cursor:
+        with cursor.copy(query) as copy:
+            while data := copy.read():
+                byte_data = bytes(data)
+                file.write(byte_data.decode())
+    return file.file_list()
diff --git a/src/aap_eda/analytics/collector.py b/src/aap_eda/analytics/collector.py
new file mode 100644
index 000000000..19dc4a0d1
--- /dev/null
+++ b/src/aap_eda/analytics/collector.py
@@ -0,0 +1,84 @@
+# Copyright 2024 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+from datetime import datetime
+from typing import Optional
+
+from django.core.serializers.json import DjangoJSONEncoder
+from django.db import connection
+from insights_analytics_collector import Collector
+
+from aap_eda.analytics.package import Package
+from aap_eda.analytics.utils import datetime_hook
+from aap_eda.conf.settings import application_settings
+
+
+class AnalyticsCollector(Collector):
+    @staticmethod
+    def db_connection() -> connection:
+        return connection
+
+    @staticmethod
+    def _package_class() -> Package:
+        return Package
+
+    def _is_shipping_configured(self) -> bool:
+        if not application_settings.INSIGHTS_TRACKING_STATE:
+            self.logger.warning(
+                "Insights for Event Driven Ansible is not enabled."
+            )
+            return False
+
+        return True
+
+    def _is_valid_license(self) -> bool:
+        # ignore license information checking for now
+        return True
+
+    def _last_gathering(self) -> Optional[datetime]:
+        self.logger.info(
+            "Last gather: "
+            f"{application_settings.AUTOMATION_ANALYTICS_LAST_GATHER}"
+        )
+
+        return (
+            datetime.fromisoformat(
+                application_settings.AUTOMATION_ANALYTICS_LAST_GATHER
+            )
+            if bool(application_settings.AUTOMATION_ANALYTICS_LAST_GATHER)
+            else None
+        )
+
+    def _load_last_gathered_entries(self) -> dict:
+        last_entries = application_settings.AUTOMATION_ANALYTICS_LAST_ENTRIES
+        last_entries = last_entries.replace("'", '"')
+        self.logger.info(f"Last collect entries: {last_entries}")
+
+        return json.loads(last_entries, object_hook=datetime_hook)
+
+    def _save_last_gathered_entries(self, last_gathered_entries: dict) -> None:
+        application_settings.AUTOMATION_ANALYTICS_LAST_ENTRIES = json.dumps(
+            last_gathered_entries, cls=DjangoJSONEncoder
+        )
+        self.logger.info(
+            "Save last_entries: "
+            f"{application_settings.AUTOMATION_ANALYTICS_LAST_ENTRIES}"
+        )
+
+    def _save_last_gather(self) -> None:
+        self.logger.info(f"Save last_gather: {self.gather_until}")
+
+        application_settings.AUTOMATION_ANALYTICS_LAST_GATHER = (
+            self.gather_until.isoformat()
+        )
diff --git a/src/aap_eda/analytics/package.py b/src/aap_eda/analytics/package.py
new file mode 100644
index 000000000..7a59f2c8b
--- /dev/null
+++ b/src/aap_eda/analytics/package.py
@@ -0,0 +1,77 @@
+# Copyright 2024 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
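
The collector above persists its bookkeeping (AUTOMATION_ANALYTICS_LAST_GATHER, AUTOMATION_ANALYTICS_LAST_ENTRIES) as plain strings, so gathered timestamps must survive a JSON round trip. A minimal standalone sketch of that round trip, assuming only a configured Django environment (DjangoJSONEncoder serializes datetimes to ISO-8601 on save; datetime_hook restores them on load, exactly as in aap_eda.analytics.utils below):

    import json

    from django.core.serializers.json import DjangoJSONEncoder
    from django.utils.dateparse import parse_datetime
    from django.utils.timezone import now

    def datetime_hook(dt: dict) -> dict:
        # Same shape as aap_eda.analytics.utils.datetime_hook: parse_datetime
        # raises TypeError for non-string values, which pass through unchanged.
        new_dt = {}
        for key, value in dt.items():
            try:
                new_dt[key] = parse_datetime(value)
            except TypeError:
                new_dt[key] = value
        return new_dt

    entries = {"config": now().replace(microsecond=0), "count": 3}
    stored = json.dumps(entries, cls=DjangoJSONEncoder)  # datetimes -> ISO strings
    restored = json.loads(stored, object_hook=datetime_hook)
    assert restored["config"] == entries["config"]  # datetime restored
    assert restored["count"] == 3  # non-strings untouched
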
+from django.conf import settings
+from insights_analytics_collector import Package as InsightsAnalyticsPackage
+
+from aap_eda.conf import application_settings
+
+
+class MissingUserPasswordError(Exception):
+    pass
+
+
+class Package(InsightsAnalyticsPackage):
+    PAYLOAD_CONTENT_TYPE = "application/vnd.redhat.aap-eda.filename+tgz"
+    CERT_PATH = settings.INSIGHTS_CERT_PATH
+
+    def _tarname_base(self) -> str:
+        timestamp = self.collector.gather_until
+        return f'eda-analytics-{timestamp.strftime("%Y-%m-%d-%H%M%S%z")}'
+
+    def get_ingress_url(self) -> str:
+        return application_settings.AUTOMATION_ANALYTICS_URL
+
+    def shipping_auth_mode(self) -> str:
+        return settings.AUTOMATION_AUTH_METHOD
+
+    def _get_rh_user(self) -> str:
+        self._check_users()
+        user_name = (
+            application_settings.REDHAT_USERNAME
+            or application_settings.SUBSCRIPTIONS_USERNAME
+        )
+
+        return user_name
+
+    def _get_rh_password(self) -> str:
+        self._check_users()
+        user_password = (
+            application_settings.REDHAT_PASSWORD
+            or application_settings.SUBSCRIPTIONS_PASSWORD
+        )
+
+        return user_password
+
+    def _get_http_request_headers(self) -> dict:
+        return {
+            "Content-Type": self.PAYLOAD_CONTENT_TYPE,
+            "User-Agent": "EDA-metrics-agent",
+        }
+
+    def _check_users(self) -> None:
+        if (
+            application_settings.REDHAT_USERNAME
+            and application_settings.REDHAT_PASSWORD
+        ):
+            return
+
+        if (
+            application_settings.SUBSCRIPTIONS_USERNAME
+            and application_settings.SUBSCRIPTIONS_PASSWORD
+        ):
+            return
+
+        raise MissingUserPasswordError(
+            "User information is missing in application settings"
+        )
diff --git a/src/aap_eda/analytics/utils.py b/src/aap_eda/analytics/utils.py
new file mode 100644
index 000000000..8687b77b5
--- /dev/null
+++ b/src/aap_eda/analytics/utils.py
@@ -0,0 +1,25 @@
+# Copyright 2024 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from django.utils.dateparse import parse_datetime
+
+
+def datetime_hook(dt: dict) -> dict:
+    new_dt = {}
+    for key, value in dt.items():
+        try:
+            new_dt[key] = parse_datetime(value)
+        except TypeError:
+            new_dt[key] = value
+    return new_dt
diff --git a/src/aap_eda/conf/registry.py b/src/aap_eda/conf/registry.py
index e9b3b7032..30e41d217 100644
--- a/src/aap_eda/conf/registry.py
+++ b/src/aap_eda/conf/registry.py
@@ -84,8 +84,7 @@ class RegistryData(object):
         ),
         RegistryData(
             name="AUTOMATION_ANALYTICS_LAST_ENTRIES",
-            type=dict,
-            default={},
+            default="{}",  # noqa P103
         ),
         RegistryData(
             name="AUTOMATION_ANALYTICS_GATHER_INTERVAL",
diff --git a/src/aap_eda/core/management/commands/gather_analytics.py b/src/aap_eda/core/management/commands/gather_analytics.py
new file mode 100644
index 000000000..271e61ed6
--- /dev/null
+++ b/src/aap_eda/core/management/commands/gather_analytics.py
@@ -0,0 +1,107 @@
+# Copyright 2024 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from dateutil import parser +from django.core.management.base import BaseCommand, CommandParser +from django.utils import timezone + +from aap_eda.analytics import analytics_collectors +from aap_eda.analytics.collector import AnalyticsCollector + + +class Command(BaseCommand): + """Collect analytics data.""" + + help = "Collect analytics data" + + def add_arguments(self, parser: CommandParser) -> None: + parser.add_argument( + "--dry-run", + dest="dry-run", + action="store_true", + help=( + "Gather analytics without shipping. Works even if analytics" + " are disabled in settings." + ), + ) + parser.add_argument( + "--ship", + dest="ship", + action="store_true", + help="Enable to ship metrics to the Red Hat Cloud", + ) + parser.add_argument( + "--since", + dest="since", + action="store", + help="Start date for collection", + ) + parser.add_argument( + "--until", + dest="until", + action="store", + help="End date for collection", + ) + + def init_logging(self) -> None: + self.logger = logging.getLogger("aap_eda.analytics") + handler = logging.StreamHandler() + handler.setLevel(logging.INFO) + handler.setFormatter(logging.Formatter("%(message)s")) + self.logger.addHandler(handler) + self.logger.propagate = False + + def handle(self, *args, **options): + self.init_logging() + opt_ship = options.get("ship") + opt_dry_run = options.get("dry-run") + opt_since = options.get("since") + opt_until = options.get("until") + + since = parser.parse(opt_since) if opt_since else None + if since and since.tzinfo is None: + since = since.replace(tzinfo=timezone.utc) + + until = parser.parse(opt_until) if opt_until else None + if until and until.tzinfo is None: + until = until.replace(tzinfo=timezone.utc) + + if opt_ship and opt_dry_run: + self.logger.error( + "Both --ship and --dry-run cannot be processed " + "at the same time." 
+ ) + return + + if not opt_ship and not opt_dry_run: + self.logger.error("Either --ship or --dry-run needs to be set.") + return + + collector = AnalyticsCollector( + collector_module=analytics_collectors, + collection_type="manual" if opt_ship else "dry-run", + logger=self.logger, + ) + tgzfiles = collector.gather(since=since, until=until) + + if not tgzfiles: + self.logger.info("No analytics collected") + return + + for tgz in tgzfiles: + self.logger.info(tgz) + + self.logger.info("Analytics collection is done") diff --git a/src/aap_eda/settings/default.py b/src/aap_eda/settings/default.py index fd524d109..9ba598a4f 100644 --- a/src/aap_eda/settings/default.py +++ b/src/aap_eda/settings/default.py @@ -791,5 +791,11 @@ def get_rulebook_process_log_level() -> RulebookProcessLogLevel: settings.get("MAX_PG_NOTIFY_MESSAGE_SIZE", 6144) ) +# -------------------------------------------------------- +# METRICS COLLECTIONS: +# -------------------------------------------------------- AUTOMATION_ANALYTICS_URL = settings.get("AUTOMATION_ANALYTICS_URL", "") INSIGHTS_CERT_PATH = settings.get("INSIGHTS_CERT_PATH", "") +# Available methods: +# https://github.com/RedHatInsights/insights-analytics-collector/blob/main/insights_analytics_collector/package.py#L27 +AUTOMATION_AUTH_METHOD = settings.get("AUTOMATION_AUTH_METHOD", "user-pass") diff --git a/tests/integration/analytics/test_analytics_collectors.py b/tests/integration/analytics/test_analytics_collectors.py new file mode 100644 index 000000000..aff6e145c --- /dev/null +++ b/tests/integration/analytics/test_analytics_collectors.py @@ -0,0 +1,605 @@ +# Copyright 2024 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
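
The management command is the entry point that ties these settings together. A usage sketch, assuming a configured Django environment and mirroring the integration tests below (the command coerces naive --since/--until values to UTC):

    from django.core.management import call_command

    # Gather without shipping; works even when analytics are disabled.
    call_command("gather_analytics", "--dry-run", "--since", "2024-08-20")

    # Ship to the configured ingress; requires INSIGHTS_TRACKING_STATE and
    # AUTOMATION_ANALYTICS_URL to be set, plus Red Hat or subscription
    # credentials when AUTOMATION_AUTH_METHOD is "user-pass".
    call_command("gather_analytics", "--ship", "--until", "2024-09-20")
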
+import csv +import io +import json +import os +import tarfile +import tempfile +from datetime import timedelta + +import pytest +from django.utils.timezone import now +from insights_analytics_collector import Collector + +from aap_eda.analytics import analytics_collectors as collectors +from aap_eda.analytics.collector import AnalyticsCollector +from aap_eda.conf import settings_registry +from aap_eda.core import models + + +@pytest.fixture(autouse=True) +def register() -> None: + settings_registry.persist_registry_data() + return None + + +@pytest.mark.django_db +def test_internal_infra_files(): + collector = AnalyticsCollector( + collection_type=Collector.DRY_RUN, collector_module=collectors + ) + until = now() + time_start = until - timedelta(hours=9) + + tgz_files = collector.gather(since=time_start, until=until) + + assert len(tgz_files) == 1 + + files = {} + with tarfile.open(tgz_files[0], "r:gz") as archive: + for member in archive.getmembers(): + files[member.name] = archive.extractfile(member) + + assert "./config.json" in files + assert "./manifest.json" in files + assert "./data_collection_status.csv" in files + + config_json = json.loads(files["./config.json"].read()) + manifest_json = json.loads(files["./manifest.json"].read()) + data_collection_status_csv = io.BytesIO( + files["./data_collection_status.csv"].read() + ) + data_collection_status = io.TextIOWrapper( + data_collection_status_csv, encoding="utf-8" + ) + + assert len(config_json.keys()) == 5 + for key in config_json.keys(): + assert key in [ + "install_uuid", + "platform", + "eda_log_level", + "eda_version", + "eda_deployment_type", + ] + assert manifest_json["config.json"] == "1.0" + assert manifest_json["data_collection_status.csv"] == "1.0" + + reader = csv.reader(data_collection_status) + header = next(reader) + lines = list(reader) + + assert header == [ + "collection_start_timestamp", + "since", + "until", + "file_name", + "status", + "elapsed", + ] + assert len(lines) == 1 + + collector._gather_cleanup() + + +@pytest.mark.django_db +def test_activations_table_collector(default_activation: models.Activation): + until = now() + time_start = until - timedelta(hours=9) + + with tempfile.TemporaryDirectory() as tmpdir: + collectors.activations_table( + time_start, tmpdir, until=now() + timedelta(seconds=1) + ) + with open(os.path.join(tmpdir, "activations_table.csv")) as f: + reader = csv.reader(f) + + header = next(reader) + lines = list(reader) + + assert header == [ + "id", + "organization_id", + "name", + "description", + "is_enabled", + "git_hash", + "decision_environment_id", + "project_id", + "rulebook_id", + "extra_var", + "restart_policy", + "status", + "current_job_id", + "restart_count", + "failure_count", + "is_valid", + "rulebook_name", + "rulebook_rulesets", + "ruleset_stats", + "user_id", + "created_at", + "modified_at", + "status_updated_at", + "status_message", + "latest_instance_id", + "awx_token_id", + "log_level", + "eda_system_vault_credential_id", + "k8s_service_name", + "source_mappings", + "skip_audit_events", + ] + assert len(lines) == 1 + assert lines[0][0] == str(default_activation.id) + assert lines[0][2] == default_activation.name + assert lines[0][3] == default_activation.description + + +def assert_audit_rules(expected_audit_rules): + until = now() + time_start = until - timedelta(hours=9) + + with tempfile.TemporaryDirectory() as tmpdir: + collectors.audit_rules_table( + time_start, tmpdir, until=now() + timedelta(seconds=1) + ) + with open(os.path.join(tmpdir, 
"audit_rules_table.csv")) as f: + reader = csv.reader(f) + + header = next(reader) + lines = list(reader) + + assert header == [ + "id", + "organization_id", + "name", + "status", + "created_at", + "fired_at", + "rule_uuid", + "ruleset_uuid", + "ruleset_name", + "activation_instance_id", + "job_instance_id", + ] + assert len(lines) == len(expected_audit_rules) + for i, rule in enumerate(expected_audit_rules): + assert lines[i][0] == str(rule.id) + assert lines[i][2] == rule.name + assert lines[i][3] == rule.status + + +@pytest.mark.django_db +def test_single_audit_rule_table_collector( + default_audit_rule: models.AuditRule, +): + assert_audit_rules([default_audit_rule]) + + +@pytest.mark.django_db +def test_multiple_audit_rules_table_collector( + audit_rule_1: models.AuditRule, + audit_rule_2: models.AuditRule, +): + assert_audit_rules([audit_rule_1, audit_rule_2]) + + +@pytest.mark.django_db +def test_single_audit_action_table_collector( + audit_action_1: models.AuditAction, + audit_event_1: models.AuditEvent, +): + assert_audit_actions([audit_action_1]) + assert_audit_events([audit_event_1]) + + +@pytest.mark.django_db +def test_multiple_audit_action_table_collector( + audit_action_1: models.AuditAction, + audit_action_2: models.AuditAction, + audit_action_3: models.AuditAction, + audit_event_1: models.AuditEvent, + audit_event_2: models.AuditEvent, +): + assert_audit_actions([audit_action_1, audit_action_2, audit_action_3]) + assert_audit_events([audit_event_1, audit_event_2]) + + +def assert_audit_actions(expected_audit_actions): + until = now() + time_start = until - timedelta(hours=9) + + with tempfile.TemporaryDirectory() as tmpdir: + collectors.audit_actions_table( + time_start, tmpdir, until=now() + timedelta(seconds=1) + ) + with open(os.path.join(tmpdir, "audit_actions_table.csv")) as f: + reader = csv.reader(f) + + header = next(reader) + lines = list(reader) + + assert header == [ + "id", + "name", + "status", + "url", + "fired_at", + "rule_fired_at", + "status_message", + "audit_rule_id", + ] + assert len(lines) == len(expected_audit_actions) + assert sorted([line[0] for line in lines]) == sorted( + [action.id for action in expected_audit_actions] + ) + + +def assert_audit_events(expected_audit_events): + until = now() + time_start = until - timedelta(hours=9) + + with tempfile.TemporaryDirectory() as tmpdir: + collectors.audit_events_table( + time_start, tmpdir, until=now() + timedelta(seconds=1) + ) + with open(os.path.join(tmpdir, "audit_events_table.csv")) as f: + reader = csv.reader(f) + + header = next(reader) + lines = list(reader) + + assert header == [ + "id", + "source_name", + "source_type", + "received_at", + "payload", + "rule_fired_at", + ] + assert len(lines) == len(expected_audit_events) + assert sorted([line[0] for line in lines]) == sorted( + [event.id for event in expected_audit_events] + ) + + +@pytest.mark.django_db +def test_eda_credentials_table_collector( + default_eda_credential: models.EdaCredential, +): + until = now() + time_start = until - timedelta(hours=9) + + with tempfile.TemporaryDirectory() as tmpdir: + collectors.eda_credentials_table( + time_start, tmpdir, until=now() + timedelta(seconds=1) + ) + with open(os.path.join(tmpdir, "eda_credentials_table.csv")) as f: + reader = csv.reader(f) + + header = next(reader) + lines = list(reader) + + assert header == [ + "id", + "organization_id", + "name", + "description", + "inputs", + "managed", + "created_at", + "modified_at", + "credential_type_id", + ] + assert len(lines) == 1 + assert 
lines[0][0] == str(default_eda_credential.id) + assert lines[0][2] == default_eda_credential.name + assert lines[0][3] == default_eda_credential.description + + +@pytest.mark.django_db +def test_credential_types_table_collector( + default_credential_type: models.CredentialType, +): + until = now() + time_start = until - timedelta(hours=9) + + with tempfile.TemporaryDirectory() as tmpdir: + collectors.credential_types_table( + time_start, tmpdir, until=now() + timedelta(seconds=1) + ) + with open(os.path.join(tmpdir, "credential_types_table.csv")) as f: + reader = csv.reader(f) + + header = next(reader) + lines = list(reader) + + assert header == [ + "id", + "name", + "description", + "inputs", + "injectors", + "managed", + "kind", + "namespace", + "created_at", + "modified_at", + ] + assert len(lines) == 1 + assert lines[0][0] == str(default_credential_type.id) + assert lines[0][1] == default_credential_type.name + assert lines[0][2] == default_credential_type.description + + +@pytest.mark.django_db +def test_decision_environments_table_collector( + default_decision_environment: models.DecisionEnvironment, +): + until = now() + time_start = until - timedelta(hours=9) + + with tempfile.TemporaryDirectory() as tmpdir: + collectors.decision_environments_table( + time_start, tmpdir, until=now() + timedelta(seconds=1) + ) + with open( + os.path.join(tmpdir, "decision_environments_table.csv") + ) as f: + reader = csv.reader(f) + + header = next(reader) + lines = list(reader) + + assert header == [ + "id", + "organization_id", + "name", + "description", + "image_url", + "credential_id", + "eda_credential_id", + "created_at", + "modified_at", + ] + assert len(lines) == 1 + assert lines[0][0] == str(default_decision_environment.id) + assert lines[0][2] == default_decision_environment.name + assert lines[0][3] == default_decision_environment.description + + +@pytest.mark.django_db +def test_event_streams_table_collector( + default_event_stream: models.EventStream, +): + until = now() + time_start = until - timedelta(hours=9) + + with tempfile.TemporaryDirectory() as tmpdir: + collectors.event_streams_table( + time_start, tmpdir, until=now() + timedelta(seconds=1) + ) + with open(os.path.join(tmpdir, "event_streams_table.csv")) as f: + reader = csv.reader(f) + + header = next(reader) + lines = list(reader) + + assert header == [ + "id", + "organization_id", + "name", + "event_stream_type", + "eda_credential_id", + "additional_data_headers", + "test_mode", + "test_content_type", + "test_content", + "test_headers", + "test_error_message", + "owner_id", + "uuid", + "url", + "created_at", + "modified_at", + "events_received", + "last_event_received_at", + ] + assert len(lines) == 1 + assert lines[0][0] == str(default_event_stream.id) + assert lines[0][2] == default_event_stream.name + assert lines[0][3] == default_event_stream.event_stream_type + + +@pytest.mark.django_db +def test_projects_table_collector( + default_project: models.Project, +): + until = now() + time_start = until - timedelta(hours=9) + + with tempfile.TemporaryDirectory() as tmpdir: + collectors.projects_table( + time_start, tmpdir, until=now() + timedelta(seconds=1) + ) + with open(os.path.join(tmpdir, "projects_table.csv")) as f: + reader = csv.reader(f) + + header = next(reader) + lines = list(reader) + + assert header == [ + "id", + "organization_id", + "name", + "description", + "url", + "proxy", + "git_hash", + "verify_ssl", + "credential_id", + "eda_credential_id", + "archive_file", + "import_state", + "import_task_id", + 
"import_error", + "created_at", + "modified_at", + "scm_type", + "scm_branch", + "scm_refspec", + "signature_validation_credential_id", + ] + assert len(lines) == 1 + assert lines[0][0] == str(default_project.id) + assert lines[0][2] == default_project.name + assert lines[0][3] == default_project.description + + +@pytest.mark.django_db +def test_rulebooks_table_collector( + default_rulebook: models.Rulebook, +): + until = now() + time_start = until - timedelta(hours=9) + + with tempfile.TemporaryDirectory() as tmpdir: + collectors.rulebooks_table( + time_start, tmpdir, until=now() + timedelta(seconds=1) + ) + with open(os.path.join(tmpdir, "rulebooks_table.csv")) as f: + reader = csv.reader(f) + + header = next(reader) + lines = list(reader) + + assert header == [ + "id", + "organization_id", + "name", + "description", + "rulesets", + "project_id", + "created_at", + "modified_at", + ] + assert len(lines) == 1 + assert lines[0][0] == str(default_rulebook.id) + assert lines[0][2] == default_rulebook.name + assert lines[0][3] == default_rulebook.description + + +@pytest.mark.django_db +def test_rulebook_processes_table_collector( + default_activation_instance: models.RulebookProcess, +): + until = now() + time_start = until - timedelta(hours=9) + + with tempfile.TemporaryDirectory() as tmpdir: + collectors.rulebook_processes_table( + time_start, tmpdir, until=now() + timedelta(seconds=1) + ) + with open(os.path.join(tmpdir, "rulebook_processes_table.csv")) as f: + reader = csv.reader(f) + + header = next(reader) + lines = list(reader) + + assert header == [ + "id", + "organization_id", + "name", + "status", + "git_hash", + "activation_id", + "parent_type", + "started_at", + "updated_at", + "ended_at", + "activation_pod_id", + "status_message", + "log_read_at", + ] + assert len(lines) == 1 + assert lines[0][0] == str(default_activation_instance.id) + assert lines[0][2] == default_activation_instance.name + assert lines[0][3] == default_activation_instance.status + + +@pytest.mark.django_db +def test_organizations_table_collector( + default_organization: models.Organization, +): + until = now() + time_start = until - timedelta(hours=9) + + with tempfile.TemporaryDirectory() as tmpdir: + collectors.organizations_table( + time_start, tmpdir, until=now() + timedelta(seconds=1) + ) + with open(os.path.join(tmpdir, "organizations_table.csv")) as f: + reader = csv.reader(f) + + header = next(reader) + lines = list(reader) + + assert header == [ + "id", + "modified", + "modified_by_id", + "created", + "created_by_id", + "name", + "description", + ] + assert len(lines) == 1 + assert lines[0][0] == str(default_organization.id) + assert lines[0][5] == default_organization.name + assert lines[0][6] == default_organization.description + + +@pytest.mark.django_db +def test_teams_table_collector( + default_team: models.Team, +): + until = now() + time_start = until - timedelta(hours=9) + + with tempfile.TemporaryDirectory() as tmpdir: + collectors.teams_table( + time_start, tmpdir, until=now() + timedelta(seconds=1) + ) + with open(os.path.join(tmpdir, "teams_table.csv")) as f: + reader = csv.reader(f) + + header = next(reader) + lines = list(reader) + + assert header == [ + "id", + "modified", + "modified_by_id", + "created", + "created_by_id", + "name", + "description", + "organization_id", + ] + assert len(lines) == 1 + assert lines[0][0] == str(default_team.id) + assert lines[0][5] == default_team.name + assert lines[0][6] == default_team.description diff --git 
a/tests/integration/analytics/test_gather_analytics.py b/tests/integration/analytics/test_gather_analytics.py new file mode 100644 index 000000000..d163beac5 --- /dev/null +++ b/tests/integration/analytics/test_gather_analytics.py @@ -0,0 +1,142 @@ +# Copyright 2024 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from io import StringIO + +import pytest +from django.core.management import call_command + +from aap_eda.conf import settings_registry +from aap_eda.conf.settings import application_settings + + +@pytest.fixture(autouse=True) +def register() -> None: + settings_registry.persist_registry_data() + return None + + +@pytest.fixture(autouse=True) +def use_analytic_url(settings): + settings.AUTOMATION_ANALYTICS_URL = "https://analytics_url" + + +@pytest.mark.parametrize( + "analytics_url, tracking_state, expected", + [ + (None, False, "Insights for Event Driven Ansible is not enabled."), + ( + "https://url", + False, + "Insights for Event Driven Ansible is not enabled.", + ), + (None, True, "AUTOMATION_ANALYTICS_URL is not set"), + ( + "https://url", + True, + "Analytics collection is done", + ), + ], +) +@pytest.mark.django_db +def test_gather_analytics_invalid_settings( + settings, caplog_factory, analytics_url, tracking_state, expected +): + settings.AUTOMATION_ANALYTICS_URL = analytics_url + application_settings.INSIGHTS_TRACKING_STATE = tracking_state + + out = StringIO() + logger = logging.getLogger("aap_eda.analytics") + eda_log = caplog_factory(logger) + + call_command("gather_analytics", "--ship", stdout=out) + + assert expected in eda_log.text + + +@pytest.mark.parametrize( + "args, log_level, expected", + [ + (None, "ERROR", "Either --ship or --dry-run needs to be set."), + ( + "--ship", + "INFO", + "Analytics collection is done", + ), + ( + ("--ship", "--dry-run"), + "ERROR", + "Both --ship and --dry-run cannot be processed at the same time.", + ), + ( + ("--dry-run", "--since", "2024-08-20"), + "INFO", + "No analytics collected", + ), + ( + ("--dry-run", "--since", "'2024-08-20 19:44:43.622759+00'"), + "INFO", + "No analytics collected", + ), + ( + ("--dry-run", "--since", "'2024-08-20 19:44:43'"), + "INFO", + "No analytics collected", + ), + ( + ("--dry-run", "--since", "'2024-08-20 19:44:43.622759'"), + "INFO", + "No analytics collected", + ), + ( + ("--dry-run", "--until", "2024-09-20"), + "INFO", + "No analytics collected", + ), + ( + ("--dry-run", "--until", "'2024-09-20 19:44:43.622759+00'"), + "INFO", + "No analytics collected", + ), + ( + ( + "--dry-run", + "--since", + "'2024-08-20 19:44:43'", + "--until", + "'2024-09-20 19:44:43'", + ), + "INFO", + "No analytics collected", + ), + ], +) +@pytest.mark.django_db +def test_gather_analytics_command(caplog_factory, args, log_level, expected): + application_settings.INSIGHTS_TRACKING_STATE = True + out = StringIO() + logger = logging.getLogger("aap_eda.analytics") + eda_log = caplog_factory(logger) + + command = "gather_analytics" + if args: + call_command(command, args, stdout=out) + else: 
+ call_command(command, stdout=out) + + assert any( + record.levelname == log_level and record.message == expected + for record in eda_log.records + ) diff --git a/tests/integration/analytics/test_utils.py b/tests/integration/analytics/test_utils.py new file mode 100644 index 000000000..c8c7ea2bd --- /dev/null +++ b/tests/integration/analytics/test_utils.py @@ -0,0 +1,46 @@ +# Copyright 2024 Red Hat, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import json + +from django.core.serializers.json import DjangoJSONEncoder + +from aap_eda.analytics.utils import datetime_hook + + +def test_datetime_hook(): + data = { + "started_at": "2024-09-13 14:42:49.188", + "ended_at": "2024-09-13 14:43:10,654", + } + data_json = json.dumps(data, cls=DjangoJSONEncoder) + + result = json.loads(data_json, object_hook=datetime_hook) + + assert isinstance(result["started_at"], datetime.datetime) is True + assert isinstance(result["ended_at"], datetime.datetime) is True + + +def test_bad_datetime_hook(): + data = { + "started_at": "2024-09-13 14:42:49.188", + "ended_at": "bad_2024-09-13 14:43:10,654", + } + data_json = json.dumps(data, cls=DjangoJSONEncoder) + + result = json.loads(data_json, object_hook=datetime_hook) + + assert isinstance(result["started_at"], datetime.datetime) is True + assert isinstance(result["ended_at"], datetime.datetime) is False diff --git a/tests/unit/test_application_settings.py b/tests/unit/test_application_settings.py index cae27881f..f98db9287 100644 --- a/tests/unit/test_application_settings.py +++ b/tests/unit/test_application_settings.py @@ -46,11 +46,13 @@ def test_read_only_application_setting(): @pytest.mark.django_db def test_application_setting_bad_type(): assert ( - settings_registry.get_setting_type("AUTOMATION_ANALYTICS_LAST_ENTRIES") - == dict + settings_registry.get_setting_type( + "AUTOMATION_ANALYTICS_GATHER_INTERVAL" + ) + == int ) with pytest.raises(InvalidValueError): - application_settings.AUTOMATION_ANALYTICS_LAST_ENTRIES = 1 + application_settings.AUTOMATION_ANALYTICS_GATHER_INTERVAL = "bad_type" @pytest.mark.django_db
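
For context on the export path exercised by these tests: each table collector renders a Django queryset to raw SQL via _get_query() (re-quoting the driver-formatted datetime parameters as ISO-8601 literals) and streams the result through PostgreSQL COPY in _copy_table(). A condensed sketch of that pipeline, assuming psycopg 3 (whose copy() API Django's cursor delegates to, as _copy_table relies on) and a hand-written query in place of str(queryset.query):

    import os

    from django.db import connection
    from insights_analytics_collector import CsvFileSplitter

    def copy_table_sketch(path: str) -> list:
        # Illustrative SQL; the collectors derive theirs from str(qs.query).
        query = (
            "COPY (SELECT id, name FROM core_project ORDER BY id) "
            "TO STDOUT WITH CSV HEADER"
        )
        file = CsvFileSplitter(filespec=os.path.join(path, "projects_table.csv"))
        with connection.cursor() as cursor:
            # psycopg 3 streams COPY output in chunks; CsvFileSplitter writes
            # them out as one or more CSV files capped at a maximum size.
            with cursor.copy(query) as copy:
                while data := copy.read():
                    file.write(bytes(data).decode())
        return file.file_list()
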