From 6f53b3f5cd7fa3e189f699658ecd228200f668b3 Mon Sep 17 00:00:00 2001 From: Hui Song Date: Tue, 10 Sep 2024 16:31:42 -0400 Subject: [PATCH] Add analytics collector to EDA --- poetry.lock | 170 ++++- pyproject.toml | 2 + src/aap_eda/analytics/analytics_collectors.py | 329 ++++++++++ src/aap_eda/analytics/collector.py | 60 ++ src/aap_eda/analytics/package.py | 33 + src/aap_eda/analytics/utils.py | 25 + .../management/commands/gather_analytics.py | 85 +++ src/aap_eda/settings/default.py | 25 + .../analytics/test_analytics_collectors.py | 585 ++++++++++++++++++ tests/integration/analytics/test_utils.py | 46 ++ 10 files changed, 1355 insertions(+), 5 deletions(-) create mode 100644 src/aap_eda/analytics/analytics_collectors.py create mode 100644 src/aap_eda/analytics/collector.py create mode 100644 src/aap_eda/analytics/package.py create mode 100644 src/aap_eda/analytics/utils.py create mode 100644 src/aap_eda/core/management/commands/gather_analytics.py create mode 100644 tests/integration/analytics/test_analytics_collectors.py create mode 100644 tests/integration/analytics/test_utils.py diff --git a/poetry.lock b/poetry.lock index bee23ed8c..b06c71435 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,9 +1,10 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. 
[[package]] name = "ansible-runner" version = "2.3.6" description = "\"Consistent Ansible Python API and CLI with container and process isolation runtime capabilities\"" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -22,6 +23,7 @@ six = "*" name = "appnope" version = "0.1.3" description = "Disable App Nap on macOS >= 10.9" +category = "dev" optional = false python-versions = "*" files = [ @@ -33,6 +35,7 @@ files = [ name = "asgiref" version = "3.6.0" description = "ASGI specs, helper code, and adapters" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -47,6 +50,7 @@ tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] name = "asttokens" version = "2.4.1" description = "Annotate AST trees with source code positions" +category = "dev" optional = false python-versions = "*" files = [ @@ -65,6 +69,7 @@ test = ["astroid (>=1,<2)", "astroid (>=2,<4)", "pytest"] name = "async-timeout" version = "4.0.2" description = "Timeout context manager for asyncio programs" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -76,6 +81,7 @@ files = [ name = "attrs" version = "23.1.0" description = "Classes Without Boilerplate" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -94,6 +100,7 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte name = "autobahn" version = "24.4.2" description = "WebSocket client & server library, WAMP real-time framework" +category = "main" optional = false python-versions = ">=3.9" files = [] @@ -127,6 +134,7 @@ resolved_reference = "f38f16ba28fa253dee951068cc729089c88d857d" name = "automat" version = "22.10.0" description = "Self-service finite-state machines for the programmer on the go." +category = "main" optional = false python-versions = "*" files = [ @@ -145,6 +153,7 @@ visualize = ["Twisted (>=16.1.1)", "graphviz (>0.5.1)"] name = "black" version = "23.3.0" description = "The uncompromising code formatter." 
+category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -192,6 +201,7 @@ uvloop = ["uvloop (>=0.15.2)"] name = "cachetools" version = "3.1.1" description = "Extensible memoizing collections and decorators" +category = "main" optional = false python-versions = "*" files = [ @@ -203,6 +213,7 @@ files = [ name = "certifi" version = "2023.7.22" description = "Python package for providing Mozilla's CA Bundle." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -214,6 +225,7 @@ files = [ name = "cffi" version = "1.15.1" description = "Foreign Function Interface for Python calling C code." +category = "main" optional = false python-versions = "*" files = [ @@ -290,6 +302,7 @@ pycparser = "*" name = "channels" version = "4.0.0" description = "Brings async, event-driven capabilities to Django 3.2 and up." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -310,6 +323,7 @@ tests = ["async-timeout", "coverage (>=4.5,<5.0)", "pytest", "pytest-asyncio", " name = "charset-normalizer" version = "3.1.0" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" optional = false python-versions = ">=3.7.0" files = [ @@ -394,6 +408,7 @@ files = [ name = "click" version = "8.1.3" description = "Composable command line interface toolkit" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -408,6 +423,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} name = "colorama" version = "0.4.6" description = "Cross-platform colored terminal text." 
+category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ @@ -419,6 +435,7 @@ files = [ name = "constantly" version = "15.1.0" description = "Symbolic constants in Python" +category = "main" optional = false python-versions = "*" files = [ @@ -430,6 +447,7 @@ files = [ name = "coverage" version = "7.3.0" description = "Code coverage measurement for Python" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -494,6 +512,7 @@ toml = ["tomli"] name = "croniter" version = "1.3.14" description = "croniter provides iteration for datetime object with cron like format" +category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -508,6 +527,7 @@ python-dateutil = "*" name = "cryptography" version = "42.0.5" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -562,6 +582,7 @@ test-randomorder = ["pytest-randomly"] name = "daphne" version = "4.0.0" description = "Django ASGI (HTTP/WebSocket) server" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -581,6 +602,7 @@ tests = ["django", "hypothesis", "pytest", "pytest-asyncio"] name = "decorator" version = "5.1.1" description = "Decorators for Humans" +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -592,6 +614,7 @@ files = [ name = "defusedxml" version = "0.7.1" description = "XML bomb protection for Python stdlib modules" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" files = [ @@ -599,10 +622,23 @@ files = [ {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, ] +[[package]] +name = "distro" +version = "1.9.0" +description = "Distro - an OS platform information API" +category = 
"main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, + {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, +] + [[package]] name = "django" version = "4.2.7" description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design." +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -621,8 +657,9 @@ bcrypt = ["bcrypt"] [[package]] name = "django-ansible-base" -version = "2024.9.2.0.dev8+g5f718b4" +version = "2024.9.4" description = "A Django app used by ansible services" +category = "main" optional = false python-versions = ">=3.9" files = [] @@ -653,12 +690,13 @@ testing = ["cryptography", "pytest", "pytest-django"] type = "git" url = "https://github.com/ansible/django-ansible-base.git" reference = "devel" -resolved_reference = "5f718b41107f942967155dea0434e4802e3b5380" +resolved_reference = "f9b284a74ea42d4e2b2833ebcf45a6aa431f9f68" [[package]] name = "django-crum" version = "0.7.9" description = "Django middleware to capture current request and user." +category = "main" optional = false python-versions = "*" files = [ @@ -673,6 +711,7 @@ django = ">=1.8" name = "django-filter" version = "23.5" description = "Django-filter is a reusable Django application for allowing users to filter querysets dynamically." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -687,6 +726,7 @@ Django = ">=3.2" name = "django-redis" version = "5.4.0" description = "Full featured redis cache backend for Django." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -705,6 +745,7 @@ hiredis = ["redis[hiredis] (>=3,!=4.0.0,!=4.0.1)"] name = "django-rq" version = "2.8.0" description = "An app that provides django integration for RQ (Redis Queue)" +category = "main" optional = false python-versions = "*" files = [ @@ -725,6 +766,7 @@ testing = ["mock (>=2.0.0)"] name = "django-split-settings" version = "1.2.0" description = "Organize Django settings into multiple files and directories. Easily override and modify settings. Use wildcards and optional settings files." +category = "main" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -736,6 +778,7 @@ files = [ name = "djangorestframework" version = "3.15.1" description = "Web APIs for Django, made easy." +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -750,6 +793,7 @@ django = ">=3.0" name = "docutils" version = "0.20.1" description = "Docutils -- Python Documentation Utilities" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -761,6 +805,7 @@ files = [ name = "drf-spectacular" version = "0.26.5" description = "Sane and flexible OpenAPI 3 schema generation for Django REST framework" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -784,6 +829,7 @@ sidecar = ["drf-spectacular-sidecar"] name = "dynaconf" version = "3.2.4" description = "The dynamic configurator for your Python Project" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -805,6 +851,7 @@ yaml = ["ruamel.yaml"] name = "ecdsa" version = "0.19.0" description = "ECDSA cryptographic signature library (pure python)" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.6" files = [ @@ -823,6 +870,7 @@ gmpy2 = ["gmpy2"] name = "eradicate" version = "2.2.0" description = "Removes commented-out code." 
+category = "dev" optional = false python-versions = "*" files = [ @@ -834,6 +882,7 @@ files = [ name = "executing" version = "2.0.1" description = "Get the currently executing AST node of a frame, and other information" +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -848,6 +897,7 @@ tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipyth name = "flake8" version = "5.0.4" description = "the modular source code checker: pep8 pyflakes and co" +category = "dev" optional = false python-versions = ">=3.6.1" files = [ @@ -864,6 +914,7 @@ pyflakes = ">=2.5.0,<2.6.0" name = "flake8-broken-line" version = "0.6.0" description = "Flake8 plugin to forbid backslashes for line breaks" +category = "dev" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -878,6 +929,7 @@ flake8 = ">=3.5,<6" name = "flake8-bugbear" version = "23.3.12" description = "A plugin for flake8 finding likely bugs and design problems in your program. Contains warnings that don't belong in pyflakes and pycodestyle." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -896,6 +948,7 @@ dev = ["coverage", "hypothesis", "hypothesmith (>=0.2)", "pre-commit", "pytest", name = "flake8-comprehensions" version = "3.12.0" description = "A flake8 plugin to help you write better list/set/dict comprehensions." 
+category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -910,6 +963,7 @@ flake8 = ">=3.0,<3.2.0 || >3.2.0" name = "flake8-debugger" version = "4.1.2" description = "ipdb/pdb statement checker plugin for flake8" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -925,6 +979,7 @@ pycodestyle = "*" name = "flake8-docstrings" version = "1.7.0" description = "Extension for flake8 which uses pydocstyle to check docstrings" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -940,6 +995,7 @@ pydocstyle = ">=2.1" name = "flake8-eradicate" version = "1.4.0" description = "Flake8 plugin to find commented out code" +category = "dev" optional = false python-versions = ">=3.7,<4.0" files = [ @@ -956,6 +1012,7 @@ flake8 = ">=3.5,<6" name = "flake8-print" version = "5.0.0" description = "print statement checker plugin for flake8" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -971,6 +1028,7 @@ pycodestyle = "*" name = "flake8-string-format" version = "0.3.0" description = "string format checker, plugin for flake8" +category = "dev" optional = false python-versions = "*" files = [ @@ -985,6 +1043,7 @@ flake8 = "*" name = "google-auth" version = "2.17.3" description = "Google Authentication Library" +category = "main" optional = false python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*" files = [ @@ -1009,6 +1068,7 @@ requests = ["requests (>=2.20.0,<3.0.0dev)"] name = "httpie" version = "3.2.3" description = "HTTPie: modern, user-friendly command-line HTTP client for the API era." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1036,6 +1096,7 @@ test = ["pytest", "pytest-httpbin (>=0.0.6)", "pytest-mock", "responses", "werkz name = "hyperlink" version = "21.0.0" description = "A featureful, immutable, and correct URL for Python." 
+category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1050,6 +1111,7 @@ idna = ">=2.5" name = "idna" version = "3.4" description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1061,6 +1123,7 @@ files = [ name = "incremental" version = "22.10.0" description = "\"A small library that versions your Python projects.\"" +category = "main" optional = false python-versions = "*" files = [ @@ -1076,6 +1139,7 @@ scripts = ["click (>=6.0)", "twisted (>=16.4.0)"] name = "inflection" version = "0.5.1" description = "A port of Ruby on Rails inflector to Python" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -1087,6 +1151,7 @@ files = [ name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1094,10 +1159,27 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "insights-analytics-collector" +version = "0.3.2" +description = "Collector Package for Insights for AAP" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "insights-analytics-collector-0.3.2.tar.gz", hash = "sha256:c8464e2f602b01e42574bdad85ed4c8afca9d613c8c886fcbe72f592899f520b"}, + {file = "insights_analytics_collector-0.3.2-py3-none-any.whl", hash = "sha256:0e88b938d05df83ced969fd0ee29e8452745240ae622fde75aded97a56ee1cf8"}, +] + +[package.dependencies] +django = "*" +requests = "*" + [[package]] name = "ipython" version = "8.17.2" description = "IPython: Productive Interactive Computing" +category = "dev" optional = false python-versions = ">=3.9" files = [ @@ -1134,6 +1216,7 @@ test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.22)", "pa name = "isort" version = "5.12.0" description = "A 
Python utility / library to sort Python imports." +category = "dev" optional = false python-versions = ">=3.8.0" files = [ @@ -1151,6 +1234,7 @@ requirements-deprecated-finder = ["pip-api", "pipreqs"] name = "jedi" version = "0.19.1" description = "An autocompletion tool for Python that can be used for text editors." +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1170,6 +1254,7 @@ testing = ["Django", "attrs", "colorama", "docopt", "pytest (<7.0.0)"] name = "jinja2" version = "3.1.3" description = "A very fast and expressive template engine." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1187,6 +1272,7 @@ i18n = ["Babel (>=2.7)"] name = "jsonschema" version = "4.17.3" description = "An implementation of JSON Schema validation for Python" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1206,6 +1292,7 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- name = "kubernetes" version = "26.1.0" description = "Kubernetes python client" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1223,7 +1310,7 @@ requests-oauthlib = "*" setuptools = ">=21.0.0" six = ">=1.9.0" urllib3 = ">=1.24.2" -websocket-client = ">=0.32.0,<0.40.0 || >0.40.0,<0.41.dev0 || >=0.43.dev0" +websocket-client = ">=0.32.0,<0.40.0 || >0.40.0,<0.41.0 || >=0.43.0" [package.extras] adal = ["adal (>=1.0.2)"] @@ -1232,6 +1319,7 @@ adal = ["adal (>=1.0.2)"] name = "lockfile" version = "0.12.2" description = "Platform-independent file locking module" +category = "main" optional = false python-versions = "*" files = [ @@ -1243,6 +1331,7 @@ files = [ name = "markdown-it-py" version = "3.0.0" description = "Python port of markdown-it. Markdown parsing, done right!" 
+category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1267,6 +1356,7 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] name = "markupsafe" version = "2.1.3" description = "Safely add untrusted strings to HTML/XML markup." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1336,6 +1426,7 @@ files = [ name = "matplotlib-inline" version = "0.1.6" description = "Inline Matplotlib backend for Jupyter" +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -1350,6 +1441,7 @@ traitlets = "*" name = "mccabe" version = "0.7.0" description = "McCabe checker, plugin for flake8" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1361,6 +1453,7 @@ files = [ name = "mdurl" version = "0.1.2" description = "Markdown URL utilities" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1372,6 +1465,7 @@ files = [ name = "multidict" version = "6.0.5" description = "multidict implementation" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1471,6 +1565,7 @@ files = [ name = "mypy-extensions" version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." 
+category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -1482,6 +1577,7 @@ files = [ name = "oauthlib" version = "3.2.2" description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1498,6 +1594,7 @@ signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] name = "packaging" version = "23.1" description = "Core utilities for Python packages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1509,6 +1606,7 @@ files = [ name = "parso" version = "0.8.3" description = "A Python Parser" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1524,6 +1622,7 @@ testing = ["docopt", "pytest (<6.0.0)"] name = "pathspec" version = "0.11.1" description = "Utility library for gitignore style pattern matching of file paths." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1535,6 +1634,7 @@ files = [ name = "pep8-naming" version = "0.13.3" description = "Check PEP-8 naming conventions, plugin for flake8" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1549,6 +1649,7 @@ flake8 = ">=5.0.0" name = "pexpect" version = "4.9.0" description = "Pexpect allows easy control of interactive console applications." +category = "main" optional = false python-versions = "*" files = [ @@ -1563,6 +1664,7 @@ ptyprocess = ">=0.5" name = "pip" version = "24.2" description = "The PyPA recommended tool for installing Python packages." +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -1574,6 +1676,7 @@ files = [ name = "platformdirs" version = "3.2.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
+category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1589,6 +1692,7 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.2.2)", "pytest- name = "pluggy" version = "1.0.0" description = "plugin and hook calling mechanisms for python" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1604,6 +1708,7 @@ testing = ["pytest", "pytest-benchmark"] name = "podman" version = "4.9.0" description = "Bindings for Podman RESTful API" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -1623,6 +1728,7 @@ progress-bar = ["rich (>=12.5.1)"] name = "prompt-toolkit" version = "3.0.41" description = "Library for building powerful interactive command lines in Python" +category = "dev" optional = false python-versions = ">=3.7.0" files = [ @@ -1637,6 +1743,7 @@ wcwidth = "*" name = "psycopg" version = "3.2.1" description = "PostgreSQL database adapter for Python" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -1660,6 +1767,7 @@ test = ["anyio (>=4.0)", "mypy (>=1.6)", "pproxy (>=2.7)", "pytest (>=6.2.5)", " name = "psycopg-binary" version = "3.2.1" description = "PostgreSQL database adapter for Python -- C optimisation distribution" +category = "main" optional = true python-versions = ">=3.8" files = [ @@ -1722,6 +1830,7 @@ files = [ name = "ptyprocess" version = "0.7.0" description = "Run a subprocess in a pseudo terminal" +category = "main" optional = false python-versions = "*" files = [ @@ -1733,6 +1842,7 @@ files = [ name = "pure-eval" version = "0.2.2" description = "Safely evaluate AST nodes without side effects" +category = "dev" optional = false python-versions = "*" files = [ @@ -1747,6 +1857,7 @@ tests = ["pytest"] name = "pyasn1" version = "0.5.0" description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -1758,6 
+1869,7 @@ files = [ name = "pyasn1-modules" version = "0.3.0" description = "A collection of ASN.1-based protocols modules" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ @@ -1772,6 +1884,7 @@ pyasn1 = ">=0.4.6,<0.6.0" name = "pycodestyle" version = "2.9.1" description = "Python style guide checker" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1783,6 +1896,7 @@ files = [ name = "pycparser" version = "2.21" description = "C parser in Python" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1794,6 +1908,7 @@ files = [ name = "pydantic" version = "1.10.7" description = "Data validation and settings management using python type hints" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1846,6 +1961,7 @@ email = ["email-validator (>=1.0.3)"] name = "pydocstyle" version = "6.3.0" description = "Python docstring style checker" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1863,6 +1979,7 @@ toml = ["tomli (>=1.2.3)"] name = "pyflakes" version = "2.5.0" description = "passive checker of Python programs" +category = "dev" optional = false python-versions = ">=3.6" files = [ @@ -1874,6 +1991,7 @@ files = [ name = "pygments" version = "2.17.1" description = "Pygments is a syntax highlighting package written in Python." 
+category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1889,6 +2007,7 @@ windows-terminal = ["colorama (>=0.4.6)"] name = "pyjwt" version = "2.8.0" description = "JSON Web Token implementation in Python" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1909,6 +2028,7 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] name = "pyopenssl" version = "24.1.0" description = "Python wrapper module around the OpenSSL library" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1927,6 +2047,7 @@ test = ["pretend", "pytest (>=3.0.1)", "pytest-rerunfailures"] name = "pyrsistent" version = "0.19.3" description = "Persistent/Functional/Immutable data structures" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -1963,6 +2084,7 @@ files = [ name = "pysocks" version = "1.7.1" description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -1975,6 +2097,7 @@ files = [ name = "pytest" version = "7.3.1" description = "pytest: simple powerful testing with Python" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -1995,6 +2118,7 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no name = "pytest-asyncio" version = "0.21.0" description = "Pytest support for asyncio" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2013,6 +2137,7 @@ testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy name = "pytest-cov" version = "4.1.0" description = "Pytest plugin for measuring coverage." 
+category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2031,6 +2156,7 @@ testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtuale name = "pytest-django" version = "4.5.2" description = "A Django plugin for pytest." +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -2049,6 +2175,7 @@ testing = ["Django", "django-configurations (>=2.0)"] name = "pytest-lazy-fixture" version = "0.6.3" description = "It helps to use fixtures in pytest.mark.parametrize" +category = "dev" optional = false python-versions = "*" files = [ @@ -2063,6 +2190,7 @@ pytest = ">=3.2.5" name = "python-daemon" version = "3.0.1" description = "Library to implement a well-behaved Unix daemon process." +category = "main" optional = false python-versions = ">=3" files = [ @@ -2083,6 +2211,7 @@ test = ["coverage", "docutils", "testscenarios (>=0.4)", "testtools"] name = "python-dateutil" version = "2.8.2" description = "Extensions to the standard Python datetime module" +category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ @@ -2097,6 +2226,7 @@ six = ">=1.5" name = "python-gnupg" version = "0.5.2" description = "A wrapper for the Gnu Privacy Guard (GPG or GnuPG)" +category = "main" optional = false python-versions = "*" files = [ @@ -2108,6 +2238,7 @@ files = [ name = "pyxdg" version = "0.28" description = "PyXDG contains implementations of freedesktop.org standards in python." 
+category = "main" optional = false python-versions = "*" files = [ @@ -2119,6 +2250,7 @@ files = [ name = "pyyaml" version = "6.0.2" description = "YAML parser and emitter for Python" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2181,6 +2313,7 @@ files = [ name = "redis" version = "4.5.4" description = "Python client for Redis database and key-value store" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2199,6 +2332,7 @@ ocsp = ["cryptography (>=36.0.1)", "pyopenssl (==20.0.1)", "requests (>=2.26.0)" name = "requests" version = "2.31.0" description = "Python HTTP for Humans." +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2221,6 +2355,7 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] name = "requests-mock" version = "1.12.1" description = "Mock out responses from the requests package" +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -2238,6 +2373,7 @@ fixture = ["fixtures"] name = "requests-oauthlib" version = "1.3.1" description = "OAuthlib authentication support for Requests." +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2256,6 +2392,7 @@ rsa = ["oauthlib[signedtoken] (>=3.0.0)"] name = "requests-toolbelt" version = "1.0.0" description = "A utility belt for advanced users of python-requests" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2270,6 +2407,7 @@ requests = ">=2.0.1,<3.0.0" name = "rich" version = "13.8.0" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +category = "dev" optional = false python-versions = ">=3.7.0" files = [ @@ -2288,6 +2426,7 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] name = "rq" version = "1.13.0" description = "RQ is a simple, lightweight, library for creating background jobs, and processing them." 
+category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2303,6 +2442,7 @@ redis = ">=3.5.0" name = "rq-scheduler" version = "0.10.0" description = "Provides job scheduling capabilities to RQ (Redis Queue)" +category = "main" optional = false python-versions = "*" files = [ @@ -2318,6 +2458,7 @@ rq = ">=0.13" name = "rsa" version = "4.9" description = "Pure-Python RSA implementation" +category = "main" optional = false python-versions = ">=3.6,<4" files = [ @@ -2332,6 +2473,7 @@ pyasn1 = ">=0.1.3" name = "ruff" version = "0.0.262" description = "An extremely fast Python linter, written in Rust." +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -2358,6 +2500,7 @@ files = [ name = "service-identity" version = "21.1.0" description = "Service identity verification for pyOpenSSL & cryptography." +category = "main" optional = false python-versions = "*" files = [ @@ -2382,6 +2525,7 @@ tests = ["coverage[toml] (>=5.0.2)", "pytest"] name = "setuptools" version = "67.7.1" description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2398,6 +2542,7 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs ( name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -2409,6 +2554,7 @@ files = [ name = "snowballstemmer" version = "2.2.0" description = "This package provides 29 stemmers for 28 languages generated from Snowball algorithms." +category = "dev" optional = false python-versions = "*" files = [ @@ -2420,6 +2566,7 @@ files = [ name = "sqlparse" version = "0.4.4" description = "A non-validating SQL parser." 
+category = "main" optional = false python-versions = ">=3.5" files = [ @@ -2436,6 +2583,7 @@ test = ["pytest", "pytest-cov"] name = "stack-data" version = "0.6.3" description = "Extract data from python stack frames and tracebacks for informative displays" +category = "dev" optional = false python-versions = "*" files = [ @@ -2455,6 +2603,7 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] name = "traitlets" version = "5.13.0" description = "Traitlets Python configuration system" +category = "dev" optional = false python-versions = ">=3.8" files = [ @@ -2470,6 +2619,7 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.6.0)", "pre-commit", "pytest (>=7.0, name = "twisted" version = "22.10.0" description = "An asynchronous networking framework written in Python" +category = "main" optional = false python-versions = ">=3.7.1" files = [ @@ -2511,6 +2661,7 @@ windows-platform = ["PyHamcrest (>=1.9.0)", "appdirs (>=1.4.0)", "bcrypt (>=3.0. name = "twisted-iocpsupport" version = "1.0.3" description = "An extension for use in the twisted I/O Completion Ports reactor." 
+category = "main" optional = false python-versions = "*" files = [ @@ -2536,6 +2687,7 @@ files = [ name = "txaio" version = "23.1.1" description = "Compatibility API between asyncio/Twisted/Trollius" +category = "main" optional = false python-versions = ">=3.7" files = [ @@ -2552,6 +2704,7 @@ twisted = ["twisted (>=20.3.0)", "zope.interface (>=5.2.0)"] name = "typing-extensions" version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" +category = "main" optional = false python-versions = ">=3.8" files = [ @@ -2563,6 +2716,7 @@ files = [ name = "tzdata" version = "2023.3" description = "Provider of IANA time zone data" +category = "main" optional = false python-versions = ">=2" files = [ @@ -2574,6 +2728,7 @@ files = [ name = "uritemplate" version = "4.1.1" description = "Implementation of RFC 6570 URI Templates" +category = "main" optional = false python-versions = ">=3.6" files = [ @@ -2585,6 +2740,7 @@ files = [ name = "urllib3" version = "1.26.15" description = "HTTP library with thread-safe connection pooling, file post, and more." 
+category = "main"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
 files = [
@@ -2601,6 +2757,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
 name = "wcwidth"
 version = "0.2.10"
 description = "Measures the displayed width of unicode strings in a terminal"
+category = "dev"
 optional = false
 python-versions = "*"
 files = [
@@ -2612,6 +2769,7 @@ files = [
 name = "websocket-client"
 version = "1.5.1"
 description = "WebSocket client for Python with low level API options"
+category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -2628,6 +2786,7 @@ test = ["websockets"]
 name = "xxhash"
 version = "3.4.1"
 description = "Python binding for xxHash"
+category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -2745,6 +2904,7 @@ files = [
 name = "zope-interface"
 version = "6.0"
 description = "Interfaces for Python"
+category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -2795,4 +2955,4 @@ dev = ["psycopg-binary"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.13"
-content-hash = "78e2f602f1583537aae38421cf838d9d494b32d34c1e0877e22d2891753d614c"
+content-hash = "730133e85efbdd484d267aeee8d34d057c4b12a4aefe0f282522a179a2e21179"
diff --git a/pyproject.toml b/pyproject.toml
index 17c099989..bc0c9ad86 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,6 +56,8 @@ psycopg = "^3.1.17"
 xxhash = "*"
 pyjwt = { version = "*", extras = ["crypto"] }
 ecdsa = "*"
+insights-analytics-collector = "^0.3.2"
+distro = "^1.9.0"
 
 [tool.poetry.group.test.dependencies]
 pytest = "*"
diff --git a/src/aap_eda/analytics/analytics_collectors.py b/src/aap_eda/analytics/analytics_collectors.py
new file mode 100644
index 000000000..01e75198f
--- /dev/null
+++ b/src/aap_eda/analytics/analytics_collectors.py
@@ -0,0 +1,365 @@
+import os
+import platform
+from datetime import datetime
+
+import distro
+from ansible_base.resource_registry.models.service_identifier import service_id
+from django.conf import settings
+from django.db.models import Manager, Q
+from django.db.models.query import EmptyQuerySet
+from insights_analytics_collector import CsvFileSplitter, register
+
+from aap_eda.analytics.collector import AnalyticsCollector
+from aap_eda.core import models
+from aap_eda.utils import get_eda_version
+
+
+@register(
+    "config",
+    "1.0",
+    description="General platform configuration.",
+    config=True,
+)
+def config(**kwargs) -> dict:
+    """Return install, platform and EDA settings for the config entry."""
+    install_type = "traditional"
+    if os.environ.get("container") == "oci":
+        install_type = "openshift"
+    elif "KUBERNETES_SERVICE_PORT" in os.environ:
+        install_type = "k8s"
+    return {
+        "install_uuid": service_id(),
+        "platform": {
+            "system": platform.system(),
+            "dist": distro.linux_distribution(),
+            "release": platform.release(),
+            "type": install_type,
+        },
+        # skip license related info so far
+        "eda_log_level": settings.APP_LOG_LEVEL,
+        "eda_version": get_eda_version(),
+        "eda_deployment_type": settings.DEPLOYMENT_TYPE,
+    }
+
+
+@register(
+    "activations_table",
+    "1.0",
+    format="csv",
+    description="Data on activations",
+)
+def activations_table(
+    since: datetime, full_path: str, until: datetime, **kwargs
+):
+    """Export activations created or modified in ``(since, until]``."""
+    query = _get_query(models.Activation.objects, since, until)
+
+    return _copy_table("activations", query, full_path)
+
+
+@register(
+    "audit_action_table",
+    "1.0",
+    format="csv",
+    description="Data on audit_actions",
+)
+def audit_actions_table(
+    since: datetime, full_path: str, until: datetime, **kwargs
+):
+    """Export audit actions of rules selected by ``_get_audit_rule_qs``."""
+    audit_actions = _get_audit_action_qs(since, until)
+    if isinstance(audit_actions, EmptyQuerySet):
+        # An empty queryset cannot be rendered to SQL; nothing to export.
+        return []
+
+    audit_action_query = (
+        f"COPY ({audit_actions.query}) TO STDOUT WITH CSV HEADER"
+    )
+
+    return _copy_table("audit_actions", audit_action_query, full_path)
+
+
+@register(
+    "audit_event_table",
+    "1.0",
+    format="csv",
+    description="Data on audit_events",
+)
+def audit_events_table(
+    since: datetime, full_path: str, until: datetime, **kwargs
+):
+    """Export audit events linked to the selected audit actions."""
+    audit_actions = _get_audit_action_qs(since, until)
+    audit_event_query = _get_audit_event_query(audit_actions)
+    if audit_event_query is None:
+        return []
+
+    return _copy_table("audit_events", audit_event_query, full_path)
+
+
+@register(
+    "audit_rule_table",
+    "1.0",
+    format="csv",
+    description="Data on audit_rules",
+)
+def audit_rules_table(
+    since: datetime, full_path: str, until: datetime, **kwargs
+):
+    """Export audit rules selected by ``_get_audit_rule_qs``."""
+    audit_rules = _get_audit_rule_qs(since, until)
+    if isinstance(audit_rules, EmptyQuerySet):
+        return []
+
+    audit_rule_query = f"COPY ({audit_rules.query}) TO STDOUT WITH CSV HEADER"
+
+    return _copy_table("audit_rules", audit_rule_query, full_path)
+
+
+@register(
+    "eda_credential_table",
+    "1.0",
+    format="csv",
+    description="Data on eda_credentials",
+)
+def eda_credentials_table(
+    since: datetime, full_path: str, until: datetime, **kwargs
+):
+    """Export EDA credentials created or modified in ``(since, until]``."""
+    query = _get_query(models.EdaCredential.objects, since, until)
+
+    return _copy_table("eda_credentials", query, full_path)
+
+
+@register(
+    "credential_type_table",
+    "1.0",
+    format="csv",
+    description="Data on credential_types",
+)
+def credential_types_table(
+    since: datetime, full_path: str, until: datetime, **kwargs
+):
+    """Export credential types created or modified in ``(since, until]``."""
+    query = _get_query(models.CredentialType.objects, since, until)
+
+    return _copy_table("credential_types", query, full_path)
+
+
+@register(
+    "decision_environment_table",
+    "1.0",
+    format="csv",
+    description="Data on decision_environments",
+)
+def decision_environments_table(
+    since: datetime, full_path: str, until: datetime, **kwargs
+):
+    """Export decision environments touched in ``(since, until]``."""
+    query = _get_query(models.DecisionEnvironment.objects, since, until)
+    return _copy_table("decision_environments", query, full_path)
+
+
+@register(
+    "event_stream_table",
+    "1.0",
+    format="csv",
+    description="Data on event_streams",
+)
+def event_streams_table(
+    since: datetime, full_path: str, until: datetime, **kwargs
+):
+    """Export event streams created or modified in ``(since, until]``."""
+    query = _get_query(models.EventStream.objects, since, until)
+    return _copy_table("event_streams", query, full_path)
+
+
+@register(
+    "project_table",
+    "1.0",
+    format="csv",
+    description="Data on projects",
+)
+def projects_table(since: datetime, full_path: str, until: datetime, **kwargs):
+    """Export projects created or modified in ``(since, until]``."""
+    query = _get_query(models.Project.objects, since, until)
+    return _copy_table("projects", query, full_path)
+
+
+@register(
+    "rulebook_table",
+    "1.0",
+    format="csv",
+    description="Data on rulebooks",
+)
+def rulebooks_table(
+    since: datetime, full_path: str, until: datetime, **kwargs
+):
+    """Export rulebooks created or modified in ``(since, until]``."""
+    query = _get_query(models.Rulebook.objects, since, until)
+    return _copy_table("rulebooks", query, full_path)
+
+
+@register(
+    "rulebook_process_table",
+    "1.0",
+    format="csv",
+    description="Data on rulebook_processes",
+)
+def rulebook_processes_table(
+    since: datetime, full_path: str, until: datetime, **kwargs
+):
+    """Export rulebook processes started or updated in ``(since, until]``."""
+    args = {"started_at": True}
+    query = _get_query(models.RulebookProcess.objects, since, until, **args)
+    return _copy_table("rulebook_processes", query, full_path)
+
+
+@register(
+    "organization_table",
+    "1.0",
+    format="csv",
+    description="Data on organizations",
+)
+def organizations_table(
+    since: datetime, full_path: str, until: datetime, **kwargs
+):
+    """Export organizations created or modified in ``(since, until]``."""
+    args = {"created": True}
+    query = _get_query(models.Organization.objects, since, until, **args)
+    return _copy_table("organizations", query, full_path)
+
+
+@register(
+    "team_table",
+    "1.0",
+    format="csv",
+    description="Data on teams",
+)
+def teams_table(since: datetime, full_path: str, until: datetime, **kwargs):
+    """Export teams created or modified in ``(since, until]``."""
+    args = {"created": True}
+    query = _get_query(models.Team.objects, since, until, **args)
+
+    return _copy_table("teams", query, full_path)
+
+
+def _datetime_format(timestamp: datetime) -> str:
+    """Render ``timestamp`` the way it appears in ``str(qs.query)``.
+
+    The query string embeds datetime parameters via ``str()``, which omits
+    the microseconds when they are zero; a fixed ``strftime`` pattern would
+    then fail to match and leave the value unquoted in the COPY statement.
+    """
+    return timestamp.isoformat(sep=" ")
+
+
+def _get_query(
+    objects: Manager, since: datetime, until: datetime, **kwargs
+) -> str:
+    """Build the ``COPY`` statement for rows touched in ``(since, until]``.
+
+    ``started_at=True`` and ``created=True`` select the timestamp columns
+    ``started_at``/``updated_at`` and ``created``/``modified`` respectively.
+    """
+    if kwargs.get("started_at"):
+        qs = objects.filter(
+            Q(started_at__gt=since, started_at__lte=until)
+            | Q(updated_at__gt=since, updated_at__lte=until)
+        ).order_by("id")
+    elif kwargs.get("created"):
+        qs = objects.filter(
+            Q(created__gt=since, created__lte=until)
+            | Q(modified__gt=since, modified__lte=until)
+        ).order_by("id")
+    else:
+        qs = objects.filter(
+            Q(created_at__gt=since, created_at__lte=until)
+            | Q(modified_at__gt=since, modified_at__lte=until)
+        ).order_by("id")
+    # str(qs.query) leaves parameters unquoted; quote the two timestamps.
+    query = (
+        str(qs.query)
+        .replace(_datetime_format(since), f"'{since.isoformat()}'")
+        .replace(_datetime_format(until), f"'{until.isoformat()}'")
+    )
+
+    return f"COPY ({query}) TO STDOUT WITH CSV HEADER"
+
+
+def _get_audit_event_query(actions: list[models.AuditAction]):
+    """Build the ``COPY`` statement for the events of ``actions``.
+
+    Returns ``None`` when there is nothing to export, because an empty
+    queryset cannot be rendered to SQL.
+    """
+    events = models.AuditEvent.objects.none()
+    for action in actions:
+        events |= action.audit_events.all()
+
+    if isinstance(events, EmptyQuerySet):
+        return None
+
+    query = str(events.distinct().query)
+
+    # str(query) renders the action ids without quotes; add them here.
+    for action in actions:
+        query = query.replace(str(action.id), f"'{action.id}'")
+
+    return f"COPY ({query}) TO STDOUT WITH CSV HEADER"
+
+
+def _get_audit_rule_qs(since: datetime, until: datetime):
+    """Return audit rules of rulebook processes touched in the window.
+
+    A process qualifies when it was started or updated in
+    ``(since, until]``. Always returns a queryset (``.none()`` when no
+    process qualifies) so callers can rely on the queryset API.
+    """
+    activation_instance_ids = list(
+        models.RulebookProcess.objects.filter(
+            Q(started_at__gt=since, started_at__lte=until)
+            | Q(updated_at__gt=since, updated_at__lte=until)
+        )
+        .values_list("id", flat=True)
+        .distinct()
+    )
+    if not activation_instance_ids:
+        return models.AuditRule.objects.none()
+
+    return models.AuditRule.objects.filter(
+        activation_instance_id__in=activation_instance_ids
+    ).order_by("id")
+
+
+def _get_audit_action_qs(since: datetime, until: datetime):
+    """Return audit actions of the rules from ``_get_audit_rule_qs``.
+
+    Always returns a queryset; it is ``.none()`` when no rule matches.
+    """
+    audit_rule_ids = list(
+        _get_audit_rule_qs(since, until)
+        .values_list("id", flat=True)
+        .distinct()
+    )
+    if not audit_rule_ids:
+        return models.AuditAction.objects.none()
+
+    return models.AuditAction.objects.filter(
+        audit_rule_id__in=audit_rule_ids
+    ).order_by("id")
+
+
+def _copy_table(table, query, path):
+    """Write the output of the COPY statement ``query`` to CSV.
+
+    The data goes to ``{table}_table.csv`` under ``path``; the return value
+    is the file list reported by ``CsvFileSplitter``.
+    """
+    file_path = os.path.join(path, table + "_table.csv")
+    file = CsvFileSplitter(filespec=file_path)
+    with AnalyticsCollector.db_connection().cursor() as cursor:
+        with cursor.copy(query) as copy:
+            while data := copy.read():
+                byte_data = bytes(data)
+                file.write(byte_data.decode())
+    return file.file_list()
diff --git a/src/aap_eda/analytics/collector.py b/src/aap_eda/analytics/collector.py
new file mode 100644
index 000000000..3545df621
--- /dev/null
+++ b/src/aap_eda/analytics/collector.py
@@ -0,0 +1,72 @@
+import json
+
+from django.conf import settings
+from django.core.serializers.json import DjangoJSONEncoder
+from django.db import connection
+from insights_analytics_collector import Collector
+
+from aap_eda.analytics.package import Package
+from aap_eda.analytics.utils import datetime_hook
+
+
+class AnalyticsCollector(Collector):
+    """Insights collector backed by Django settings and the EDA database."""
+
+    @staticmethod
+    def db_connection():
+        """Return the default Django database connection."""
+        return connection
+
+    @staticmethod
+    def _package_class():
+        """Return the EDA ``Package`` implementation."""
+        return Package
+
+    def get_last_gathering(self):
+        """Public accessor for ``_last_gathering``."""
+        return self._last_gathering()
+
+    def _is_shipping_configured(self):
+        """Return whether ``INSIGHTS_TRACKING_STATE`` enables analytics."""
+        if not settings.INSIGHTS_TRACKING_STATE:
+            self.logger.warning(
+                "Insights for Event Driven Ansible is not enabled."
+            )
+            return False
+
+        return True
+
+    def _is_valid_license(self):
+        """Always return True: license checks are not implemented yet."""
+        # ignore license information checking for now
+        return True
+
+    def _last_gathering(self):
+        """Return ``settings.AUTOMATION_ANALYTICS_LAST_GATHER``."""
+        return settings.AUTOMATION_ANALYTICS_LAST_GATHER
+
+    def _load_last_gathered_entries(self):
+        """Load the entries stored by ``_save_last_gathered_entries``.
+
+        The setting may hold the JSON string itself or an object exposing
+        it as ``.value``; an unset or empty value yields ``{}``.
+        """
+        last_entries = settings.AUTOMATION_ANALYTICS_LAST_ENTRIES
+        # A plain str has no ``.value``; fall back to the object itself.
+        raw = getattr(last_entries, "value", last_entries)
+
+        return json.loads(raw or "{}", object_hook=datetime_hook)
+
+    def _save_last_gathered_entries(self, last_gathered_entries):
+        """Serialize ``last_gathered_entries`` to JSON in the settings."""
+        self.logger.info(f"Save last_entries: {last_gathered_entries}")
+
+        settings.AUTOMATION_ANALYTICS_LAST_ENTRIES = json.dumps(
+            last_gathered_entries, cls=DjangoJSONEncoder
+        )
+
+    def _save_last_gather(self):
+        """Record ``gather_until`` as the last gather time."""
+        self.logger.info(f"Save last_gather: {self.gather_until}")
+
+        settings.AUTOMATION_ANALYTICS_LAST_GATHER = self.gather_until
diff --git a/src/aap_eda/analytics/package.py b/src/aap_eda/analytics/package.py
new file mode 100644
index 000000000..d69ce09b8
--- /dev/null
+++ b/src/aap_eda/analytics/package.py
@@ -0,0 +1,41 @@
+import logging
+
+from django.conf import settings
+from insights_analytics_collector import Package as InsightsAnalyticsPackage
+
+logger = logging.getLogger(__name__)
+
+
+class Package(InsightsAnalyticsPackage):
+    """Analytics package whose shipping options come from Django settings."""
+
+    PAYLOAD_CONTENT_TYPE = "application/vnd.redhat.aap-eda.filename+tgz"
+    CERT_PATH = settings.INSIGHTS_CERT_PATH
+
+    def _tarname_base(self):
+        """Build the tarball base name from the collector's gather_until."""
+        timestamp = self.collector.gather_until
+        return f'eda-analytics-{timestamp.strftime("%Y-%m-%d-%H%M%S%z")}'
+
+    def get_ingress_url(self):
+        """Return ``settings.AUTOMATION_ANALYTICS_URL``."""
+        return settings.AUTOMATION_ANALYTICS_URL
+
+    def shipping_auth_mode(self):
+        """Return ``settings.AUTOMATION_AUTH_METHOD``."""
+        return settings.AUTOMATION_AUTH_METHOD
+
+    def _get_rh_user(self):
+        """Return ``settings.REDHAT_USERNAME``."""
+        return settings.REDHAT_USERNAME
+
+    def _get_rh_password(self):
+        """Return ``settings.REDHAT_PASSWORD``."""
+        return settings.REDHAT_PASSWORD
+
+    def _get_http_request_headers(self):
+        """Return the content type and user agent request headers."""
+        return {
+            "Content-Type": self.PAYLOAD_CONTENT_TYPE,
+            "User-Agent": "EDA-metrics-agent",
+        }
diff --git a/src/aap_eda/analytics/utils.py
b/src/aap_eda/analytics/utils.py
new file mode 100644
index 000000000..9916da855
--- /dev/null
+++ b/src/aap_eda/analytics/utils.py
@@ -0,0 +1,35 @@
+# Copyright 2024 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from django.utils.dateparse import parse_datetime
+
+
+def datetime_hook(d):
+    """Convert datetime-like string values of ``d`` to ``datetime``.
+
+    Intended as a ``json.loads`` ``object_hook``. Non-string values are
+    kept as they are; strings that cannot be parsed as a datetime,
+    including well-formed but impossible dates, become ``None``.
+    """
+    new_d = {}
+    for key, value in d.items():
+        try:
+            new_d[key] = parse_datetime(value)
+        except TypeError:
+            # Not a string (number, list, nested dict, ...): keep as is.
+            new_d[key] = value
+        except ValueError:
+            # Well-formed but invalid date, e.g. month 13.
+            new_d[key] = None
+    return new_d
diff --git a/src/aap_eda/core/management/commands/gather_analytics.py b/src/aap_eda/core/management/commands/gather_analytics.py
new file mode 100644
index 000000000..d55b8f7d1
--- /dev/null
+++ b/src/aap_eda/core/management/commands/gather_analytics.py
@@ -0,0 +1,113 @@
+import logging
+from datetime import timezone as dt_timezone
+
+from dateutil import parser as date_parser
+from django.core.management.base import BaseCommand, CommandError
+from django.utils import timezone
+
+from aap_eda.analytics import analytics_collectors
+from aap_eda.analytics.collector import AnalyticsCollector
+
+
+class Command(BaseCommand):
+    """Collect analytics data."""
+
+    help = "Collect analytics data"
+    # Marks the console handler so it is attached only once per process.
+    CONSOLE_HANDLER_NAME = "gather_analytics_console"
+
+    def add_arguments(self, parser):
+        """Register the command line options."""
+        parser.add_argument(
+            "--dry-run",
+            dest="dry-run",
+            action="store_true",
+            help=(
+                "Gather analytics without shipping. Works even if analytics"
+                " are disabled in settings."
+            ),
+        )
+        parser.add_argument(
+            "--ship",
+            dest="ship",
+            action="store_true",
+            help="Enable to ship metrics to the Red Hat Cloud",
+        )
+        parser.add_argument(
+            "--since",
+            dest="since",
+            action="store",
+            help="Start date for collection",
+        )
+        parser.add_argument(
+            "--until",
+            dest="until",
+            action="store",
+            help="End date for collection",
+        )
+
+    def init_logging(self):
+        """Send ``aap_eda.analytics`` log records to the console.
+
+        The handler is attached only once, so running the command several
+        times in one process does not duplicate every message.
+        """
+        self.logger = logging.getLogger("aap_eda.analytics")
+        already_attached = any(
+            handler.get_name() == self.CONSOLE_HANDLER_NAME
+            for handler in self.logger.handlers
+        )
+        if not already_attached:
+            handler = logging.StreamHandler()
+            handler.set_name(self.CONSOLE_HANDLER_NAME)
+            handler.setLevel(logging.DEBUG)
+            handler.setFormatter(logging.Formatter("%(message)s"))
+            self.logger.addHandler(handler)
+        self.logger.propagate = False
+
+    def handle(self, *args, **options):
+        """Gather analytics for the requested window and log the tarballs."""
+        self.init_logging()
+        opt_ship = options.get("ship")
+        opt_dry_run = options.get("dry-run")
+
+        since = self._parse_timestamp(options.get("since"), "--since")
+        until = self._parse_timestamp(options.get("until"), "--until")
+
+        if opt_ship and opt_dry_run:
+            self.logger.error(
+                "Both --ship and --dry-run cannot be processed "
+                "at the same time."
+            )
+            return
+
+        collector = AnalyticsCollector(
+            collector_module=analytics_collectors,
+            collection_type="manual" if opt_ship else "dry-run",
+            logger=self.logger,
+        )
+        tgzfiles = collector.gather(since=since, until=until)
+
+        if tgzfiles:
+            for tgz in tgzfiles:
+                self.logger.info(tgz)
+        else:
+            self.logger.error("No analytics collected")
+
+    @staticmethod
+    def _parse_timestamp(value, option):
+        """Parse a ``--since``/``--until`` value into an aware datetime.
+
+        Returns ``None`` when the option was not given; values without a
+        UTC offset are taken as UTC. Raises ``CommandError`` when the
+        value cannot be parsed as a date.
+        """
+        if not value:
+            return None
+        try:
+            parsed = date_parser.parse(value)
+        except (ValueError, OverflowError) as exc:
+            raise CommandError(f"Invalid {option} value: {value!r}") from exc
+        if parsed.tzinfo is None:
+            parsed = timezone.make_aware(parsed, dt_timezone.utc)
+        return parsed
diff --git a/src/aap_eda/settings/default.py b/src/aap_eda/settings/default.py
index ab06402f9..8326060bf 100644
--- a/src/aap_eda/settings/default.py
+++ b/src/aap_eda/settings/default.py
@@ -765,3 +765,28 @@ def get_rulebook_process_log_level() -> RulebookProcessLogLevel:
 MAX_PG_NOTIFY_MESSAGE_SIZE = int(
     settings.get("MAX_PG_NOTIFY_MESSAGE_SIZE", 6144)
 )
+
+# --------------------------------------------------------
+# METRICS COLLECTIONS:
+# will be updated when application_settings are available
+# --------------------------------------------------------
+INSIGHTS_TRACKING_STATE = settings.get("INSIGHTS_TRACKING_STATE", True)
+INSIGHTS_CERT_PATH = settings.get(
+    "INSIGHTS_CERT_PATH", "/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem"
+)
+AUTOMATION_ANALYTICS_URL = settings.get(
+    "AUTOMATION_ANALYTICS_URL",
+    "https://console.stage.redhat.com/api/ingress/v1/upload",
+)
+AUTOMATION_ANALYTICS_LAST_GATHER = settings.get(
+    "AUTOMATION_ANALYTICS_LAST_GATHER", None
+)
+AUTOMATION_ANALYTICS_LAST_ENTRIES = settings.get(
+    "AUTOMATION_ANALYTICS_LAST_ENTRIES", None
+)
+REDHAT_USERNAME = settings.get("REDHAT_USERNAME", "unknown")
+REDHAT_PASSWORD = settings.get("REDHAT_PASSWORD", "unknown")
+
+# Available methods:
+# https://github.com/RedHatInsights/insights-analytics-collector/blob/main/insights_analytics_collector/package.py#L27
+AUTOMATION_AUTH_METHOD = settings.get("AUTOMATION_AUTH_METHOD", "user-pass")
diff --git a/tests/integration/analytics/test_analytics_collectors.py b/tests/integration/analytics/test_analytics_collectors.py
new file mode 100644
index 000000000..67a54c32e
--- /dev/null
+++
b/tests/integration/analytics/test_analytics_collectors.py
@@ -0,0 +1,585 @@
+# Copyright 2024 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import csv
+import io
+import json
+import os
+import tarfile
+import tempfile
+from datetime import timedelta
+
+import pytest
+from django.utils.timezone import now
+from insights_analytics_collector import Collector
+
+from aap_eda.analytics import analytics_collectors as collectors
+from aap_eda.analytics.collector import AnalyticsCollector
+from aap_eda.core import models
+
+
+@pytest.mark.django_db
+def test_internal_infra_files():
+    collector = AnalyticsCollector(
+        collection_type=Collector.DRY_RUN, collector_module=collectors
+    )
+    time_start = now() - timedelta(hours=9)
+
+    tgz_files = collector.gather(
+        since=time_start, until=now() + timedelta(seconds=1)
+    )
+
+    assert len(tgz_files) == 1
+
+    files = {}
+    with tarfile.open(tgz_files[0], "r:gz") as archive:
+        for member in archive.getmembers():
+            files[member.name] = archive.extractfile(member)
+
+        assert "./config.json" in files
+        assert "./manifest.json" in files
+        assert "./data_collection_status.csv" in files
+
+        config_json = json.loads(files["./config.json"].read())
+        manifest_json = json.loads(files["./manifest.json"].read())
+        data_collection_status_csv = io.BytesIO(
+            files["./data_collection_status.csv"].read()
+        )
+        data_collection_status = io.TextIOWrapper(
+            data_collection_status_csv, encoding="utf-8"
+        )
+
+        for key in config_json.keys():
+            assert key in [
+                "install_uuid",
+                "platform",
+                "eda_log_level",
+                "eda_version",
+                "eda_deployment_type",
+            ]
+        assert manifest_json["config.json"] == "1.0"
+        assert manifest_json["data_collection_status.csv"] == "1.0"
+
+        reader = csv.reader(data_collection_status)
+        header = next(reader)
+        lines = list(reader)
+
+        assert header == [
+            "collection_start_timestamp",
+            "since",
+            "until",
+            "file_name",
+            "status",
+            "elapsed",
+        ]
+        assert len(lines) == 1
+
+    collector._gather_cleanup()
+
+
+@pytest.mark.django_db
+def test_activations_table_collector(default_activation: models.Activation):
+    time_start = now() - timedelta(hours=9)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        collectors.activations_table(
+            time_start, tmpdir, until=now() + timedelta(seconds=1)
+        )
+        with open(os.path.join(tmpdir, "activations_table.csv")) as f:
+            reader = csv.reader(f)
+
+            header = next(reader)
+            lines = list(reader)
+
+            assert header == [
+                "id",
+                "organization_id",
+                "name",
+                "description",
+                "is_enabled",
+                "git_hash",
+                "decision_environment_id",
+                "project_id",
+                "rulebook_id",
+                "extra_var",
+                "restart_policy",
+                "status",
+                "current_job_id",
+                "restart_count",
+                "failure_count",
+                "is_valid",
+                "rulebook_name",
+                "rulebook_rulesets",
+                "ruleset_stats",
+                "user_id",
+                "created_at",
+                "modified_at",
+                "status_updated_at",
+                "status_message",
+                "latest_instance_id",
+                "awx_token_id",
+                "log_level",
+                "eda_system_vault_credential_id",
+                "k8s_service_name",
+                "source_mappings",
+                "skip_audit_events",
+            ]
+            assert len(lines) == 1
+            assert lines[0][0] == str(default_activation.id)
+            assert lines[0][2] == default_activation.name
+            assert lines[0][3] == default_activation.description
+
+
+def assert_audit_rules(expected_audit_rules):
+    time_start = now() - timedelta(hours=9)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        collectors.audit_rules_table(
+            time_start, tmpdir, until=now() + timedelta(seconds=1)
+        )
+        with open(os.path.join(tmpdir, "audit_rules_table.csv")) as f:
+            reader = csv.reader(f)
+
+            header = next(reader)
+            lines = list(reader)
+
+            assert header == [
+                "id",
+                "organization_id",
+                "name",
+                "status",
+                "created_at",
+                "fired_at",
+                "rule_uuid",
+                "ruleset_uuid",
+                "ruleset_name",
+                "activation_instance_id",
+                "job_instance_id",
+            ]
+            assert len(lines) == len(expected_audit_rules)
+            for i, rule in enumerate(expected_audit_rules):
+                assert lines[i][0] == str(rule.id)
+                assert lines[i][2] == rule.name
+                assert lines[i][3] == rule.status
+
+
+@pytest.mark.django_db
+def test_single_audit_rule_table_collector(
+    default_audit_rule: models.AuditRule,
+):
+    assert_audit_rules([default_audit_rule])
+
+
+@pytest.mark.django_db
+def test_multiple_audit_rules_table_collector(
+    audit_rule_1: models.AuditRule,
+    audit_rule_2: models.AuditRule,
+):
+    assert_audit_rules([audit_rule_1, audit_rule_2])
+
+
+@pytest.mark.django_db
+def test_single_audit_action_table_collector(
+    audit_action_1: models.AuditAction,
+    audit_event_1: models.AuditEvent,
+):
+    assert_audit_actions([audit_action_1])
+    assert_audit_events([audit_event_1])
+
+
+@pytest.mark.django_db
+def test_multiple_audit_action_table_collector(
+    audit_action_1: models.AuditAction,
+    audit_action_2: models.AuditAction,
+    audit_action_3: models.AuditAction,
+    audit_event_1: models.AuditEvent,
+    audit_event_2: models.AuditEvent,
+):
+    assert_audit_actions([audit_action_1, audit_action_2, audit_action_3])
+    assert_audit_events([audit_event_1, audit_event_2])
+
+
+def assert_audit_actions(expected_audit_actions):
+    time_start = now() - timedelta(hours=9)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        collectors.audit_actions_table(
+            time_start, tmpdir, until=now() + timedelta(seconds=1)
+        )
+        with open(os.path.join(tmpdir, "audit_actions_table.csv")) as f:
+            reader = csv.reader(f)
+
+            header = next(reader)
+            lines = list(reader)
+
+            assert header == [
+                "id",
+                "name",
+                "status",
+                "url",
+                "fired_at",
+                "rule_fired_at",
+                "status_message",
+                "audit_rule_id",
+            ]
+            assert len(lines) == len(expected_audit_actions)
+            assert sorted([line[0] for line in lines]) == sorted(
+                [str(action.id) for action in expected_audit_actions]
+            )
+
+
+def assert_audit_events(expected_audit_events):
+    time_start = now() - timedelta(hours=9)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        collectors.audit_events_table(
+            time_start, tmpdir, until=now() + timedelta(seconds=1)
+        )
+        with open(os.path.join(tmpdir, "audit_events_table.csv")) as f:
+            reader = csv.reader(f)
+
+            header = next(reader)
+            lines = list(reader)
+
+            assert header == [
+                "id",
+                "source_name",
+                "source_type",
+                "received_at",
+                "payload",
+                "rule_fired_at",
+            ]
+            assert len(lines) == len(expected_audit_events)
+            assert sorted([line[0] for line in lines]) == sorted(
+                [str(event.id) for event in expected_audit_events]
+            )
+
+
+@pytest.mark.django_db
+def test_eda_credentials_table_collector(
+    default_eda_credential: models.EdaCredential,
+):
+    time_start = now() - timedelta(hours=9)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        collectors.eda_credentials_table(
+            time_start, tmpdir, until=now() + timedelta(seconds=1)
+        )
+        with open(os.path.join(tmpdir, "eda_credentials_table.csv")) as f:
+            reader = csv.reader(f)
+
+            header = next(reader)
+            lines = list(reader)
+
+            assert header == [
+                "id",
+                "organization_id",
+                "name",
+                "description",
+                "inputs",
+                "managed",
+                "created_at",
+                "modified_at",
+                "credential_type_id",
+            ]
+            assert len(lines) == 1
+            assert lines[0][0] == str(default_eda_credential.id)
+            assert lines[0][2] == default_eda_credential.name
+            assert lines[0][3] == default_eda_credential.description
+
+
+@pytest.mark.django_db
+def test_credential_types_table_collector(
+    default_credential_type: models.CredentialType,
+):
+    time_start = now() - timedelta(hours=9)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        collectors.credential_types_table(
+            time_start, tmpdir, until=now() + timedelta(seconds=1)
+        )
+        with open(os.path.join(tmpdir, "credential_types_table.csv")) as f:
+            reader = csv.reader(f)
+
+            header = next(reader)
+            lines = list(reader)
+
+            assert header == [
+                "id",
+                "name",
+                "description",
+                "inputs",
+                "injectors",
+                "managed",
+                "kind",
+                "namespace",
+                "created_at",
+                "modified_at",
+            ]
+            assert len(lines) == 1
+            assert lines[0][0] == str(default_credential_type.id)
+            assert lines[0][1] == default_credential_type.name
+            assert lines[0][2] == default_credential_type.description
+
+
+@pytest.mark.django_db
+def test_decision_environments_table_collector(
+    default_decision_environment: models.DecisionEnvironment,
+):
+    time_start = now() - timedelta(hours=9)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        collectors.decision_environments_table(
+            time_start, tmpdir, until=now() + timedelta(seconds=1)
+        )
+        with open(
+            os.path.join(tmpdir, "decision_environments_table.csv")
+        ) as f:
+            reader = csv.reader(f)
+
+            header = next(reader)
+            lines = list(reader)
+
+            assert header == [
+                "id",
+                "organization_id",
+                "name",
+                "description",
+                "image_url",
+                "credential_id",
+                "eda_credential_id",
+                "created_at",
+                "modified_at",
+            ]
+            assert len(lines) == 1
+            assert lines[0][0] == str(default_decision_environment.id)
+            assert lines[0][2] == default_decision_environment.name
+            assert lines[0][3] == default_decision_environment.description
+
+
+@pytest.mark.django_db
+def test_event_streams_table_collector(
+    default_event_stream: models.EventStream,
+):
+    time_start = now() - timedelta(hours=9)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        collectors.event_streams_table(
+            time_start, tmpdir, until=now() + timedelta(seconds=1)
+        )
+        with open(os.path.join(tmpdir, "event_streams_table.csv")) as f:
+            reader = csv.reader(f)
+
+            header = next(reader)
+            lines = list(reader)
+
+            assert header == [
+                "id",
+                "organization_id",
+                "name",
+                "event_stream_type",
+                "eda_credential_id",
+                "additional_data_headers",
+                "test_mode",
+                "test_content_type",
+                "test_content",
+                "test_headers",
+                "test_error_message",
+                "owner_id",
+                "uuid",
+                "url",
+                "created_at",
+                "modified_at",
+                "events_received",
+                "last_event_received_at",
+            ]
+            assert len(lines) == 1
+            assert lines[0][0] == str(default_event_stream.id)
+            assert lines[0][2] == default_event_stream.name
+            assert lines[0][3] == default_event_stream.event_stream_type
+
+
+@pytest.mark.django_db
+def test_projects_table_collector(
+    default_project: models.Project,
+):
+    time_start = now() - timedelta(hours=9)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        collectors.projects_table(
+            time_start, tmpdir, until=now() + timedelta(seconds=1)
+        )
+        with open(os.path.join(tmpdir, "projects_table.csv")) as f:
+            reader = csv.reader(f)
+
+            header = next(reader)
+            lines = list(reader)
+
+            assert header == [
+                "id",
+                "organization_id",
+                "name",
+                "description",
+                "url",
+                "proxy",
+                "git_hash",
+                "verify_ssl",
+                "credential_id",
+                "eda_credential_id",
+                "archive_file",
+                "import_state",
+                "import_task_id",
+                "import_error",
+                "created_at",
+                "modified_at",
+                "scm_type",
+                "scm_branch",
+                "scm_refspec",
+                "signature_validation_credential_id",
+            ]
+            assert len(lines) == 1
+            assert lines[0][0] == str(default_project.id)
+            assert lines[0][2] == default_project.name
+            assert lines[0][3] == default_project.description
+
+
+@pytest.mark.django_db
+def test_rulebooks_table_collector(
+    default_rulebook: models.Rulebook,
+):
+    time_start = now() - timedelta(hours=9)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        collectors.rulebooks_table(
+            time_start, tmpdir, until=now() + timedelta(seconds=1)
+        )
+        with open(os.path.join(tmpdir, "rulebooks_table.csv")) as f:
+            reader = csv.reader(f)
+
+            header = next(reader)
+            lines = list(reader)
+
+            assert header == [
+                "id",
+                "organization_id",
+                "name",
+                "description",
+                "rulesets",
+                "project_id",
+                "created_at",
+                "modified_at",
+            ]
+            assert len(lines) == 1
+            assert lines[0][0] == str(default_rulebook.id)
+            assert lines[0][2] == default_rulebook.name
+            assert lines[0][3] == default_rulebook.description
+
+
+@pytest.mark.django_db
+def test_rulebook_processes_table_collector(
+    default_activation_instance: models.RulebookProcess,
+):
+    time_start = now() - timedelta(hours=9)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        collectors.rulebook_processes_table(
+            time_start, tmpdir, until=now() + timedelta(seconds=1)
+        )
+        with open(os.path.join(tmpdir, "rulebook_processes_table.csv")) as f:
+            reader = csv.reader(f)
+
+            header = next(reader)
+            lines = list(reader)
+
+            assert header == [
+                "id",
+                "organization_id",
+                "name",
+                "status",
+                "git_hash",
+                "activation_id",
+                "parent_type",
+                "started_at",
+                "updated_at",
+                "ended_at",
+                "activation_pod_id",
+                "status_message",
+                "log_read_at",
+            ]
+            assert len(lines) == 1
+            assert lines[0][0] == str(default_activation_instance.id)
+            assert lines[0][2] == default_activation_instance.name
+            assert lines[0][3] == default_activation_instance.status
+
+
+@pytest.mark.django_db
+def test_organizations_table_collector(
+    default_organization: models.Organization,
+):
+    time_start = now() - timedelta(hours=9)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        collectors.organizations_table(
+            time_start, tmpdir, until=now() + timedelta(seconds=1)
+        )
+        with open(os.path.join(tmpdir, "organizations_table.csv")) as f:
+            reader = csv.reader(f)
+
+            header = next(reader)
+            lines = list(reader)
+
+            assert header == [
+                "id",
+                "modified",
+                "modified_by_id",
+                "created",
+                "created_by_id",
+                "name",
+                "description",
+            ]
+            assert len(lines) == 1
+            assert lines[0][0] == str(default_organization.id)
+            assert lines[0][5] == default_organization.name
+            assert lines[0][6] == default_organization.description
+
+
+@pytest.mark.django_db
+def test_teams_table_collector(
+    default_team: models.Team,
+):
+    time_start = now() - timedelta(hours=9)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        collectors.teams_table(
+            time_start, tmpdir, until=now() + timedelta(seconds=1)
+        )
+        with open(os.path.join(tmpdir, "teams_table.csv")) as f:
+            reader = csv.reader(f)
+
+            header = next(reader)
+            lines = list(reader)
+
+            assert header == [
+                "id",
+                "modified",
+                "modified_by_id",
+                "created",
+                "created_by_id",
+                "name",
+                "description",
+                "organization_id",
+            ]
+            assert len(lines) == 1
+            assert lines[0][0] == str(default_team.id)
+            assert lines[0][5] == default_team.name
+            assert lines[0][6] == default_team.description
diff --git a/tests/integration/analytics/test_utils.py b/tests/integration/analytics/test_utils.py
new file mode 100644
index 000000000..c8c7ea2bd
--- /dev/null
+++ b/tests/integration/analytics/test_utils.py
@@ -0,0 +1,46 @@
+# Copyright 2024 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import json
+
+from django.core.serializers.json import DjangoJSONEncoder
+
+from aap_eda.analytics.utils import datetime_hook
+
+
+def test_datetime_hook():
+    data = {
+        "started_at": "2024-09-13 14:42:49.188",
+        "ended_at": "2024-09-13 14:43:10,654",
+    }
+    data_json = json.dumps(data, cls=DjangoJSONEncoder)
+
+    result = json.loads(data_json, object_hook=datetime_hook)
+
+    assert isinstance(result["started_at"], datetime.datetime)
+    assert isinstance(result["ended_at"], datetime.datetime)
+
+
+def test_bad_datetime_hook():
+    data = {
+        "started_at": "2024-09-13 14:42:49.188",
+        "ended_at": "bad_2024-09-13 14:43:10,654",
+    }
+    data_json = json.dumps(data, cls=DjangoJSONEncoder)
+
+    result = json.loads(data_json, object_hook=datetime_hook)
+
+    assert isinstance(result["started_at"], datetime.datetime)
+    assert result["ended_at"] is None