
Update catalog to non-legacy environment names and point to new airflow.openverse.org (#4053)

* Update catalog to non-legacy environment names

* Remove errant pytest configuration

* Update catalog/env.template

Co-authored-by: Olga Bulat <obulat@gmail.com>

---------

Co-authored-by: Olga Bulat <obulat@gmail.com>
sarayourfriend and obulat authored Apr 18, 2024
1 parent 092ad1a commit 51fd235
Showing 12 changed files with 73 additions and 57 deletions.
2 changes: 1 addition & 1 deletion api/api/templates/admin/base_site.html
@@ -15,7 +15,7 @@
<li{% if message.tags %} class="{{ message.tags }}"{% endif %}>{{ message|capfirst }}</li>
{% endfor %}
<li class="warning">Next staging database restore will occur in <span id="staging-db-refresh-days"></span> days.
-<a href="https://airflow.openverse.engineering/dags/staging_database_restore/grid">
+<a href="https://airflow.openverse.org/dags/staging_database_restore/grid">
View the DAG for information on how to skip this process.
</a>
</li>
10 changes: 7 additions & 3 deletions catalog/dags/common/slack.py
@@ -299,11 +299,15 @@ def should_send_message(
return False

# Exit early if we aren't on production or if force alert is not set
-environment = Variable.get("ENVIRONMENT", default_var="dev")
+environment = Variable.get("ENVIRONMENT", default_var="local")
force_message = Variable.get(
"SLACK_MESSAGE_OVERRIDE", default_var=False, deserialize_json=True
)
-if not (environment == "prod" or force_message):
+
+# prevent circular import
+from common.constants import PRODUCTION
+
+if not (environment == PRODUCTION or force_message):
log.info(
f"Skipping Slack notification for {dag_id}:{task_id} in"
f" `{environment}` environment. To send the notification, enable"
@@ -332,7 +336,7 @@ def send_message(
):
return

-environment = Variable.get("ENVIRONMENT", default_var="dev")
+environment = Variable.get("ENVIRONMENT", default_var="local")
s = SlackMessage(
f"{username} | {environment}",
icon_emoji,
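The gate introduced above can be sketched in isolation. This is a simplified, hypothetical stand-in: `get_var` replaces Airflow's `Variable.get`, and `PRODUCTION` is assumed to be the string `"production"` from `common.constants`; the real function also checks DAG/task silencing, which is omitted here.

```python
# Simplified sketch of the environment gate in should_send_message().
# get_var is a hypothetical stand-in for airflow.models.Variable.get.
PRODUCTION = "production"  # assumed value of common.constants.PRODUCTION


def should_send_message(get_var) -> bool:
    # Default to the non-production "local" environment when unset.
    environment = get_var("ENVIRONMENT", default_var="local")
    force_message = get_var("SLACK_MESSAGE_OVERRIDE", default_var=False)
    # Only notify from production, unless the override is enabled.
    return environment == PRODUCTION or bool(force_message)


def fake_var(values):
    """Tiny fake for Variable.get, backed by a dict."""
    return lambda key, default_var=None: values.get(key, default_var)


print(should_send_message(fake_var({"ENVIRONMENT": "production"})))  # True
print(should_send_message(fake_var({})))  # False: defaults to "local"
print(should_send_message(fake_var({"SLACK_MESSAGE_OVERRIDE": True})))  # True
```

The key change in this commit is the default: an unset `ENVIRONMENT` now reads as `local` instead of the legacy `dev`, and the comparison uses the `PRODUCTION` constant instead of the literal `"prod"`.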
3 changes: 3 additions & 0 deletions catalog/dags/database/staging_database_restore/constants.py
@@ -1,6 +1,9 @@
_ID_FORMAT = "{}-openverse-db"

DAG_ID = "staging_database_restore"
+# These identifiers must match the existing databases,
+# and follow the legacy environment names, where
+# "prod" is used for production and "dev" is used for staging
PROD_IDENTIFIER = _ID_FORMAT.format("prod")
STAGING_IDENTIFIER = _ID_FORMAT.format("dev")
TEMP_IDENTIFIER = _ID_FORMAT.format("dev-next")
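The template above bakes the legacy environment names into the RDS identifiers, which is why they are kept even as the Airflow variables move to `local`/`production`. A quick sketch of what the format string produces:

```python
# Mirrors the template in staging_database_restore/constants.py:
# the legacy environment names are part of the database identifiers.
_ID_FORMAT = "{}-openverse-db"

PROD_IDENTIFIER = _ID_FORMAT.format("prod")      # production database
STAGING_IDENTIFIER = _ID_FORMAT.format("dev")    # staging database (legacy "dev")
TEMP_IDENTIFIER = _ID_FORMAT.format("dev-next")  # temporary restore target

print(PROD_IDENTIFIER)    # prod-openverse-db
print(STAGING_IDENTIFIER) # dev-openverse-db
print(TEMP_IDENTIFIER)    # dev-next-openverse-db
```

Renaming these would require renaming the actual RDS instances, so the new comment documents the mismatch instead.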
@@ -60,6 +60,6 @@
op_kwargs={
"github_pat": "{{ var.value.get('GITHUB_API_KEY', 'not_set') }}",
"dry_run": "{{ var.json.get('PR_REVIEW_REMINDER_DRY_RUN', "
-"var.value.ENVIRONMENT != 'prod') }}",
+"var.value.ENVIRONMENT != 'production') }}",
},
)
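The Jinja expression above has a computed fallback: when the `PR_REVIEW_REMINDER_DRY_RUN` variable is unset, dry-run defaults to on everywhere except production. A rough Python equivalent of that template logic (the function name and dict-based lookup are illustrative, not part of the DAG):

```python
def pr_reminder_dry_run(variables: dict) -> bool:
    # Equivalent of:
    #   {{ var.json.get('PR_REVIEW_REMINDER_DRY_RUN',
    #                   var.value.ENVIRONMENT != 'production') }}
    default = variables.get("ENVIRONMENT") != "production"
    return variables.get("PR_REVIEW_REMINDER_DRY_RUN", default)


print(pr_reminder_dry_run({"ENVIRONMENT": "production"}))  # False: real reminders
print(pr_reminder_dry_run({"ENVIRONMENT": "local"}))       # True: dry run
print(pr_reminder_dry_run({"ENVIRONMENT": "local",
                           "PR_REVIEW_REMINDER_DRY_RUN": False}))  # False
```

This is why the literal in the template had to change: with the new environment names, `'prod'` would never match, and the DAG would silently dry-run in production.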
10 changes: 5 additions & 5 deletions catalog/env.template
@@ -20,7 +20,7 @@ AIRFLOW__WEBSERVER__SECRET_KEY=sample-secret-key=
# Executor to use
AIRFLOW__CORE__EXECUTOR=LocalExecutor
# Environment this instance is being run in
-AIRFLOW_VAR_ENVIRONMENT=dev
+AIRFLOW_VAR_ENVIRONMENT=local

########################################################################################
# API Keys
@@ -34,26 +34,26 @@ AIRFLOW_VAR_ENVIRONMENT=dev
# Connection/Variable info
########################################################################################
# Airflow primary metadata database
-# Change the following line in prod to use the appropriate DB
+# Change the following line in production to use the appropriate DB
AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://airflow:airflow@upstream_db:5432/airflow
# Remote logging connection ID
# Replace "access_key" and "secret+key" with the real values. Secret key must be URL-encoded
AIRFLOW_CONN_AWS_DEFAULT=aws://test_key:test_secret@?region_name=us-east-1&endpoint_url=http%3A%2F%2Fs3%3A5000

-# Catalog DB connection. Change the following line in prod to use the appropriate DB
+# Catalog DB connection. Change the following line in production to use the appropriate DB
AIRFLOW_CONN_POSTGRES_OPENLEDGER_UPSTREAM=postgres://deploy:deploy@upstream_db:5432/openledger
AIRFLOW_CONN_POSTGRES_OPENLEDGER_TESTING=postgres://deploy:deploy@upstream_db:5432/openledger
AIRFLOW_CONN_POSTGRES_OPENLEDGER_API_STAGING=postgres://deploy:deploy@db:5432/openledger
TEST_CONN_ID=postgres_openledger_testing

-# Elasticsearch connections. Change the following line in prod to use the appropriate URLs.
+# Elasticsearch connections. Change the following line in production to use the appropriate URLs.
AIRFLOW_CONN_ELASTICSEARCH_HTTP_PRODUCTION=http://es:9200
AIRFLOW_CONN_ELASTICSEARCH_HTTP_STAGING=http://es:9200

# AWS CloudWatch connection. Change the following line to toggle alarms during a Data Refresh.
# AIRFLOW_CONN_AWS_CLOUDWATCH=aws://<key>:<secret>@?region_name=us-east-1

-# API DB connection. Change the following line in prod to use the appropriate DB
+# API DB connection. Change the following line in production to use the appropriate DB
AIRFLOW_CONN_POSTGRES_OPENLEDGER_API=postgres://deploy:deploy@db:5432/openledger

# Slack webhook connection info (note that these values are modified by the Docker entrypoint)
1 change: 1 addition & 0 deletions catalog/pytest.ini
@@ -18,6 +18,7 @@ addopts =
--disable-socket
--allow-unix-socket


# flask
# https://docs.sqlalchemy.org/en/20/errors.html#error-b8d9
# Warning in dependency, nothing we can do
54 changes: 27 additions & 27 deletions catalog/tests/dags/common/test_slack.py
@@ -292,28 +292,28 @@ def test_send_fails(http_hook_mock):
@pytest.mark.parametrize(
"environment, slack_message_override, silenced_notifications, expected_result",
[
-# Dev
+# non-production
# Message is not sent by default. It is only sent if the override is enabled,
# AND notifications are not silenced.
# Default
-("dev", False, False, False),
+("local", False, False, False),
# Override is not enabled AND notifications are silenced
-("dev", False, True, False),
+("local", False, True, False),
# Override is enabled AND notifications NOT silenced
-("dev", True, False, True),
+("local", True, False, True),
# Override is enabled but notifications are silenced
-("dev", True, True, False),
-# Prod
+("local", True, True, False),
+# Production
# Message is sent by default; the override has no effect, but messages are
# not sent when notifications are silenced.
# Default
-("prod", False, False, True),
+("production", False, False, True),
# Override not enabled, notifications ARE silenced
-("prod", False, True, False),
+("production", False, True, False),
# Override enabled, notifications are NOT silenced
-("prod", True, False, True),
+("production", True, False, True),
# Override enabled, notifications ARE silenced
-("prod", True, True, False),
+("production", True, True, False),
],
)
def test_should_send_message(
@@ -503,7 +503,7 @@ def test_should_silence_message(silenced_notifications, should_silence):
)


-@pytest.mark.parametrize("environment", ["dev", "prod"])
+@pytest.mark.parametrize("environment", ["local", "production"])
def test_send_message(environment, http_hook_mock):
with mock.patch("common.slack.should_send_message", return_value=True), mock.patch(
"common.slack.Variable"
@@ -546,25 +546,25 @@ def test_send_alert():
"exception, environment, slack_message_override, call_expected",
[
# Message with exception
-(ValueError("Whoops!"), "dev", False, False),
-(ValueError("Whoops!"), "dev", True, True),
-(ValueError("Whoops!"), "prod", False, True),
-(ValueError("Whoops!"), "prod", True, True),
+(ValueError("Whoops!"), "local", False, False),
+(ValueError("Whoops!"), "local", True, True),
+(ValueError("Whoops!"), "production", False, True),
+(ValueError("Whoops!"), "production", True, True),
# Strings should also be allowed
-("task marked as failed externally", "dev", False, False),
-("task marked as failed externally", "dev", True, True),
-("task marked as failed externally", "prod", False, True),
-("task marked as failed externally", "prod", True, True),
+("task marked as failed externally", "local", False, False),
+("task marked as failed externally", "local", True, True),
+("task marked as failed externally", "production", False, True),
+("task marked as failed externally", "production", True, True),
# Message without exception
-(None, "dev", False, False),
-(None, "dev", True, True),
-(None, "prod", False, True),
-(None, "prod", True, True),
+(None, "local", False, False),
+(None, "local", True, True),
+(None, "production", False, True),
+(None, "production", True, True),
# Exception with upstream failure message should never run
-(ValueError("Upstream task(s) failed"), "dev", False, False),
-(ValueError("Upstream task(s) failed"), "dev", True, False),
-(ValueError("Upstream task(s) failed"), "prod", False, False),
-(ValueError("Upstream task(s) failed"), "prod", True, False),
+(ValueError("Upstream task(s) failed"), "local", False, False),
+(ValueError("Upstream task(s) failed"), "local", True, False),
+(ValueError("Upstream task(s) failed"), "production", False, False),
+(ValueError("Upstream task(s) failed"), "production", True, False),
],
)
def test_on_failure_callback(
8 changes: 4 additions & 4 deletions catalog/utilities/README.md
@@ -1,6 +1,6 @@
# Utilities

-This folder contains utilities that may be run in the dev environment (e.g.
-devex utilities, documentation generation, etc.). This folder is not mounted
-into the container in production and thus will not be available or accessible in
-that environment.
+This folder contains utilities that may be run in the local development
+environment (e.g. devex utilities, documentation generation, etc.). This folder
+is not mounted into the container in production and thus will not be available
+or accessible in that environment.
32 changes: 20 additions & 12 deletions documentation/catalog/guides/deploy.md
@@ -2,8 +2,7 @@

## Setup

-1. Check
-[the running DAGs](https://airflow.openverse.engineering/home?status=running)
+1. Check [the running DAGs](https://airflow.openverse.org/home?status=running)
in Airflow to make sure no DAGs are running.

```{caution}
@@ -25,25 +24,34 @@ the app is built and tagged, deploy production:

1. Checkout the
[infrastructure repository](https://github.com/wordpress/openverse-infrastructure)
-and bump the catalog version with the `just bump prod catalog-airflow`
-command.
-1. Once you've verified that no DAGs are running, update the value of
-`running_dags_cleared` to `true` in the
-[production module declaration](https://github.com/WordPress/openverse-infrastructure/blob/27c41ede9b24991909194e0a6477f6b11fceac0c/environments/prod/catalog-airflow.tf#L33).
-1. `just apply prod catalog-airflow` and verify the plan before deploying.
-1. Restore the value of `running_dags_cleared` back to `false`.
+and bump the catalog version with the `just bump production airflow` command.
+1. `just ansible/playbook production airflow.yml -t airflow` and verify the plan
+before deploying. Unless configuration variables are changing along with the
+docker image version, the only change should be to the docker image tag in
+the compose file. Run the playbook with `-e airflow_apply=true` to instruct
+the playbook to actually apply any changes.

+- If _any_ DAGs are running, the playbook will not apply the changes and will
+  let you know that. If this happens, visit Airflow and confirm the list of
+  running DAGs. If they can be stopped, stop them. If they need to be waited
+  for, wait until they are done, then run the playbook again. If you must deploy
+  and cannot wait for the DAGs to finish (or, if they are deferred and cannot
+  finish), run the playbook with `-e airflow_force=true` to ignore the running
+  DAGs check.
+- See the [setup](#setup) section above for more information about when to
+  decide if it is okay to deploy when DAGs are running.

## Post-deployment steps

1. Check for any Sentry errors in the maintainer's `#openverse-alerts` channel,
or in the Sentry UI.
-1. Ensure that Airflow is accessible at <https://airflow.openverse.engineering>.
+1. Ensure that Airflow is accessible at <https://airflow.openverse.org>.
1. If an Airflow version upgrade was deployed, ensure that the version is
correct in the Airflow UI (bottom left of the footer on any page).
1. Review and Approve the automatically-generated changelog pull request in the
repository.
-1. Push up a PR to the infrastructure repository with the Terraform changes you
-pushed (the version bump for the relevant module).
+1. Push up a PR to the infrastructure repository with the Ansible group var
+changes you pushed.
1. In the event of errors or problems, rollback the application by running the
appropriate deployment workflow from the WordPress/openverse-infrastructure
repository using the tag of the latest stable version. You can find the
2 changes: 1 addition & 1 deletion documentation/catalog/guides/deployment.md
@@ -6,7 +6,7 @@ deployment procedures for both components.

# Airflow

-> **URL**: https://airflow.openverse.engineering
+> **URL**: https://airflow.openverse.org
Airflow has two different deployment mechanisms: **service deployments** and
**DAG deployments**. Service deployments occur when the Airflow service itself
2 changes: 1 addition & 1 deletion documentation/catalog/guides/quickstart.md
@@ -84,7 +84,7 @@ The `.env` file is split into four sections:
3. Connection/Variable info - this will not likely need to be modified for local
development, though the values will need to be changed in production
4. Other config - misc. configuration settings, some of which are useful for
-local dev
+local development

The `.env` file does not need to be modified if you only want to run the tests.

4 changes: 2 additions & 2 deletions documentation/ingestion_server/guides/deploy.md
@@ -2,8 +2,8 @@

## Setup

-1. Check [Airflow](https://airflow.openverse.engineering/home?tags=data_refresh)
-to make sure a data refresh isn't occurring.
+1. Check [Airflow](https://airflow.openverse.org/home?tags=data_refresh) to make
+sure a data refresh isn't occurring.
1. [Publish the drafted ingestion server release in the GitHub release page of the monorepo](https://github.com/WordPress/openverse/releases?q=ingestion_server-)
- Here you can preview the changes included in the ingestion server release
and decide whether a release is necessary and adjust monitoring during the
