From fbaac1ab8820d9856ca104feb34ea753378222ec Mon Sep 17 00:00:00 2001 From: Pal Kerecsenyi Date: Mon, 22 Sep 2025 11:42:42 +0200 Subject: [PATCH] perf(models): change `extra_data` to `JSONB` * `JSONB` is a newer and more efficient format for storing JSON in PostgreSQL. Compared to `JSON`, it offers significantly faster querying and indexing support (but is very slightly slower to input). The JSON is decomposed into a binary format rather than being stored in plaintext. * From a high-level perspective, the access of the data is unchanged in Python so this is not a breaking change. However, the new column type supports more querying operations which could be used in the future. * Migrating the column type is quite easy for small tables but can cause performance and stability issues for larger ones. Therefore, a default Alembic migration has been included for the vast majority of use cases, as well as a step-by-step alternative guide for larger instances. Instances with more than ~50k rows in `oauthclient_remoteaccount` are advised not to use the Alembic migration and to instead follow the upgrade guide. --- CHANGES.rst | 6 ++ docs/index.rst | 1 + docs/upgrading.rst | 66 +++++++++++++++++++ invenio_oauthclient/__init__.py | 2 +- .../alembic/1758275763_extra_data_jsonb.py | 39 +++++++++++ invenio_oauthclient/models.py | 11 +++- 6 files changed, 123 insertions(+), 2 deletions(-) create mode 100644 docs/upgrading.rst create mode 100644 invenio_oauthclient/alembic/1758275763_extra_data_jsonb.py diff --git a/CHANGES.rst b/CHANGES.rst index 4944e247..c3a31865 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,6 +10,12 @@ Changes ======= +Version v6.0.0 (released 2025-XX-XX) + +- perf(models): changed SQL column type of `extra_data` to `JSONB` instead of `JSON` for PostgreSQL + + - This migration could flood the WAL when run on very large `oauthclient_remoteaccount` tables (~50k+ rows) as well as causing a full lock that lasts several minutes and stops users from being able to log in with third-party accounts. This won't happen in the majority of cases. For instances not meeting this size criteria, the automated Alembic migration will suffice. However, for tables above this size, please use instead the step-by-step instructions in the upgrade guide (included in the documentation of this module). + Version v5.3.1 (released 2025-10-21) - i18n: pulled translations diff --git a/docs/index.rst b/docs/index.rst index 746c2b69..efba1d79 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -22,6 +22,7 @@ Invenio-OAuthClient. configuration usage examplesapp + upgrading API Reference ============= diff --git a/docs/upgrading.rst b/docs/upgrading.rst new file mode 100644 index 00000000..432bf0c8 --- /dev/null +++ b/docs/upgrading.rst @@ -0,0 +1,66 @@ +Upgrading +========= + +This page contains specific information on upgrading between certain versions of ``invenio-oauthclient`` where special steps are involved. + +====== +v6.0.0 +====== + +This version involves an Alembic migration (``1758275763``) moving the ``extra_data`` column from the ``JSON`` type to the ``JSONB`` type (**only for PostgreSQL**). +By doing this, we can improve performance and use more advanced JSON queries. + +For the majority of users, this shouldn't be an issue as the migration will be handled automatically when upgrading. +However, users with an ``oauthclient_remoteaccount`` table with ~50k+ rows should use an alternative approach, as this operation could overfill the WAL and affect the stability of the database, as well as creating a full lock for several minutes. + +Instead, use these steps: + +1. Install the new version of `invenio-oauthclient`, but do not run the Alembic migrations. + +2. In an SQL console, run: + + .. code-block:: sql + + ALTER TABLE oauthclient_remoteaccount ADD COLUMN extra_data_b jsonb; + +3. Next, run this query repeatedly until the response indicates that no new rows are being updated. + You can control the batch size depending on your requirements using the ``LIMIT`` value. + + .. code-block:: sql + + WITH cte AS ( + SELECT id + FROM oauthclient_remoteaccount + WHERE extra_data_b IS NULL + ORDER BY id + LIMIT 1000 + FOR UPDATE SKIP LOCKED + ) + UPDATE oauthclient_remoteaccount r + SET extra_data_b = r.extra_data::jsonb + FROM cte + WHERE r.id = cte.id + RETURNING r.id; + +4. Double check there are no rows left to migrate. + + .. code-block:: sql + + SELECT COUNT(*) FROM oauthclient_remoteaccount WHERE extra_data_b IS NULL; + + This should return ``0``. If it does not, continue repeating step 3. + +5. Drop and rename the columns in a simultaneous operation (this requires a brief lock but is much faster than the normal migration). + + .. code-block:: sql + + BEGIN; + ALTER TABLE oauthclient_remoteaccount DROP COLUMN extra_data; + ALTER TABLE oauthclient_remoteaccount RENAME COLUMN extra_data_b TO extra_data; + COMMIT; + +6. Finally, mark the relevant migration as having been manually performed. + + .. code-block:: bash + + invenio alembic stamp 1758275763 diff --git a/invenio_oauthclient/__init__.py b/invenio_oauthclient/__init__.py index 100d9af8..dc27c2e7 100644 --- a/invenio_oauthclient/__init__.py +++ b/invenio_oauthclient/__init__.py @@ -14,7 +14,7 @@ from .oauth import oauth_link_external_id, oauth_unlink_external_id from .proxies import current_oauthclient -__version__ = "5.3.1" +__version__ = "6.0.0" __all__ = ( "__version__", diff --git a/invenio_oauthclient/alembic/1758275763_extra_data_jsonb.py b/invenio_oauthclient/alembic/1758275763_extra_data_jsonb.py new file mode 100644 index 00000000..5c857ae5 --- /dev/null +++ b/invenio_oauthclient/alembic/1758275763_extra_data_jsonb.py @@ -0,0 +1,39 @@ +# +# This file is part of Invenio. +# Copyright (C) 2016-2018 CERN. +# +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""Extra Data JSONB.""" + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "1758275763" +down_revision = "aaa265b0afa6" +branch_labels = () +depends_on = None + + +def upgrade(): + """Upgrade database.""" + if op.get_context().dialect.name == "postgresql": + op.alter_column( + "oauthclient_remoteaccount", + "extra_data", + type_=sa.dialects.postgresql.JSONB, + postgresql_using="extra_data::text::jsonb", + ) + + +def downgrade(): + """Downgrade database.""" + if op.get_context().dialect.name == "postgresql": + op.alter_column( + "oauthclient_remoteaccount", + "extra_data", + type_=sa.dialects.postgresql.JSON, + postgresql_using="extra_data::text::json", + ) diff --git a/invenio_oauthclient/models.py b/invenio_oauthclient/models.py index f0c9c6cf..f418950d 100644 --- a/invenio_oauthclient/models.py +++ b/invenio_oauthclient/models.py @@ -16,6 +16,7 @@ # here, means previous imports won't break. from invenio_accounts.models import User, UserIdentity from invenio_db import db +from sqlalchemy.dialects import postgresql from sqlalchemy.ext.mutable import MutableDict from sqlalchemy.orm import backref from sqlalchemy_utils import JSONType, StringEncryptedType, Timestamp @@ -45,7 +46,15 @@ class RemoteAccount(db.Model, Timestamp): client_id = db.Column(db.String(255), nullable=False) """Client ID of remote application (defined in OAUTHCLIENT_REMOTE_APPS).""" - extra_data = db.Column(MutableDict.as_mutable(JSONType), nullable=False) + extra_data = db.Column( + MutableDict.as_mutable( + db.JSON() + .with_variant(postgresql.JSONB(), "postgresql") + .with_variant(JSONType(), "sqlite") + .with_variant(JSONType(), "mysql") + ), + nullable=False, + ) """Extra data associated with this linked account.""" #