From e25f5e092935f185b257cc1fbae5cc6a8c131818 Mon Sep 17 00:00:00 2001 From: John Davis Date: Wed, 18 Sep 2024 15:52:34 -0400 Subject: [PATCH 01/13] Add directory for migrations data fixing scripts --- lib/galaxy/model/migrations/data_fixes/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 lib/galaxy/model/migrations/data_fixes/__init__.py diff --git a/lib/galaxy/model/migrations/data_fixes/__init__.py b/lib/galaxy/model/migrations/data_fixes/__init__.py new file mode 100644 index 000000000000..8b48aef46800 --- /dev/null +++ b/lib/galaxy/model/migrations/data_fixes/__init__.py @@ -0,0 +1,4 @@ +""" +Package contains code for fixing inconsistent data in the database; these fixes +must be run together with a migration script. +""" From 505cd87eb59adc5b28b3fc8655d3e1b97c870baa Mon Sep 17 00:00:00 2001 From: John Davis Date: Wed, 18 Sep 2024 17:22:04 -0400 Subject: [PATCH 02/13] Add username deduplication data fixer --- .../migrations/data_fixes/user_table_fixer.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 lib/galaxy/model/migrations/data_fixes/user_table_fixer.py diff --git a/lib/galaxy/model/migrations/data_fixes/user_table_fixer.py b/lib/galaxy/model/migrations/data_fixes/user_table_fixer.py new file mode 100644 index 000000000000..c293dd2fec4f --- /dev/null +++ b/lib/galaxy/model/migrations/data_fixes/user_table_fixer.py @@ -0,0 +1,47 @@ +from sqlalchemy import ( + func, + Result, + select, + update, +) + +from galaxy.model import User + + +class UsernameDeduplicator: + + def __init__(self, connection): + self.connection = connection + + def run(self): + """ + Deduplicate usernames by generating a unique value for all duplicates, keeping + the username of the most recently created user unchanged. + """ + duplicates = self._get_duplicate_username_data() + prev_username = None + for id, username, _ in duplicates: + if username == prev_username: + new_username = self._generate_next_available_username(username) + stmt = update(User).where(User.id == id).values(username=new_username) + self.connection.execute(stmt) + else: + prev_username = username + + def _get_duplicate_username_data(self) -> Result: + # Duplicate usernames + counts = select(User.username, func.count()).group_by(User.username).having(func.count() > 1) + sq = select(User.username).select_from(counts.cte()) + # User data for records with duplicate usernames (ordering: newest to oldest) + stmt = ( + select(User.id, User.username, User.create_time) + .where(User.username.in_(sq)) + .order_by(User.username, User.create_time.desc()) + ) + return self.connection.execute(stmt) + + def _generate_next_available_username(self, username): + i = 1 + while self.connection.execute(select(User).where(User.username == f"{username}-{i}")).first(): + i += 1 + return f"{username}-{i}" From 90f298bae8e9a11cb27cd65f35d95203da09d1fb Mon Sep 17 00:00:00 2001 From: John Davis Date: Wed, 3 Jul 2024 12:56:06 -0400 Subject: [PATCH 03/13] Add migration for username column unique constraint --- ...a6168_username_column_unique_constraint.py | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 lib/galaxy/model/migrations/alembic/versions_gxy/d619fdfa6168_username_column_unique_constraint.py diff --git a/lib/galaxy/model/migrations/alembic/versions_gxy/d619fdfa6168_username_column_unique_constraint.py b/lib/galaxy/model/migrations/alembic/versions_gxy/d619fdfa6168_username_column_unique_constraint.py new file mode 100644 index 000000000000..de09d29097bb --- /dev/null +++ 
b/lib/galaxy/model/migrations/alembic/versions_gxy/d619fdfa6168_username_column_unique_constraint.py @@ -0,0 +1,51 @@ +"""Username column unique constraint + +Revision ID: d619fdfa6168 +Revises: d2d8f51ebb7e +Create Date: 2024-07-02 13:13:10.325586 +""" + +from alembic import op + +from galaxy.model.database_object_names import build_index_name +from galaxy.model.migrations.data_fixes.user_table_fixer import UsernameDeduplicator +from galaxy.model.migrations.util import ( + create_index, + drop_index, + index_exists, + transaction, +) + +# revision identifiers, used by Alembic. +revision = "d619fdfa6168" +down_revision = "d2d8f51ebb7e" +branch_labels = None +depends_on = None + +table_name = "galaxy_user" +column_name = "username" +index_name = build_index_name(table_name, [column_name]) + + +def upgrade(): + with transaction(): + _fix_duplicate_usernames() + # Existing databases may have an existing index we no longer need + # New databases will not have that index, so we must check. + if index_exists(index_name, table_name, False): + drop_index(index_name, table_name) + # Create a UNIQUE index + create_index(index_name, table_name, [column_name], unique=True) + + +def downgrade(): + with transaction(): + drop_index(index_name, table_name) + # Restore a non-unique index + create_index(index_name, table_name, [column_name]) + + +def _fix_duplicate_usernames(): + """Fix records with duplicate usernames""" + connection = op.get_bind() + UsernameDeduplicator(connection).run() From 0e4559f7da7d931b526cf7211568bb6d40ecacbf Mon Sep 17 00:00:00 2001 From: John Davis Date: Wed, 18 Sep 2024 18:31:30 -0400 Subject: [PATCH 04/13] Add email deduplication data fixer --- .../migrations/data_fixes/user_table_fixer.py | 73 ++++++++++++++++++- 1 file changed, 69 insertions(+), 4 deletions(-) diff --git a/lib/galaxy/model/migrations/data_fixes/user_table_fixer.py b/lib/galaxy/model/migrations/data_fixes/user_table_fixer.py index c293dd2fec4f..4b9054872cd0 100644 --- a/lib/galaxy/model/migrations/data_fixes/user_table_fixer.py +++ b/lib/galaxy/model/migrations/data_fixes/user_table_fixer.py @@ -1,7 +1,10 @@ +import uuid + from sqlalchemy import ( func, Result, select, + text, update, ) @@ -17,25 +20,25 @@ def run(self): """ Deduplicate usernames by generating a unique value for all duplicates, keeping the username of the most recently created user unchanged. + Records updated with the generated value are marked as deleted. 
""" duplicates = self._get_duplicate_username_data() prev_username = None for id, username, _ in duplicates: if username == prev_username: new_username = self._generate_next_available_username(username) - stmt = update(User).where(User.id == id).values(username=new_username) + stmt = update(User).where(User.id == id).values(username=new_username, deleted=True) self.connection.execute(stmt) else: prev_username = username def _get_duplicate_username_data(self) -> Result: # Duplicate usernames - counts = select(User.username, func.count()).group_by(User.username).having(func.count() > 1) - sq = select(User.username).select_from(counts.cte()) + duplicates_stmt = select(User.username).group_by(User.username).having(func.count() > 1) # User data for records with duplicate usernames (ordering: newest to oldest) stmt = ( select(User.id, User.username, User.create_time) - .where(User.username.in_(sq)) + .where(User.username.in_(duplicates_stmt)) .order_by(User.username, User.create_time.desc()) ) return self.connection.execute(stmt) @@ -45,3 +48,65 @@ def _generate_next_available_username(self, username): while self.connection.execute(select(User).where(User.username == f"{username}-{i}")).first(): i += 1 return f"{username}-{i}" + + +class EmailDeduplicator: + + def __init__(self, connection): + self.connection = connection + + def run(self): + """ + Deduplicate user emails by generating a unique value for all duplicates, keeping + the email of the most recently created user that has one or more history unchanged. + If such a user does not exist, keep the oldest user. + Records updated with the generated value are marked as deleted (we presume them + to be invalid, and the user should not be able to login). + """ + stmt = select(User.email).group_by(User.email).having(func.count() > 1) + duplicate_emails = self.connection.scalars(stmt) + for email in duplicate_emails: + users = self._get_users_with_same_email(email) + user_with_history = self._find_oldest_user_with_history(users) + duplicates = self._get_users_to_deduplicate(users, user_with_history) + self._deduplicate_users(email, duplicates) + + def _get_users_with_same_email(self, email: str): + sql = text( + """ + SELECT u.id, EXISTS(SELECT h.id FROM history h WHERE h.user_id = u.id) + FROM galaxy_user u + WHERE u.email = :email + ORDER BY u.create_time + """ + ) + params = {"email": email} + return self.connection.execute(sql, params).all() + + def _find_oldest_user_with_history(self, users): + for user_id, exists in users: + if exists: + return user_id + return None + + def _get_users_to_deduplicate(self, users, user_with_history): + if user_with_history: + # Preserve the oldest user with a history + return [user_id for user_id, _ in users if user_id != user_with_history] + else: + # Preserve the oldest user + return [user_id for user_id, _ in users[1:]] + + def _deduplicate_users(self, email, to_deduplicate): + for id in to_deduplicate: + new_email = self._generate_replacement_for_duplicate_email(email) + stmt = update(User).where(User.id == id).values(email=new_email, deleted=True) + self.connection.execute(stmt) + + def _generate_replacement_for_duplicate_email(self, email: str) -> str: + """ + Generate a replacement for a duplicate email value. The new value consists of the original + email and a unique suffix. Since the original email is part of the new value, it will be + possible to retrieve the user record based on this value, if needed. 
+ """ + return f"{email}-{uuid.uuid4()}" From bacc3046f3759ba60043fb8c80a9c00143d5b677 Mon Sep 17 00:00:00 2001 From: John Davis Date: Wed, 3 Jul 2024 19:58:32 -0400 Subject: [PATCH 05/13] Add migration for email column unique constraint --- ...95475b58_email_column_unique_constraint.py | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 lib/galaxy/model/migrations/alembic/versions_gxy/1cf595475b58_email_column_unique_constraint.py diff --git a/lib/galaxy/model/migrations/alembic/versions_gxy/1cf595475b58_email_column_unique_constraint.py b/lib/galaxy/model/migrations/alembic/versions_gxy/1cf595475b58_email_column_unique_constraint.py new file mode 100644 index 000000000000..ba356b1c770d --- /dev/null +++ b/lib/galaxy/model/migrations/alembic/versions_gxy/1cf595475b58_email_column_unique_constraint.py @@ -0,0 +1,52 @@ +"""Email column unique constraint + +Revision ID: 1cf595475b58 +Revises: d619fdfa6168 +Create Date: 2024-07-03 19:53:22.443016 +""" + +from alembic import op + +from galaxy.model.database_object_names import build_index_name +from galaxy.model.migrations.data_fixes.user_table_fixer import EmailDeduplicator +from galaxy.model.migrations.util import ( + create_index, + drop_index, + index_exists, + transaction, +) + +# revision identifiers, used by Alembic. +revision = "1cf595475b58" +down_revision = "d619fdfa6168" +branch_labels = None +depends_on = None + + +table_name = "galaxy_user" +column_name = "email" +index_name = build_index_name(table_name, [column_name]) + + +def upgrade(): + with transaction(): + _fix_duplicate_emails() + # Existing databases may have an existing index we no longer need + # New databases will not have that index, so we must check. + if index_exists(index_name, table_name, False): + drop_index(index_name, table_name) + # Create a UNIQUE index + create_index(index_name, table_name, [column_name], unique=True) + + +def downgrade(): + with transaction(): + drop_index(index_name, table_name) + # Restore a non-unique index + create_index(index_name, table_name, [column_name]) + + +def _fix_duplicate_emails(): + """Fix records with duplicate usernames""" + connection = op.get_bind() + EmailDeduplicator(connection).run() From 07c0b2de222235925df79b235c2144896ecd915d Mon Sep 17 00:00:00 2001 From: John Davis Date: Wed, 3 Jul 2024 20:05:20 -0400 Subject: [PATCH 06/13] Update the model w/unique constraints --- lib/galaxy/model/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index f45921606266..c7ab8a57c06e 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -780,7 +780,7 @@ class User(Base, Dictifiable, RepresentById): id: Mapped[int] = mapped_column(primary_key=True) create_time: Mapped[datetime] = mapped_column(default=now, nullable=True) update_time: Mapped[datetime] = mapped_column(default=now, onupdate=now, nullable=True) - email: Mapped[str] = mapped_column(TrimmedString(255), index=True) + email: Mapped[str] = mapped_column(TrimmedString(255), index=True, unique=True) username: Mapped[Optional[str]] = mapped_column(TrimmedString(255), index=True, unique=True) password: Mapped[str] = mapped_column(TrimmedString(255)) last_password_change: Mapped[Optional[datetime]] = mapped_column(default=now) From 527b3711040205613fa66609e13b1b9a81ab72b0 Mon Sep 17 00:00:00 2001 From: John Davis Date: Wed, 3 Jul 2024 21:55:42 -0400 Subject: [PATCH 07/13] Fix integration test that violated db integrity constraint --- 
test/integration/test_celery_user_rate_limit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/test_celery_user_rate_limit.py b/test/integration/test_celery_user_rate_limit.py index b9b832c5cddc..c36dacc880fd 100644 --- a/test/integration/test_celery_user_rate_limit.py +++ b/test/integration/test_celery_user_rate_limit.py @@ -50,7 +50,7 @@ def setup_users(dburl: str, num_users: int = 2): for user_id in user_ids_to_add: conn.execute( text("insert into galaxy_user(id, active, email, password) values (:id, :active, :email, :pw)"), - [{"id": user_id, "active": True, "email": "e", "pw": "p"}], + [{"id": user_id, "active": True, "email": f"e{user_id}", "pw": "p"}], ) From b261dea43a265d1498fa833534ca6a3c985d8bd6 Mon Sep 17 00:00:00 2001 From: John Davis Date: Thu, 19 Sep 2024 15:24:04 -0400 Subject: [PATCH 08/13] Randomize test user email to respect unique constraint --- lib/galaxy/model/unittest_utils/utils.py | 13 +++++++++++++ test/unit/data/model/conftest.py | 17 ++++------------- test/unit/workflows/test_run_parameters.py | 3 ++- 3 files changed, 19 insertions(+), 14 deletions(-) create mode 100644 lib/galaxy/model/unittest_utils/utils.py diff --git a/lib/galaxy/model/unittest_utils/utils.py b/lib/galaxy/model/unittest_utils/utils.py new file mode 100644 index 000000000000..c558b52b51de --- /dev/null +++ b/lib/galaxy/model/unittest_utils/utils.py @@ -0,0 +1,13 @@ +import random +import string + + +def random_str() -> str: + alphabet = string.ascii_lowercase + string.digits + size = random.randint(5, 10) + return "".join(random.choices(alphabet, k=size)) + + +def random_email() -> str: + text = random_str() + return f"{text}@galaxy.testing" diff --git a/test/unit/data/model/conftest.py b/test/unit/data/model/conftest.py index 26ea7d8b7cc2..aff81d80af23 100644 --- a/test/unit/data/model/conftest.py +++ b/test/unit/data/model/conftest.py @@ -1,7 +1,5 @@ import contextlib import os -import random -import string import tempfile import uuid @@ -10,6 +8,10 @@ from sqlalchemy.orm import Session from galaxy import model as m +from galaxy.model.unittest_utils.utils import ( + random_email, + random_str, +) @pytest.fixture @@ -449,17 +451,6 @@ def transaction(session): yield -def random_str() -> str: - alphabet = string.ascii_lowercase + string.digits - size = random.randint(5, 10) - return "".join(random.choices(alphabet, k=size)) - - -def random_email() -> str: - text = random_str() - return f"{text}@galaxy.testing" - - def write_to_db(session, model) -> None: with transaction(session): session.add(model) diff --git a/test/unit/workflows/test_run_parameters.py b/test/unit/workflows/test_run_parameters.py index 76eae8955744..5718ac923a40 100644 --- a/test/unit/workflows/test_run_parameters.py +++ b/test/unit/workflows/test_run_parameters.py @@ -1,5 +1,6 @@ from galaxy import model from galaxy.model.base import transaction +from galaxy.model.unittest_utils.utils import random_email from galaxy.workflow.run_request import ( _normalize_inputs, _normalize_step_parameters, @@ -89,7 +90,7 @@ def __new_input(): def __workflow_fixure(trans): - user = model.User(email="testworkflow_params@bx.psu.edu", password="pass") + user = model.User(email=random_email(), password="pass") stored_workflow = model.StoredWorkflow() stored_workflow.user = user workflow = model.Workflow() From d8abcadd262554547d7be130d4b1a27c3cf76af8 Mon Sep 17 00:00:00 2001 From: John Davis Date: Thu, 19 Sep 2024 15:27:15 -0400 Subject: [PATCH 09/13] Fix bug in test_rule_helper --- 
test/unit/app/jobs/test_rule_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/app/jobs/test_rule_helper.py b/test/unit/app/jobs/test_rule_helper.py index 1d1197a0dc78..f3be1cdd20c0 100644 --- a/test/unit/app/jobs/test_rule_helper.py +++ b/test/unit/app/jobs/test_rule_helper.py @@ -66,7 +66,7 @@ def __setup_fixtures(app): # user3 has no jobs. user1 = model.User(email=USER_EMAIL_1, password="pass1") user2 = model.User(email=USER_EMAIL_2, password="pass2") - user3 = model.User(email=USER_EMAIL_2, password="pass2") + user3 = model.User(email=USER_EMAIL_3, password="pass3") app.add(user1, user2, user3) From 11a25ae19928a0b33cb671eb00fe452e93b44ff8 Mon Sep 17 00:00:00 2001 From: John Davis Date: Thu, 19 Sep 2024 15:34:38 -0400 Subject: [PATCH 10/13] Fix test_quota to respect constraint --- test/unit/data/test_quota.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/unit/data/test_quota.py b/test/unit/data/test_quota.py index 5c34ea7cb056..7a3695894445 100644 --- a/test/unit/data/test_quota.py +++ b/test/unit/data/test_quota.py @@ -1,6 +1,7 @@ import uuid from galaxy import model +from galaxy.model.unittest_utils.utils import random_email from galaxy.objectstore import ( QuotaSourceInfo, QuotaSourceMap, @@ -16,7 +17,7 @@ class TestPurgeUsage(BaseModelTestCase): def setUp(self): super().setUp() model = self.model - u = model.User(email="purge_usage@example.com", password="password") + u = model.User(email=random_email(), password="password") u.disk_usage = 25 self.persist(u) From 600523b7733d403a16def0ece22e09c802baba3a Mon Sep 17 00:00:00 2001 From: John Davis Date: Thu, 19 Sep 2024 16:31:29 -0400 Subject: [PATCH 11/13] Fix test_galaxy_mapping to respect constraint --- test/unit/data/test_galaxy_mapping.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/unit/data/test_galaxy_mapping.py b/test/unit/data/test_galaxy_mapping.py index 3e99841db44b..eed3eb01ad67 100644 --- a/test/unit/data/test_galaxy_mapping.py +++ b/test/unit/data/test_galaxy_mapping.py @@ -22,6 +22,7 @@ get_object_session, ) from galaxy.model.security import GalaxyRBACAgent +from galaxy.model.unittest_utils.utils import random_email from galaxy.objectstore import QuotaSourceMap from galaxy.util.unittest import TestCase @@ -78,7 +79,7 @@ def expunge(cls): class TestMappings(BaseModelTestCase): def test_dataset_instance_order(self) -> None: - u = model.User(email="mary@example.com", password="password") + u = model.User(email=random_email(), password="password") h1 = model.History(name="History 1", user=u) elements = [] list_pair = model.DatasetCollection(collection_type="list:paired") @@ -213,7 +214,7 @@ def test_nested_collection_attributes(self): assert c4.dataset_elements == [dce1, dce2] def test_history_audit(self): - u = model.User(email="contents@foo.bar.baz", password="password") + u = model.User(email=random_email(), password="password") h1 = model.History(name="HistoryAuditHistory", user=u) h2 = model.History(name="HistoryAuditHistory", user=u) @@ -272,7 +273,7 @@ def test_flush_refreshes(self): # states and flushing in SQL Alchemy is very subtle and it is good to have a executable # reference for how it behaves in the context of Galaxy objects. 
model = self.model - user = model.User(email="testworkflows@bx.psu.edu", password="password") + user = model.User(email=random_email(), password="password") galaxy_session = model.GalaxySession() galaxy_session_other = model.GalaxySession() galaxy_session.user = user @@ -345,7 +346,7 @@ def test_flush_refreshes(self): assert "id" not in inspect(galaxy_model_object_new).unloaded def test_workflows(self): - user = model.User(email="testworkflows@bx.psu.edu", password="password") + user = model.User(email=random_email(), password="password") child_workflow = _workflow_from_steps(user, []) self.persist(child_workflow) From 4471c6a851dfc95c5a55e18cca440c6ad2cee333 Mon Sep 17 00:00:00 2001 From: John Davis Date: Sat, 21 Sep 2024 00:17:20 -0400 Subject: [PATCH 12/13] Refactor tests to reduce duplication --- test/unit/data/model/__init__.py | 10 ++++++++++ test/unit/data/model/db/__init__.py | 11 ----------- test/unit/data/model/db/conftest.py | 2 +- test/unit/data/model/db/test_misc.py | 6 ++---- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/test/unit/data/model/__init__.py b/test/unit/data/model/__init__.py index e69de29bb2d1..7d0a1eeb1f8d 100644 --- a/test/unit/data/model/__init__.py +++ b/test/unit/data/model/__init__.py @@ -0,0 +1,10 @@ +PRIVATE_OBJECT_STORE_ID = "my_private_data" + + +class MockObjectStore: + + def is_private(self, object): + if object.object_store_id == PRIVATE_OBJECT_STORE_ID: + return True + else: + return False diff --git a/test/unit/data/model/db/__init__.py b/test/unit/data/model/db/__init__.py index 13a615086ebe..817efe285c17 100644 --- a/test/unit/data/model/db/__init__.py +++ b/test/unit/data/model/db/__init__.py @@ -3,20 +3,9 @@ namedtuple, ) -PRIVATE_OBJECT_STORE_ID = "my_private_data" - MockTransaction = namedtuple("MockTransaction", "user") -class MockObjectStore: - - def is_private(self, object): - if object.object_store_id == PRIVATE_OBJECT_STORE_ID: - return True - else: - return False - - def verify_items(items, expected_items): """ Assert that items and expected_items contain the same elements. diff --git a/test/unit/data/model/db/conftest.py b/test/unit/data/model/db/conftest.py index d36a38e71ace..1693cf27eaac 100644 --- a/test/unit/data/model/db/conftest.py +++ b/test/unit/data/model/db/conftest.py @@ -13,7 +13,7 @@ from galaxy import model as m from galaxy.datatypes.registry import Registry as DatatypesRegistry from galaxy.model.triggers.update_audit_table import install as install_timestamp_triggers -from . import MockObjectStore +from .. import MockObjectStore if TYPE_CHECKING: from sqlalchemy.engine import Engine diff --git a/test/unit/data/model/db/test_misc.py b/test/unit/data/model/db/test_misc.py index b8ef3fe5cf0c..9dadda4c326a 100644 --- a/test/unit/data/model/db/test_misc.py +++ b/test/unit/data/model/db/test_misc.py @@ -5,10 +5,8 @@ from galaxy import model as m from galaxy.model.unittest_utils.db_helpers import get_hdca_by_name -from . import ( - MockTransaction, - PRIVATE_OBJECT_STORE_ID, -) +from . import MockTransaction +from .. 
import PRIVATE_OBJECT_STORE_ID def test_history_update(make_history, make_hda, session): From 87573ee617f34c4f0472f3c827f041505a6eeb60 Mon Sep 17 00:00:00 2001 From: John Davis Date: Sat, 21 Sep 2024 00:18:02 -0400 Subject: [PATCH 13/13] Add tests for migration data fixes --- .../data/model/migration_fixes/__init__.py | 0 .../data/model/migration_fixes/conftest.py | 47 ++++++ .../model/migration_fixes/test_migrations.py | 154 ++++++++++++++++++ 3 files changed, 201 insertions(+) create mode 100644 test/unit/data/model/migration_fixes/__init__.py create mode 100644 test/unit/data/model/migration_fixes/conftest.py create mode 100644 test/unit/data/model/migration_fixes/test_migrations.py diff --git a/test/unit/data/model/migration_fixes/__init__.py b/test/unit/data/model/migration_fixes/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/test/unit/data/model/migration_fixes/conftest.py b/test/unit/data/model/migration_fixes/conftest.py new file mode 100644 index 000000000000..39ba30f5462a --- /dev/null +++ b/test/unit/data/model/migration_fixes/conftest.py @@ -0,0 +1,47 @@ +from typing import ( + Generator, + TYPE_CHECKING, +) + +import pytest +from sqlalchemy import ( + create_engine, + text, +) +from sqlalchemy.orm import Session + +from galaxy import model as m + +if TYPE_CHECKING: + from sqlalchemy.engine import Engine + +from galaxy.model.unittest_utils.model_testing_utils import ( # noqa: F401 - url_factory is a fixture we have to import explicitly + sqlite_url_factory, +) + + +@pytest.fixture() +def db_url(sqlite_url_factory): # noqa: F811 + return sqlite_url_factory() + + +@pytest.fixture() +def engine(db_url: str) -> "Engine": + return create_engine(db_url) + + +@pytest.fixture +def session(engine: "Engine") -> Session: + return Session(engine) + + +@pytest.fixture(autouse=True) +def clear_database(engine: "Engine") -> "Generator": + """Delete all rows from all tables. 
Called after each test.""" + yield + with engine.begin() as conn: + for table in m.mapper_registry.metadata.tables: + # Unless db is sqlite, disable foreign key constraints to delete out of order + if engine.name != "sqlite": + conn.execute(text(f"ALTER TABLE {table} DISABLE TRIGGER ALL")) + conn.execute(text(f"DELETE FROM {table}")) diff --git a/test/unit/data/model/migration_fixes/test_migrations.py b/test/unit/data/model/migration_fixes/test_migrations.py new file mode 100644 index 000000000000..12b0791689aa --- /dev/null +++ b/test/unit/data/model/migration_fixes/test_migrations.py @@ -0,0 +1,154 @@ +import pytest + +from galaxy.model import User +from galaxy.model.unittest_utils.migration_scripts_testing_utils import ( # noqa: F401 - contains fixtures we have to import explicitly + run_command, + tmp_directory, +) + +COMMAND = "manage_db.sh" + + +@pytest.fixture(autouse=True) +def upgrade_database_after_test(): + """Run after each test for proper cleanup""" + yield + run_command(f"{COMMAND} upgrade") + + +def test_1cf595475b58(monkeypatch, session, make_user, make_history): + # Initialize db and migration environment + dburl = str(session.bind.url) + monkeypatch.setenv("GALAXY_CONFIG_OVERRIDE_DATABASE_CONNECTION", dburl) + monkeypatch.setenv("GALAXY_INSTALL_CONFIG_OVERRIDE_INSTALL_DATABASE_CONNECTION", dburl) + run_command(f"{COMMAND} init") + + # STEP 0: Load pre-migration state + run_command(f"{COMMAND} downgrade d619fdfa6168") + + # STEP 1: Load users with duplicate emails + + # Duplicate group 1: users have no histories + # Expect: oldest user preserved + u1_1 = make_user(email="a") + u1_2 = make_user(email="a") + u1_3 = make_user(email="a") + original_email1 = u1_1.email + assert u1_1.email == u1_2.email == u1_3.email + assert u1_1.create_time < u1_2.create_time < u1_3.create_time  # u1_1 is oldest user + + # Duplicate group 2: oldest user does NOT have a history, another user has a history + # Expect: user with history preserved + u2_1 = make_user(email="b") + u2_2 = make_user(email="b") + u2_3 = make_user(email="b") + original_email2 = u2_1.email + assert u2_1.email == u2_2.email == u2_3.email + assert u2_1.create_time < u2_2.create_time < u2_3.create_time  # u2_1 is oldest user + + make_history(user=u2_2)  # u2_2 has a history + + # Duplicate group 3: oldest user does NOT have a history, 2 users have a history + # Expect: oldest user with history preserved + u3_1 = make_user(email="c") + u3_2 = make_user(email="c") + u3_3 = make_user(email="c") + original_email3 = u3_1.email + assert u3_1.email == u3_2.email == u3_3.email + assert u3_1.create_time < u3_2.create_time < u3_3.create_time  # u3_1 is oldest user + + make_history(user=u3_2)  # u3_2 has a history + make_history(user=u3_3)  # u3_3 has a history + + # User w/o duplicate email + u4 = make_user() + original_email4 = u4.email + + # STEP 2: Run migration + + run_command(f"{COMMAND} upgrade 1cf595475b58") + session.expire_all() + + # STEP 3: Verify deduplicated results + + # Duplicate group 1: + u1_1_fixed = session.get(User, u1_1.id) + u1_2_fixed = session.get(User, u1_2.id) + u1_3_fixed = session.get(User, u1_3.id) + + # oldest user's email is preserved; the rest are deduplicated + assert u1_1.email == original_email1 + assert u1_1.email != u1_2.email != u1_3.email + # deduplicated users are marked as deleted + assert u1_1_fixed.deleted is False + assert u1_2_fixed.deleted is True + assert u1_3_fixed.deleted is True + + # Duplicate group 2: + u2_1_fixed = session.get(User, u2_1.id) + u2_2_fixed = session.get(User, u2_2.id) + 
u2_3_fixed = session.get(User, u2_3.id) + + # the email of the user with a history is preserved; the rest are deduplicated + assert u2_2.email == original_email2 + assert u2_1.email != u2_2.email != u2_3.email + # deduplicated users are marked as deleted + assert u2_1_fixed.deleted is True + assert u2_2_fixed.deleted is False + assert u2_3_fixed.deleted is True + + # Duplicate group 3: + u3_1_fixed = session.get(User, u3_1.id) + u3_2_fixed = session.get(User, u3_2.id) + u3_3_fixed = session.get(User, u3_3.id) + + # the email of the oldest user with a history is preserved; the rest are deduplicated + assert u3_2.email == original_email3 + assert u3_1.email != u3_2.email != u3_3.email + # deduplicated users are marked as deleted + assert u3_1_fixed.deleted is True + assert u3_2_fixed.deleted is False + assert u3_3_fixed.deleted is True + + # User w/o duplicate email + u4_no_change = session.get(User, u4.id) + assert u4_no_change.email == original_email4 + assert u4_no_change.deleted is False + + +def test_d619fdfa6168(monkeypatch, session, make_user): + # Initialize db and migration environment + dburl = str(session.bind.url) + monkeypatch.setenv("GALAXY_CONFIG_OVERRIDE_DATABASE_CONNECTION", dburl) + monkeypatch.setenv("GALAXY_INSTALL_CONFIG_OVERRIDE_INSTALL_DATABASE_CONNECTION", dburl) + run_command(f"{COMMAND} init") + + # STEP 0: Load pre-migration state + run_command(f"{COMMAND} downgrade d2d8f51ebb7e") + + # STEP 1: Load users with duplicate usernames + + # Expect: newest user preserved + u1 = make_user(username="a") + u2 = make_user(username="a") + u3 = make_user(username="a") + original_username = u3.username + assert u1.username == u2.username == u3.username + assert u1.create_time < u2.create_time < u3.create_time  # u3 is newest user + + # STEP 2: Run migration + run_command(f"{COMMAND} upgrade d619fdfa6168") + session.expire_all() + + # STEP 3: Verify deduplicated results + u1_fixed = session.get(User, u1.id) + u2_fixed = session.get(User, u2.id) + u3_fixed = session.get(User, u3.id) + + # newest user's username is preserved; the rest are deduplicated + assert u3_fixed.username == original_username + assert u1.username != u2.username != u3.username + # deduplicated users are marked as deleted + assert u1_fixed.deleted is True + assert u2_fixed.deleted is True + assert u3_fixed.deleted is False
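
Usage note: as the _generate_replacement_for_duplicate_email docstring in patch 04 points out, a record renamed by the email deduplicator keeps its original email as a prefix of the new value ("<original>-<uuid4>"), so affected users remain discoverable after the migration. Below is a minimal lookup sketch, assuming a SQLAlchemy session bound to the Galaxy database; the helper name and the prefix-match query are illustrative only, not code shipped by these patches.

    from sqlalchemy import select

    from galaxy.model import User


    def find_deduplicated_users(session, original_email: str):
        """Find user records whose email was rewritten from original_email."""
        # The deduplicator appends "-<uuid4>" to the original email, so a
        # prefix match recovers every renamed (and now deleted) record.
        stmt = select(User).where(User.email.like(f"{original_email}-%"))
        return session.scalars(stmt).all()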