From 2733a6766fe1e9bc84b187cd50b066552fb84216 Mon Sep 17 00:00:00 2001 From: Michael Johnson Date: Fri, 17 May 2024 16:46:33 +0000 Subject: [PATCH 1/2] Fix failover when using SRIOV VIP The previous patch[1] that added a check for Amphora status missed adding a task that generates the Amphora status information when a failvoer is triggered. This patch corrects that by adding the AmphoraeGetConnectivityStatus task to the get amphora for lb failover subflow. [1] https://review.opendev.org/c/openstack/octavia/+/912599 Change-Id: I9150da04214f470060020123813bb71235acbc20 (cherry picked from commit a4317a80d61ce2d8e699ea7ca17f161ecc4db8ee) --- .../controller/worker/v2/flows/amphora_flows.py | 17 ++++++++++++++--- .../worker/v2/flows/test_load_balancer_flows.py | 10 ++++------ ...ailover-for-SRIOV-VIPs-e2ab193c0de5eb1d.yaml | 4 ++++ 3 files changed, 22 insertions(+), 9 deletions(-) create mode 100644 releasenotes/notes/Fix-failover-for-SRIOV-VIPs-e2ab193c0de5eb1d.yaml diff --git a/octavia/controller/worker/v2/flows/amphora_flows.py b/octavia/controller/worker/v2/flows/amphora_flows.py index 77ae13259..eaff37cf8 100644 --- a/octavia/controller/worker/v2/flows/amphora_flows.py +++ b/octavia/controller/worker/v2/flows/amphora_flows.py @@ -404,7 +404,7 @@ def update_amphora_config_flow(self): def get_amphora_for_lb_failover_subflow( self, prefix, role=constants.ROLE_STANDALONE, failed_amp_vrrp_port_id=None, is_vrrp_ipv6=False, - flavor_dict=None): + flavor_dict=None, timeout_dict=None): """Creates a new amphora that will be used in a failover flow. :requires: loadbalancer_id, flavor, vip, vip_sg_id, loadbalancer @@ -488,13 +488,24 @@ def get_amphora_for_lb_failover_subflow( rebind={constants.AMPHORAE: constants.NEW_AMPHORAE}, provides=constants.AMPHORA_FIREWALL_RULES, inject={constants.AMPHORA_INDEX: 0})) + amp_for_failover_flow.add( + amphora_driver_tasks.AmphoraeGetConnectivityStatus( + name=(prefix + '-' + + constants.AMPHORAE_GET_CONNECTIVITY_STATUS), + requires=constants.AMPHORAE, + rebind={constants.AMPHORAE: constants.NEW_AMPHORAE}, + inject={constants.TIMEOUT_DICT: timeout_dict, + constants.NEW_AMPHORA_ID: constants.NIL_UUID}, + provides=constants.AMPHORAE_STATUS)) amp_for_failover_flow.add( amphora_driver_tasks.SetAmphoraFirewallRules( name=prefix + '-' + constants.SET_AMPHORA_FIREWALL_RULES, requires=(constants.AMPHORAE, - constants.AMPHORA_FIREWALL_RULES), + constants.AMPHORA_FIREWALL_RULES, + constants.AMPHORAE_STATUS), rebind={constants.AMPHORAE: constants.NEW_AMPHORAE}, - inject={constants.AMPHORA_INDEX: 0})) + inject={constants.AMPHORA_INDEX: 0, + constants.TIMEOUT_DICT: timeout_dict})) # Plug member ports amp_for_failover_flow.add(network_tasks.CalculateAmphoraDelta( diff --git a/octavia/tests/unit/controller/worker/v2/flows/test_load_balancer_flows.py b/octavia/tests/unit/controller/worker/v2/flows/test_load_balancer_flows.py index 4b8a4d0e0..fc48ce8b9 100644 --- a/octavia/tests/unit/controller/worker/v2/flows/test_load_balancer_flows.py +++ b/octavia/tests/unit/controller/worker/v2/flows/test_load_balancer_flows.py @@ -344,7 +344,6 @@ def _test_get_failover_LB_flow_single(self, amphorae): self.assertIn(constants.FLAVOR, failover_flow.requires) self.assertIn(constants.LOADBALANCER, failover_flow.requires) self.assertIn(constants.LOADBALANCER_ID, failover_flow.requires) - self.assertIn(constants.AMPHORAE_STATUS, failover_flow.requires) self.assertIn(constants.UPDATED_PORTS, failover_flow.provides) self.assertIn(constants.AMPHORA, failover_flow.provides) @@ -364,9 +363,9 @@ def _test_get_failover_LB_flow_single(self, amphorae): self.assertIn(constants.SUBNET, failover_flow.provides) self.assertIn(constants.NEW_AMPHORAE, failover_flow.provides) - self.assertEqual(7, len(failover_flow.requires), + self.assertEqual(6, len(failover_flow.requires), failover_flow.requires) - self.assertEqual(16, len(failover_flow.provides), + self.assertEqual(17, len(failover_flow.provides), failover_flow.provides) @mock.patch('octavia.common.rpc.NOTIFIER', @@ -424,7 +423,6 @@ def _test_get_failover_LB_flow_no_amps_act_stdby(self, amphorae): self.assertIn(constants.FLAVOR, failover_flow.requires) self.assertIn(constants.LOADBALANCER, failover_flow.requires) self.assertIn(constants.LOADBALANCER_ID, failover_flow.requires) - self.assertIn(constants.AMPHORAE_STATUS, failover_flow.requires) self.assertIn(constants.UPDATED_PORTS, failover_flow.provides) self.assertIn(constants.AMPHORA, failover_flow.provides) @@ -445,9 +443,9 @@ def _test_get_failover_LB_flow_no_amps_act_stdby(self, amphorae): self.assertIn(constants.SUBNET, failover_flow.provides) self.assertIn(constants.NEW_AMPHORAE, failover_flow.provides) - self.assertEqual(7, len(failover_flow.requires), + self.assertEqual(6, len(failover_flow.requires), failover_flow.requires) - self.assertEqual(16, len(failover_flow.provides), + self.assertEqual(17, len(failover_flow.provides), failover_flow.provides) @mock.patch('octavia.common.rpc.NOTIFIER', diff --git a/releasenotes/notes/Fix-failover-for-SRIOV-VIPs-e2ab193c0de5eb1d.yaml b/releasenotes/notes/Fix-failover-for-SRIOV-VIPs-e2ab193c0de5eb1d.yaml new file mode 100644 index 000000000..5744b6bbf --- /dev/null +++ b/releasenotes/notes/Fix-failover-for-SRIOV-VIPs-e2ab193c0de5eb1d.yaml @@ -0,0 +1,4 @@ +--- +fixes: + - | + Fixed an issue when failing over load balancers using SR-IOV VIP ports. From 2e4a03e5c85556b19a019108022f399341950360 Mon Sep 17 00:00:00 2001 From: Sergey Kraynev Date: Wed, 23 Oct 2024 17:57:44 +0400 Subject: [PATCH 2/2] Remove amphora_health record on revert CreateAmphoraInDB Record in amphora_health created by first UDP request from booted amphora. It's possible to run creation/failover LB where Amphora will be booted and reverted on the final tasks (e.g. AmphoraPostVIPPlug) as result record in amphora_health will be created, but not be removed on revert. This patch adds logic to remove amphora_health record on revert flow. Closes-Bug: 2085530 Change-Id: Ia8b4612cd25a68b44d55b113f704756818eb8a5b (cherry picked from commit f76fa1ae330e933e86c666b857acfee06f675adb) --- .../worker/v2/tasks/database_tasks.py | 16 +++++---- .../worker/v2/tasks/test_database_tasks.py | 34 +++++++++++++++---- ...ow_on_amphora_revert-082f94459ecacaa2.yaml | 7 ++++ 3 files changed, 45 insertions(+), 12 deletions(-) create mode 100644 releasenotes/notes/delete_amphora_health_row_on_amphora_revert-082f94459ecacaa2.yaml diff --git a/octavia/controller/worker/v2/tasks/database_tasks.py b/octavia/controller/worker/v2/tasks/database_tasks.py index e78ef41d7..48b4d92d4 100644 --- a/octavia/controller/worker/v2/tasks/database_tasks.py +++ b/octavia/controller/worker/v2/tasks/database_tasks.py @@ -132,13 +132,17 @@ def revert(self, result, *args, **kwargs): LOG.warning("Reverting create amphora in DB for amp id %s ", result) # Delete the amphora for now. May want to just update status later - try: - with db_apis.session().begin() as session: + with db_apis.session().begin() as session: + try: self.amphora_repo.delete(session, id=result) - except Exception as e: - LOG.error("Failed to delete amphora %(amp)s " - "in the database due to: " - "%(except)s", {'amp': result, 'except': str(e)}) + except Exception as e: + LOG.error("Failed to delete amphora %(amp)s " + "in the database due to: " + "%(except)s", {'amp': result, 'except': str(e)}) + try: + self.amp_health_repo.delete(session, amphora_id=result) + except Exception: + pass class MarkLBAmphoraeDeletedInDB(BaseDatabaseTask): diff --git a/octavia/tests/unit/controller/worker/v2/tasks/test_database_tasks.py b/octavia/tests/unit/controller/worker/v2/tasks/test_database_tasks.py index 8b471b8bc..25d267085 100644 --- a/octavia/tests/unit/controller/worker/v2/tasks/test_database_tasks.py +++ b/octavia/tests/unit/controller/worker/v2/tasks/test_database_tasks.py @@ -184,7 +184,9 @@ def setUp(self): @mock.patch('octavia.db.repositories.AmphoraRepository.create', return_value=_db_amphora_mock) + @mock.patch('octavia.db.repositories.AmphoraHealthRepository.delete') def test_create_amphora_in_db(self, + mock_amphora_health_repo_delete, mock_create, mock_generate_uuid, mock_LOG, @@ -210,23 +212,43 @@ def test_create_amphora_in_db(self, # Test the revert create_amp_in_db.revert(_tf_failure_mock) - self.assertFalse(mock_amphora_repo_delete.called) + mock_amphora_repo_delete.assert_not_called() + mock_amphora_health_repo_delete.assert_not_called() + amp_id = 'AMP' mock_amphora_repo_delete.reset_mock() - create_amp_in_db.revert(result='AMP') + mock_amphora_health_repo_delete.reset_mock() + create_amp_in_db.revert(result=amp_id) self.assertTrue(mock_amphora_repo_delete.called) + self.assertTrue(mock_amphora_health_repo_delete.called) mock_amphora_repo_delete.assert_called_once_with( mock_session, - id='AMP') + id=amp_id) + mock_amphora_health_repo_delete.assert_called_once_with( + mock_session, + amphora_id=amp_id) + mock_LOG.error.assert_not_called() + mock_LOG.debug.assert_not_called() # Test revert with exception mock_amphora_repo_delete.reset_mock() - mock_amphora_repo_delete.side_effect = Exception('fail') - create_amp_in_db.revert(result='AMP') + mock_amphora_health_repo_delete.reset_mock() + err1_msg, err2_msg = ('fail', 'fail2') + mock_amphora_repo_delete.side_effect = Exception(err1_msg) + mock_amphora_health_repo_delete.side_effect = Exception(err2_msg) + create_amp_in_db.revert(result=amp_id) self.assertTrue(mock_amphora_repo_delete.called) + self.assertTrue(mock_amphora_health_repo_delete.called) mock_amphora_repo_delete.assert_called_once_with( mock_session, - id='AMP') + id=amp_id) + mock_amphora_health_repo_delete.assert_called_once_with( + mock_session, + amphora_id=amp_id) + mock_LOG.error.assert_called_once_with( + "Failed to delete amphora %(amp)s " + "in the database due to: " + "%(except)s", {'amp': amp_id, 'except': err1_msg}) @mock.patch('octavia.db.repositories.ListenerRepository.delete') def test_delete_listener_in_db(self, diff --git a/releasenotes/notes/delete_amphora_health_row_on_amphora_revert-082f94459ecacaa2.yaml b/releasenotes/notes/delete_amphora_health_row_on_amphora_revert-082f94459ecacaa2.yaml new file mode 100644 index 000000000..39b7266fb --- /dev/null +++ b/releasenotes/notes/delete_amphora_health_row_on_amphora_revert-082f94459ecacaa2.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + Remove record in amphora_health table on revert. It's necessary, because + record in amphora table for corresponding amphora also deleted. + It allows to avoid false positive react of failover threshold due to + orphan records in amphora_health table.