diff --git a/syncd/Syncd.cpp b/syncd/Syncd.cpp index 8a1cb186..6c51ecb9 100644 --- a/syncd/Syncd.cpp +++ b/syncd/Syncd.cpp @@ -350,6 +350,38 @@ void Syncd::processEvent( while (!consumer.empty()); } +void Syncd::processEventInShutdownWaitMode( + _In_ sairedis::SelectableChannel& consumer) +{ + SWSS_LOG_ENTER(); + + // Syncd in shutdown-wait mode must respond to INIT_VIEW with FAILURE to avoid deadlock with OA + // This could happen because Orchagent sends INIT_VIEW before registering shutdown callback + // Can't reorder due to circular dependency: need switch to register callbacks, but + // need INIT_VIEW before creating switch + do + { + swss::KeyOpFieldsValuesTuple kco; + consumer.pop(kco, false); + + auto& op = kfvOp(kco); + auto& key = kfvKey(kco); + + SWSS_LOG_WARN("Received command while in shutdown-wait mode: op=%s, key=%s.", op.c_str(), key.c_str()); + + if (op == REDIS_ASIC_STATE_COMMAND_NOTIFY) + { + SWSS_LOG_ERROR("Syncd is waiting for shutdown, cannot process %s: Sending FAILURE response.", key.c_str()); + sendNotifyResponse(SAI_STATUS_FAILURE); + } + else + { + SWSS_LOG_WARN("Ignoring non-notify command in shutdown-wait mode"); + } + } + while (!consumer.empty()); +} + sai_status_t Syncd::processSingleEvent( _In_ const swss::KeyOpFieldsValuesTuple &kco) { @@ -5566,6 +5598,8 @@ void Syncd::run() volatile bool runMainLoop = true; + bool inShutdownWaitMode = false; + std::shared_ptr s = std::make_shared(); try @@ -5603,6 +5637,9 @@ void Syncd::run() s = std::make_shared(); s->addSelectable(m_restartQuery.get()); + s->addSelectable(m_selectableChannel.get()); + + inShutdownWaitMode = true; SWSS_LOG_NOTICE("starting main loop, ONLY restart query"); @@ -5721,7 +5758,14 @@ void Syncd::run() } else if (sel == m_selectableChannel.get()) { - processEvent(*m_selectableChannel.get()); + if (inShutdownWaitMode) + { + processEventInShutdownWaitMode(*m_selectableChannel.get()); + } + else + { + processEvent(*m_selectableChannel.get()); + } } else { @@ -5730,13 +5774,16 @@ void Syncd::run() } catch(const std::exception &e) { - SWSS_LOG_ERROR("Runtime error: %s", e.what()); + SWSS_LOG_ERROR("Runtime error: %s - entering shutdown-wait mode", e.what()); sendShutdownRequestAfterException(); s = std::make_shared(); s->addSelectable(m_restartQuery.get()); + s->addSelectable(m_selectableChannel.get()); + + inShutdownWaitMode = true; if (m_commandLineOptions->m_disableExitSleep) runMainLoop = false; diff --git a/syncd/Syncd.h b/syncd/Syncd.h index bd59fcc2..6b9a9414 100644 --- a/syncd/Syncd.h +++ b/syncd/Syncd.h @@ -67,6 +67,9 @@ namespace syncd void processEvent( _In_ sairedis::SelectableChannel& consumer); + void processEventInShutdownWaitMode( + _In_ sairedis::SelectableChannel& consumer); + sai_status_t processQuadEventInInitViewMode( _In_ sai_object_type_t objectType, _In_ const std::string& strObjectId, diff --git a/unittest/syncd/TestSyncd.cpp b/unittest/syncd/TestSyncd.cpp index 22fa5618..6d74c07f 100644 --- a/unittest/syncd/TestSyncd.cpp +++ b/unittest/syncd/TestSyncd.cpp @@ -627,4 +627,70 @@ TEST_F(SyncdTest, BulkRemoveTest) }; m_syncd->processEvent(*channel); } + +TEST_F(SyncdTest, processEventInShutdownWaitMode_NotifyCommand) +{ + // Test that NOTIFY commands receive FAILURE response in shutdown-wait mode + auto channel = std::make_shared(); + + // Set up the mock channel as the syncd's selectable channel + // so sendNotifyResponse will use it + m_syncd->m_selectableChannel = channel; + + int popCallCount = 0; + EXPECT_CALL(*channel, empty()) + .WillRepeatedly([&popCallCount]() { + return popCallCount > 0; + }); + + EXPECT_CALL(*channel, pop(testing::_, testing::_)) + .WillOnce(testing::DoAll( + testing::Invoke([](swss::KeyOpFieldsValuesTuple& kco, bool initViewMode) { + kfvKey(kco) = SYNCD_INIT_VIEW; + kfvOp(kco) = REDIS_ASIC_STATE_COMMAND_NOTIFY; + }), + testing::Invoke([&popCallCount](swss::KeyOpFieldsValuesTuple&, bool) { + popCallCount++; + }) + )); + + // Verify that set() is called with FAILURE status response + std::string expectedStatus = sai_serialize_status(SAI_STATUS_FAILURE); + EXPECT_CALL(*channel, set(expectedStatus, testing::_, REDIS_ASIC_STATE_COMMAND_NOTIFY)) + .Times(1); + + m_syncd->processEventInShutdownWaitMode(*channel); +} + +TEST_F(SyncdTest, processEventInShutdownWaitMode_NonNotifyCommand) +{ + // Test that non-NOTIFY commands are ignored (no response sent) in shutdown-wait mode + auto channel = std::make_shared(); + + // Set up the mock channel as the syncd's selectable channel + m_syncd->m_selectableChannel = channel; + + int popCallCount = 0; + EXPECT_CALL(*channel, empty()) + .WillRepeatedly([&popCallCount]() { + return popCallCount > 0; + }); + + EXPECT_CALL(*channel, pop(testing::_, testing::_)) + .WillOnce(testing::DoAll( + testing::Invoke([](swss::KeyOpFieldsValuesTuple& kco, bool initViewMode) { + kfvKey(kco) = "SAI_OBJECT_TYPE_SWITCH:oid:0x21000000000000"; + kfvOp(kco) = REDIS_ASIC_STATE_COMMAND_CREATE; + }), + testing::Invoke([&popCallCount](swss::KeyOpFieldsValuesTuple&, bool) { + popCallCount++; + }) + )); + + // Verify that set() is NOT called for non-notify commands + EXPECT_CALL(*channel, set(testing::_, testing::_, testing::_)) + .Times(0); + + m_syncd->processEventInShutdownWaitMode(*channel); +} #endif