From 66d3fb1a7cde32d4d99307629bca504da22ab1e4 Mon Sep 17 00:00:00 2001 From: Jeffrey Mendelsohn Date: Thu, 17 Oct 2024 17:48:29 -0400 Subject: [PATCH] lost signal reproduction (#5025) * lost signal reproduction * correct initialization of a variable --- groups/bdl/bdlmt/bdlmt_fixedthreadpool.t.cpp | 91 +++++++++++++++++++- 1 file changed, 90 insertions(+), 1 deletion(-) diff --git a/groups/bdl/bdlmt/bdlmt_fixedthreadpool.t.cpp b/groups/bdl/bdlmt/bdlmt_fixedthreadpool.t.cpp index 5238d574c4..2bff3c19cb 100644 --- a/groups/bdl/bdlmt/bdlmt_fixedthreadpool.t.cpp +++ b/groups/bdl/bdlmt/bdlmt_fixedthreadpool.t.cpp @@ -6,6 +6,9 @@ #include #include +#include +#include + #include #include #include @@ -71,7 +74,10 @@ using bsl::flush; // which are controlled by the test case. // // In addition to positive test cases (run in the nightly builds), a negative -// test case -1 can be run manually to measure performance of enqueuing jobs. +// test case -1 can be run manually to measure performance of enqueuing jobs, +// and test case -3 can be run to reproduce the lost condition signal issue in +// the underlying implementation of the condition variable (e.g., +// https://sourceware.org/bugzilla/show_bug.cgi?id=25847). 
// // [ 3] bdlmt::FixedThreadPool(numThreads, maxNumPendingJobs, *bA); // [ 3] bdlmt::FixedThreadPool(nT, maxNPJ, mI, *mR, *bA); @@ -644,6 +650,21 @@ void performanceTest(FILE *outputFile, s_performanceTestPool_p = 0; } +void testJobRecordNowMicroseconds(bsls::AtomicInt64 *now) +{ + *now = bsls::SystemTime::nowMonotonicClock().totalMicroseconds(); +} + +extern "C" void *logActivity(void *) +{ + while (1) { + bsl::cout << "*** " << bdlt::CurrentTime::local() << bsl::endl; + bslmt::ThreadUtil::microSleep(k_DECISECOND * 600 * 5); + } + + return 0; +} + // ============================================================================ // USAGE EXAMPLE // ---------------------------------------------------------------------------- @@ -2982,6 +3003,74 @@ int main(int argc, char *argv[]) fclose(f); } break; + case -3: { + // -------------------------------------------------------------------- + // LOST CONDITION SIGNAL REPRODUCTION + // + // Concerns: + // 1. A lost signal bug in the underlying condition variable + // implementation can be reproduced (e.g., + // https://sourceware.org/bugzilla/show_bug.cgi?id=25847). + // + // Plan: + // 1. Create a `bdlmt::FixedThreadPool`. Repeatedly enqueue a job and + // wait for the pool to empty. If the enqueuing loop stops printing + // its timestamp (expected about once a minute), the issue has been + // reproduced. A separate thread prints a `***`-prefixed timestamp + // every five minutes to aid in detection. Note that this test + // typically requires *days* to reproduce the issue. 
+ // -------------------------------------------------------------------- + + if (verbose) { + cout << "LOST CONDITION SIGNAL REPRODUCTION" << endl + << "==================================" << endl; + } + + bslmt::FastPostSemaphoreImplWorkaroundUtil:: + removePostAlwaysSignalsMitigation(); + + bdlmt::FixedThreadPool pool(4, 32); + pool.start(); + while (!pool.isStarted()) { + bslmt::ThreadUtil::yield(); + } + + bslmt::ThreadUtil::Handle logActivityHandle; + + bslmt::ThreadUtil::create(&logActivityHandle, + logActivity, + 0); + + bdlt::Datetime lastLog = bdlt::CurrentTime::local(); + bsl::cout << lastLog << bsl::endl; + + bsls::AtomicInt64 stop; + stop = 0; + + const Obj::Job job(bdlf::BindUtil::bind(&testJobRecordNowMicroseconds, + &stop)); + + while (1) { + bslmt::ThreadUtil::yield(); + bslmt::ThreadUtil::yield(); + bslmt::ThreadUtil::yield(); + + stop = 0; + bdlt::Datetime start = bdlt::CurrentTime::local(); + pool.enqueueJob(job); + while (0 == stop) { + bslmt::ThreadUtil::yield(); + } + while (0 != pool.numActiveThreads()) { + bslmt::ThreadUtil::yield(); + } + + if (1 <= (start - lastLog).totalMinutes()) { + bsl::cout << start << bsl::endl; + lastLog = start; + } + } + } break; default: { cerr << "WARNING: CASE `" << test << "' NOT FOUND." << endl; testStatus = -1;