Skip to content

Commit efb1997

Browse files
SergeyKleymanreakaleekv1v
authored
Fix case when process fork happens during request processing (#857)
* Start thread for background backend communications on demand and not on PHP extension request-init notification to correctly handle the situation when process forked after request-init notification * Add update-specs updatecli workflow (#861) * Fix sha source (#864) * Use composite action for updatecli workflow (#865) * Use composite action * Add env * Detect PID change (i.e., child fork) at every entry to agent code * Migrate Jenkinsfile 2 GH Actions Workflow (#863) * Migrate Jenkinsfile to GH Actions Workflow * Fix triggers * Update .github/workflows/test.yml * Fix * Fix php-upgrade * Add opentelemetry * Rename job * Add timeout * Adjust max-parallel * Fix needs * Fix * Move php rows to top So they are triggered first in the matrix and should optimize the workflow time * Move lifecycle on prod server rows to the top Should also optimze workflow time * Add test-reporter * Remove debugging step * Update .github/workflows/test-reporter.yml Co-authored-by: Victor Martinez <victormartinezrubio@gmail.com> * Migrate snapshoty * Extract snapshoty into distinct workflow * Update .github/workflows/snapshoty.yml * Run snapshoty only on main --------- Co-authored-by: Victor Martinez <victormartinezrubio@gmail.com> * action: run loop workflow (#867) * Start thread for background backend communications on demand and not on PHP extension request-init notification to correctly handle the situation when process forked after request-init notification * Detect PID change (i.e., child fork) at every entry to agent code --------- Co-authored-by: Jan Calanog <jan.calanog@elastic.co> Co-authored-by: Victor Martinez <victormartinezrubio@gmail.com>
1 parent 60d3e17 commit efb1997

15 files changed

+699
-293
lines changed

src/ext/ResultCode.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ enum ResultCode
2828
resultOutOfMemory,
2929
resultInvalidFormat,
3030
resultCurlFailure,
31+
resultSyncObjUseAfterFork,
3132
resultFailure
3233
};
3334
typedef enum ResultCode ResultCode;
@@ -45,6 +46,12 @@ static inline String resultCodeToString( ResultCode resultCode )
4546
case resultInvalidFormat:
4647
return "resultInvalidFormat";
4748

49+
case resultCurlFailure:
50+
return "resultCurlFailure";
51+
52+
case resultSyncObjUseAfterFork:
53+
return "resultSyncObjUseAfterFork";
54+
4855
case resultFailure:
4956
return "resultFailure";
5057

src/ext/Tracer.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ ResultCode ensureLoggerHasLatestConfig( Logger* logger, const ConfigSnapshot* co
6969
ELASTIC_APM_CALL_IF_FAILED_GOTO( reconfigureLogger( logger, &loggerConfig, config->logLevel ) );
7070

7171
resultCode = resultSuccess;
72-
7372
finally:
7473
return resultCode;
7574

src/ext/backend_comm.c

Lines changed: 63 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,6 @@ static void freeDataToSendQueue( DataToSendQueue* dataQueue )
418418

419419
struct BackgroundBackendComm
420420
{
421-
int refCount;
422421
Mutex* mutex;
423422
ConditionVariable* condVar;
424423
Thread* thread;
@@ -730,8 +729,10 @@ void* backgroundBackendCommThreadFunc( void* arg )
730729
goto finally;
731730
}
732731

733-
ResultCode unwindBackgroundBackendComm( BackgroundBackendComm** backgroundBackendCommOutPtr, const TimeSpec* timeoutAbsUtc )
732+
ResultCode unwindBackgroundBackendComm( BackgroundBackendComm** backgroundBackendCommOutPtr, const TimeSpec* timeoutAbsUtc, bool isCreatedByThisProcess )
734733
{
734+
ELASTIC_APM_LOG_DEBUG_FUNCTION_ENTRY_MSG( "isCreatedByThisProcess: %s", boolToString( isCreatedByThisProcess ) );
735+
735736
ELASTIC_APM_ASSERT_VALID_PTR( backgroundBackendCommOutPtr );
736737
// ELASTIC_APM_ASSERT_VALID_PTR( timeoutAbsUtc ); <- timeoutAbsUtc can be NULL
737738

@@ -744,12 +745,20 @@ ResultCode unwindBackgroundBackendComm( BackgroundBackendComm** backgroundBacken
744745
goto finally;
745746
}
746747

748+
if ( ! isCreatedByThisProcess )
749+
{
750+
ELASTIC_APM_LOG_DEBUG( "Deallocating memory related to background communication data structures inherited from parent process after fork"
751+
" without actually properly destroying synchronization primitives since it's impossible to do in child process"
752+
"; parent PID: %d"
753+
, (int)getParentProcessId() );
754+
}
755+
747756
if ( backgroundBackendComm->thread != NULL )
748757
{
749758
void* backgroundBackendCommThreadFuncRetVal = NULL;
750759
bool hasTimedOut;
751760
ELASTIC_APM_CALL_IF_FAILED_GOTO(
752-
timedJoinAndDeleteThread( &( backgroundBackendComm->thread ), &backgroundBackendCommThreadFuncRetVal, timeoutAbsUtc, &hasTimedOut, __FUNCTION__ ) );
761+
timedJoinAndDeleteThread( &( backgroundBackendComm->thread ), &backgroundBackendCommThreadFuncRetVal, timeoutAbsUtc, isCreatedByThisProcess, &hasTimedOut, __FUNCTION__ ) );
753762
if ( hasTimedOut )
754763
{
755764
ELASTIC_APM_LOG_ERROR( "Join to thread for background backend communications timed out - skipping the rest of cleanup and exiting" );
@@ -759,7 +768,7 @@ ResultCode unwindBackgroundBackendComm( BackgroundBackendComm** backgroundBacken
759768

760769
if ( backgroundBackendComm->condVar != NULL )
761770
{
762-
ELASTIC_APM_CALL_IF_FAILED_GOTO( deleteConditionVariable( &( backgroundBackendComm->condVar ) ) );
771+
ELASTIC_APM_CALL_IF_FAILED_GOTO( deleteConditionVariable( &( backgroundBackendComm->condVar ), isCreatedByThisProcess ) );
763772
}
764773

765774
if ( backgroundBackendComm->mutex != NULL )
@@ -772,13 +781,13 @@ ResultCode unwindBackgroundBackendComm( BackgroundBackendComm** backgroundBacken
772781
ELASTIC_APM_FREE_INSTANCE_AND_SET_TO_NULL( BackgroundBackendComm, *backgroundBackendCommOutPtr );
773782

774783
finally:
784+
ELASTIC_APM_LOG_DEBUG_RESULT_CODE_FUNCTION_EXIT();
775785
return resultCode;
776786

777787
failure:
778788
goto finally;
779789
}
780790

781-
static Mutex* g_backgroundBackendCommMutex = NULL;
782791
static BackgroundBackendComm* g_backgroundBackendComm = NULL;
783792

784793
static bool deriveAsyncBackendComm( const ConfigSnapshot* config, String* dbgReason )
@@ -799,47 +808,6 @@ static bool deriveAsyncBackendComm( const ConfigSnapshot* config, String* dbgRea
799808
return true;
800809
}
801810

802-
ResultCode backgroundBackendCommOnModuleInit( const ConfigSnapshot* config )
803-
{
804-
ELASTIC_APM_LOG_DEBUG_FUNCTION_ENTRY_MSG( "g_backgroundBackendCommMutex: %p; g_backgroundBackendComm: %p"
805-
, g_backgroundBackendCommMutex, g_backgroundBackendComm );
806-
807-
ResultCode resultCode;
808-
809-
if ( g_backgroundBackendCommMutex != NULL )
810-
{
811-
ELASTIC_APM_LOG_ERROR( "Unexpected state: g_backgroundBackendCommMutex != NULL" );
812-
ELASTIC_APM_SET_RESULT_CODE_AND_GOTO_FAILURE();
813-
}
814-
815-
if ( g_backgroundBackendComm != NULL )
816-
{
817-
ELASTIC_APM_LOG_ERROR( "Unexpected state: g_backgroundBackendComm != NULL" );
818-
ELASTIC_APM_SET_RESULT_CODE_AND_GOTO_FAILURE();
819-
}
820-
821-
String dbgAsyncBackendCommReason = NULL;
822-
if ( ! deriveAsyncBackendComm( config, &dbgAsyncBackendCommReason ) )
823-
{
824-
ELASTIC_APM_LOG_DEBUG( "async_backend_comm (asyncBackendComm) configuration option is %s - no need to start background backend comm", dbgAsyncBackendCommReason );
825-
resultCode = resultSuccess;
826-
goto finally;
827-
}
828-
829-
ELASTIC_APM_CALL_IF_FAILED_GOTO( newMutex( &( g_backgroundBackendCommMutex ), /* dbgDesc */ "g_backgroundBackendCommMutex" ) );
830-
831-
finally:
832-
ELASTIC_APM_LOG_DEBUG_RESULT_CODE_FUNCTION_EXIT();
833-
return resultCode;
834-
835-
failure:
836-
if ( g_backgroundBackendCommMutex != NULL )
837-
{
838-
deleteMutex( &g_backgroundBackendCommMutex );
839-
}
840-
goto finally;
841-
}
842-
843811
ResultCode newBackgroundBackendComm( const ConfigSnapshot* config, BackgroundBackendComm** backgroundBackendCommOut )
844812
{
845813
ELASTIC_APM_LOG_DEBUG_FUNCTION_ENTRY();
@@ -848,7 +816,6 @@ ResultCode newBackgroundBackendComm( const ConfigSnapshot* config, BackgroundBac
848816
BackgroundBackendComm* backgroundBackendComm = NULL;
849817

850818
ELASTIC_APM_MALLOC_INSTANCE_IF_FAILED_GOTO( BackgroundBackendComm, /* out */ backgroundBackendComm );
851-
backgroundBackendComm->refCount = 1;
852819
backgroundBackendComm->condVar = NULL;
853820
backgroundBackendComm->mutex = NULL;
854821
backgroundBackendComm->thread = NULL;
@@ -860,7 +827,6 @@ ResultCode newBackgroundBackendComm( const ConfigSnapshot* config, BackgroundBac
860827
ELASTIC_APM_CALL_IF_FAILED_GOTO( newMutex( &( backgroundBackendComm->mutex ), /* dbgDesc */ "Background backend communications" ) );
861828
ELASTIC_APM_CALL_IF_FAILED_GOTO( newConditionVariable( &( backgroundBackendComm->condVar ), /* dbgDesc */ "Background backend communications" ) );
862829

863-
backgroundBackendComm->refCount = 2;
864830
resultCode = newThread( &( backgroundBackendComm->thread )
865831
, &backgroundBackendCommThreadFunc
866832
, /* threadFuncArg: */ backgroundBackendComm
@@ -869,10 +835,6 @@ ResultCode newBackgroundBackendComm( const ConfigSnapshot* config, BackgroundBac
869835
{
870836
ELASTIC_APM_LOG_DEBUG( "Started thread for background backend communications; thread ID: %"PRIu64, getThreadId( backgroundBackendComm->thread ) );
871837
}
872-
else
873-
{
874-
--backgroundBackendComm->refCount;
875-
}
876838

877839
resultCode = resultSuccess;
878840
*backgroundBackendCommOut = backgroundBackendComm;
@@ -882,31 +844,15 @@ ResultCode newBackgroundBackendComm( const ConfigSnapshot* config, BackgroundBac
882844
return resultCode;
883845

884846
failure:
885-
unwindBackgroundBackendComm( &backgroundBackendComm, /* timeoutAbsUtc: */ NULL );
847+
unwindBackgroundBackendComm( &backgroundBackendComm, /* timeoutAbsUtc: */ NULL, /* isCreatedByThisProcess */ true );
886848
goto finally;
887849
}
888850

889-
ResultCode backgroundBackendCommOnRequestInit( const ConfigSnapshot* config )
851+
ResultCode backgroundBackendCommEnsureInited( const ConfigSnapshot* config )
890852
{
891-
ELASTIC_APM_LOG_DEBUG_FUNCTION_ENTRY_MSG( "g_backgroundBackendCommMutex: %p", g_backgroundBackendCommMutex );
853+
ELASTIC_APM_LOG_DEBUG_FUNCTION_ENTRY();
892854

893855
ResultCode resultCode;
894-
bool shouldUnlockMutex = false;
895-
896-
String dbgAsyncBackendCommReason = NULL;
897-
if ( ! deriveAsyncBackendComm( config, &dbgAsyncBackendCommReason ) )
898-
{
899-
ELASTIC_APM_LOG_DEBUG( "async_backend_comm (asyncBackendComm) configuration option is %s - no need to start background backend comm", dbgAsyncBackendCommReason );
900-
resultCode = resultSuccess;
901-
goto finally;
902-
}
903-
904-
if ( g_backgroundBackendCommMutex == NULL )
905-
{
906-
ELASTIC_APM_LOG_ERROR( "Unexpected state: g_backgroundBackendCommMutex == NULL" );
907-
ELASTIC_APM_SET_RESULT_CODE_AND_GOTO_FAILURE();
908-
}
909-
ELASTIC_APM_CALL_IF_FAILED_GOTO( lockMutex( g_backgroundBackendCommMutex, &shouldUnlockMutex, __FUNCTION__ ) );
910856

911857
if ( g_backgroundBackendComm == NULL )
912858
{
@@ -916,7 +862,6 @@ ResultCode backgroundBackendCommOnRequestInit( const ConfigSnapshot* config )
916862
resultCode = resultSuccess;
917863

918864
finally:
919-
unlockMutex( g_backgroundBackendCommMutex, &shouldUnlockMutex, __FUNCTION__ );
920865
ELASTIC_APM_LOG_DEBUG_RESULT_CODE_FUNCTION_EXIT();
921866
return resultCode;
922867

@@ -965,19 +910,16 @@ void backgroundBackendCommOnModuleShutdown()
965910
return;
966911
}
967912

968-
ELASTIC_APM_ASSERT( g_backgroundBackendCommMutex != NULL, "%p", g_backgroundBackendCommMutex );
969-
970913
ResultCode resultCode;
971914
TimeSpec shouldExitBy;
972915

973916
ELASTIC_APM_CALL_IF_FAILED_GOTO( signalBackgroundBackendCommThreadToExit( backgroundBackendComm, /* out */ &shouldExitBy ) );
974-
ELASTIC_APM_CALL_IF_FAILED_GOTO( unwindBackgroundBackendComm( &backgroundBackendComm, &shouldExitBy ) );
917+
ELASTIC_APM_CALL_IF_FAILED_GOTO( unwindBackgroundBackendComm( &backgroundBackendComm, &shouldExitBy, /* isCreatedByThisProcess */ true ) );
975918
resultCode = resultSuccess;
976919

977920
finally:
978921

979922
g_backgroundBackendComm = NULL;
980-
ELASTIC_APM_CALL_IF_FAILED_GOTO( deleteMutex( &g_backgroundBackendCommMutex ) );
981923
return;
982924

983925
failure:
@@ -1082,11 +1024,8 @@ ResultCode sendEventsToApmServerWithDataConvertedForSync(
10821024
, serializedEvents ) );
10831025

10841026
resultCode = resultSuccess;
1085-
10861027
finally:
1087-
10881028
freeMallocedStringBuffer( /* in,out */ &userAgentHttpHeaderWithTermNull );
1089-
10901029
return resultCode;
10911030

10921031
failure:
@@ -1100,7 +1039,9 @@ ResultCode sendEventsToApmServer(
11001039
, StringView userAgentHttpHeader
11011040
, StringView serializedEvents )
11021041
{
1042+
ResultCode resultCode;
11031043
long serverTimeoutMillisecondsLong = (long) ceil( serverTimeoutMilliseconds );
1044+
11041045
ELASTIC_APM_LOG_DEBUG(
11051046
"Handling request to send events..."
11061047
" disableSend: %s"
@@ -1113,15 +1054,49 @@ ResultCode sendEventsToApmServer(
11131054
, (UInt64) serializedEvents.length, (int) serializedEvents.length, serializedEvents.begin );
11141055

11151056
String dbgAsyncBackendCommReason = NULL;
1116-
if ( ! deriveAsyncBackendComm( config, &dbgAsyncBackendCommReason ) )
1057+
bool shouldSendAsync = deriveAsyncBackendComm( config, &dbgAsyncBackendCommReason );
1058+
ELASTIC_APM_LOG_DEBUG( "async_backend_comm (asyncBackendComm) configuration option is %s - sending events %s"
1059+
, dbgAsyncBackendCommReason, ( shouldSendAsync ? "asynchronously" : "synchronously" ) );
1060+
if ( shouldSendAsync )
1061+
{
1062+
ELASTIC_APM_CALL_IF_FAILED_GOTO( backgroundBackendCommEnsureInited( config ) );
1063+
ELASTIC_APM_CALL_IF_FAILED_GOTO( enqueueEventsToSendToApmServer( disableSend, serverTimeoutMilliseconds, userAgentHttpHeader, serializedEvents ) );
1064+
}
1065+
else
1066+
{
1067+
ELASTIC_APM_CALL_IF_FAILED_GOTO( sendEventsToApmServerWithDataConvertedForSync(
1068+
disableSend
1069+
, serverTimeoutMilliseconds
1070+
, config
1071+
, userAgentHttpHeader
1072+
, serializedEvents ) );
1073+
}
1074+
1075+
resultCode = resultSuccess;
1076+
finally:
1077+
ELASTIC_APM_LOG_DEBUG_RESULT_CODE_FUNCTION_EXIT();
1078+
return resultCode;
1079+
1080+
failure:
1081+
goto finally;
1082+
}
1083+
1084+
ResultCode resetBackgroundBackendCommStateInForkedChild()
1085+
{
1086+
ELASTIC_APM_LOG_DEBUG_FUNCTION_ENTRY_MSG( "g_backgroundBackendComm %s NULL", (g_backgroundBackendComm == NULL) ? "==" : "!=" );
1087+
1088+
ResultCode resultCode;
1089+
1090+
if ( g_backgroundBackendComm != NULL )
11171091
{
1118-
ELASTIC_APM_LOG_DEBUG( "async_backend_comm (asyncBackendComm) configuration option is %s - sending events synchronously", dbgAsyncBackendCommReason );
1119-
return sendEventsToApmServerWithDataConvertedForSync( disableSend
1120-
, serverTimeoutMilliseconds
1121-
, config
1122-
, userAgentHttpHeader
1123-
, serializedEvents );
1092+
ELASTIC_APM_CALL_IF_FAILED_GOTO( unwindBackgroundBackendComm( &g_backgroundBackendComm, /* timeoutAbsUtc: */ NULL, /* isCreatedByThisProcess */ false ) );
11241093
}
11251094

1126-
return enqueueEventsToSendToApmServer( disableSend, serverTimeoutMilliseconds, userAgentHttpHeader, serializedEvents );
1127-
}
1095+
resultCode = resultSuccess;
1096+
finally:
1097+
ELASTIC_APM_LOG_DEBUG_RESULT_CODE_FUNCTION_EXIT();
1098+
return resultCode;
1099+
1100+
failure:
1101+
goto finally;
1102+
}

src/ext/backend_comm.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,6 @@
2323
#include "ConfigManager.h"
2424
#include "ResultCode.h"
2525

26-
ResultCode backgroundBackendCommOnModuleInit( const ConfigSnapshot* config );
27-
28-
ResultCode backgroundBackendCommOnRequestInit( const ConfigSnapshot* config );
29-
3026
ResultCode sendEventsToApmServer(
3127
bool disableSend
3228
, double serverTimeoutMilliseconds
@@ -35,3 +31,5 @@ ResultCode sendEventsToApmServer(
3531
, StringView serializedEvents );
3632

3733
void backgroundBackendCommOnModuleShutdown();
34+
35+
ResultCode resetBackgroundBackendCommStateInForkedChild();

0 commit comments

Comments
 (0)