From b7ee964bbc8e8bacab1296cb7f4a8e4836a39728 Mon Sep 17 00:00:00 2001 From: Gregory LEOCADIE Date: Thu, 1 Dec 2022 14:47:28 +0100 Subject: [PATCH] [Profiler] Improve CodeHotspots (#3501) * Fix sleep API we use to avoid non-precise wakeup time * Add new list of managed thread: the ones that already have a trace context attached. --- .../src/Demos/Samples.BuggyBits/Program.cs | 16 ++- .../Demos/Samples.BuggyBits/SelfInvoker.cs | 16 ++- .../Windows32BitStackFramesCollector.cpp | 1 - .../Windows64BitStackFramesCollector.cpp | 1 - .../Datadog.Profiler.Native/Configuration.cpp | 13 ++ .../Datadog.Profiler.Native/Configuration.h | 3 + .../CorProfilerCallback.cpp | 11 +- .../CorProfilerCallback.h | 2 + .../EnvironmentVariables.h | 1 + .../Datadog.Profiler.Native/IConfiguration.h | 1 + .../IManagedThreadList.h | 1 + .../ManagedThreadInfo.h | 13 ++ .../ManagedThreadList.cpp | 23 +++- .../ManagedThreadList.h | 1 + .../Datadog.Profiler.Native/OpSysTools.cpp | 11 +- .../Datadog.Profiler.Native/OpSysTools.h | 2 + .../Datadog.Profiler.Native/PInvoke.cpp | 2 + .../StackSamplerLoop.cpp | 120 ++++++++++++++---- .../StackSamplerLoop.h | 14 +- .../StackSamplerLoopManager.cpp | 3 + .../StackSamplerLoopManager.h | 2 + .../CodeHotspot/CodeHotspotTest.cs | 105 ++++++++++++++- .../ProfilerMockedInterface.h | 1 + 23 files changed, 323 insertions(+), 40 deletions(-) diff --git a/profiler/src/Demos/Samples.BuggyBits/Program.cs b/profiler/src/Demos/Samples.BuggyBits/Program.cs index 9257a51d7782..426dc38ba1af 100644 --- a/profiler/src/Demos/Samples.BuggyBits/Program.cs +++ b/profiler/src/Demos/Samples.BuggyBits/Program.cs @@ -34,7 +34,7 @@ public static async Task Main(string[] args) EnvironmentInfo.PrintDescriptionToConsole(); - ParseCommandLine(args, out var timeout, out var iterations, out var scenario); + ParseCommandLine(args, out var timeout, out var iterations, out var scenario, out var nbIdleThreads); using (var host = CreateHostBuilder(args).Build()) { @@ -53,7 +53,7 @@ public static async Task Main(string[] args) WriteLine($"Listening to {rootUrl}"); var cts = new CancellationTokenSource(); - using (var selfInvoker = new SelfInvoker(cts.Token, scenario)) + using (var selfInvoker = new SelfInvoker(cts.Token, scenario, nbIdleThreads)) { await host.StartAsync(); @@ -121,12 +121,13 @@ public static IHostBuilder CreateHostBuilder(string[] args) => webBuilder.UseStartup(); }); - private static void ParseCommandLine(string[] args, out TimeSpan timeout, out int iterations, out Scenario scenario) + private static void ParseCommandLine(string[] args, out TimeSpan timeout, out int iterations, out Scenario scenario, out int nbIdleThreads) { // by default, need interactive action to exit and string.Concat scenario timeout = TimeSpan.MinValue; iterations = 0; scenario = Scenario.StringConcat; + nbIdleThreads = 0; for (int i = 0; i < args.Length; i++) { @@ -171,6 +172,15 @@ private static void ParseCommandLine(string[] args, out TimeSpan timeout, out in { timeout = Timeout.InfiniteTimeSpan; } + else + if ("--with-idle-threads".Equals(arg, StringComparison.OrdinalIgnoreCase)) + { + var nbThreadsArgument = i + 1; + if (nbThreadsArgument >= args.Length || !int.TryParse(args[nbThreadsArgument], out nbIdleThreads)) + { + throw new InvalidOperationException($"Invalid or missing count after --with-idle-threads"); + } + } } // sanity checks diff --git a/profiler/src/Demos/Samples.BuggyBits/SelfInvoker.cs b/profiler/src/Demos/Samples.BuggyBits/SelfInvoker.cs index 582eb93f781d..941b183c0342 100644 --- a/profiler/src/Demos/Samples.BuggyBits/SelfInvoker.cs +++ b/profiler/src/Demos/Samples.BuggyBits/SelfInvoker.cs @@ -18,12 +18,14 @@ public class SelfInvoker : IDisposable private readonly CancellationToken _exitToken; private readonly HttpClient _httpClient; private readonly Scenario _scenario; + private readonly int _nbIdleThreads; - public SelfInvoker(CancellationToken token, Scenario scenario) + public SelfInvoker(CancellationToken token, Scenario scenario, int nbIdleThreds) { _exitToken = token; _httpClient = new HttpClient(); _scenario = scenario; + _nbIdleThreads = nbIdleThreds; } public void Dispose() @@ -35,6 +37,8 @@ public async Task RunAsync(string rootUrl, int iterations = 0) { Console.WriteLine($"{this.GetType().Name} started."); + CreateIdleThreads(); + if (_scenario == Scenario.None) { await Task.Delay(Timeout.Infinite, _exitToken); @@ -67,6 +71,16 @@ public async Task RunAsync(string rootUrl, int iterations = 0) Console.WriteLine($"{this.GetType().Name} stopped."); } + private void CreateIdleThreads() + { + Console.WriteLine($"----- Creating {_nbIdleThreads} idle threads"); + + for (var i = 0; i < _nbIdleThreads; i++) + { + Task.Factory.StartNew(() => { _exitToken.WaitHandle.WaitOne(); }, TaskCreationOptions.LongRunning); + } + } + private string GetEndpoint(string rootUrl) { switch (_scenario) diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Windows/Windows32BitStackFramesCollector.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Windows/Windows32BitStackFramesCollector.cpp index 93361d52e73b..7eb5ca8ac991 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Windows/Windows32BitStackFramesCollector.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Windows/Windows32BitStackFramesCollector.cpp @@ -30,7 +30,6 @@ StackSnapshotResultBuffer* Windows32BitStackFramesCollector::CollectStackSampleI { // Collect data for TraceContext Tracking: bool traceContextDataCollected = this->TryApplyTraceContextDataFromCurrentCollectionThreadToSnapshot(); - assert(traceContextDataCollected); // Now walk the stack: __try diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Windows/Windows64BitStackFramesCollector.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Windows/Windows64BitStackFramesCollector.cpp index e46195c6f7fa..54f3d863732d 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Windows/Windows64BitStackFramesCollector.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Windows/Windows64BitStackFramesCollector.cpp @@ -181,7 +181,6 @@ StackSnapshotResultBuffer* Windows64BitStackFramesCollector::CollectStackSampleI { // Collect data for TraceContext Tracking: bool traceContextDataCollected = this->TryApplyTraceContextDataFromCurrentCollectionThreadToSnapshot(); - assert(traceContextDataCollected); // Now walk the stack: CONTEXT context; diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Configuration.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Configuration.cpp index ddeed8ac0705..f1dba7222772 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Configuration.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Configuration.cpp @@ -59,6 +59,7 @@ Configuration::Configuration() _cpuWallTimeSamplingRate = ExtractCpuWallTimeSamplingRate(); _walltimeThreadsThreshold = ExtractWallTimeThreadsThreshold(); _cpuThreadsThreshold = ExtractCpuThreadsThreshold(); + _codeHotspotsThreadsThreshold = ExtractCodeHotspotsThreadsThreshold(); _minimumCores = GetEnvironmentValue(EnvironmentVariables::CoreMinimumOverride, 1.0); _namedPipeName = GetEnvironmentValue(EnvironmentVariables::NamedPipeName, DefaultEmptyString); _isTimestampsAsLabelEnabled = GetEnvironmentValue(EnvironmentVariables::TimestampsAsLabelEnabled, false); @@ -167,6 +168,11 @@ int32_t Configuration::CpuThreadsThreshold() const return _cpuThreadsThreshold; } +int32_t Configuration::CodeHotspotsThreadsThreshold() const +{ + return _codeHotspotsThreadsThreshold; +} + double Configuration::MinimumCores() const { return _minimumCores; @@ -366,6 +372,13 @@ int32_t Configuration::ExtractWallTimeThreadsThreshold() return threshold; } +int32_t Configuration::ExtractCodeHotspotsThreadsThreshold() +{ + // default threads to sample for codehotspots is 10; could be changed via env vars but down to 1ms + int32_t threshold = std::max(GetEnvironmentValue(EnvironmentVariables::CodeHotspotsThreadsThreshold, 10), 1); + return threshold; +} + int32_t Configuration::ExtractCpuThreadsThreshold() { // default threads to sample for CPU profiling is 64; could be changed via env vars from 5 to 128 diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Configuration.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Configuration.h index b44719ddb4f8..a05808809deb 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Configuration.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/Configuration.h @@ -52,6 +52,7 @@ class Configuration final : public IConfiguration bool IsTimestampsAsLabelEnabled() const override; int32_t WalltimeThreadsThreshold() const override; int32_t CpuThreadsThreshold() const override; + int32_t CodeHotspotsThreadsThreshold() const override; bool IsGarbageCollectionProfilingEnabled() const override; private: @@ -72,6 +73,7 @@ class Configuration final : public IConfiguration static std::chrono::nanoseconds ExtractCpuWallTimeSamplingRate(); static int32_t ExtractWallTimeThreadsThreshold(); static int32_t ExtractCpuThreadsThreshold(); + static int32_t ExtractCodeHotspotsThreadsThreshold(); static bool GetContention(); private: @@ -116,6 +118,7 @@ class Configuration final : public IConfiguration std::chrono::nanoseconds _cpuWallTimeSamplingRate; int32_t _walltimeThreadsThreshold; int32_t _cpuThreadsThreshold; + int32_t _codeHotspotsThreadsThreshold; double _minimumCores; std::string _namedPipeName; diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.cpp index 633371a7d94c..2fd66804ac5d 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.cpp @@ -114,6 +114,8 @@ bool CorProfilerCallback::InitializeServices() _pManagedThreadList = RegisterService(_pCorProfilerInfo); + _pCodeHotspotsThreadList = RegisterService(_pCorProfilerInfo); + auto* pRuntimeIdStore = RegisterService(); // Each sample contains a vector of values. @@ -245,6 +247,7 @@ bool CorProfilerCallback::InitializeServices() _pClrLifetime.get(), _pThreadsCpuManager, _pManagedThreadList, + _pCodeHotspotsThreadList, _pWallTimeProvider, _pCpuTimeProvider); @@ -342,6 +345,7 @@ bool CorProfilerCallback::DisposeServices() _pThreadsCpuManager = nullptr; _pStackSamplerLoopManager = nullptr; _pManagedThreadList = nullptr; + _pCodeHotspotsThreadList = nullptr; _pApplicationStore = nullptr; return result; @@ -1104,13 +1108,18 @@ HRESULT STDMETHODCALLTYPE CorProfilerCallback::ThreadDestroyed(ThreadID threadId } std::shared_ptr pThreadInfo; - if (_pManagedThreadList->UnregisterThread(threadId, pThreadInfo)) + Log::Debug("Removing thread ", std::hex, threadId, " from the trace context threads list."); + if (_pCodeHotspotsThreadList->UnregisterThread(threadId, pThreadInfo)) { // The docs require that we do not allow to destroy a thread while it is being stack-walked. // TO ensure this, SetThreadDestroyed(..) acquires the StackWalkLock associated with this ThreadInfo. pThreadInfo->SetThreadDestroyed(); + pThreadInfo.reset(); } + Log::Debug("Removing thread ", std::hex, threadId, " from the main managed thread list."); + _pManagedThreadList->UnregisterThread(threadId, pThreadInfo); + return S_OK; } diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.h index 6be4723b36b2..67e69b387ab2 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CorProfilerCallback.h @@ -182,6 +182,7 @@ class CorProfilerCallback : public ICorProfilerCallback10 public: IThreadsCpuManager* GetThreadsCpuManager() { return _pThreadsCpuManager; } IManagedThreadList* GetManagedThreadList() { return _pManagedThreadList; } + IManagedThreadList* GetCodeHotspotThreadList() { return _pCodeHotspotsThreadList; } IStackSamplerLoopManager* GetStackSamplerLoopManager() { return _pStackSamplerLoopManager; } IApplicationStore* GetApplicationStore() { return _pApplicationStore; } IExporter* GetExporter() { return _pExporter.get(); } @@ -204,6 +205,7 @@ private : IThreadsCpuManager* _pThreadsCpuManager = nullptr; IStackSamplerLoopManager* _pStackSamplerLoopManager = nullptr; IManagedThreadList* _pManagedThreadList = nullptr; + IManagedThreadList* _pCodeHotspotsThreadList = nullptr; IApplicationStore* _pApplicationStore = nullptr; ExceptionsProvider* _pExceptionsProvider = nullptr; WallTimeProvider* _pWallTimeProvider = nullptr; diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EnvironmentVariables.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EnvironmentVariables.h index d0dac31883d0..6c2f7d5193de 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EnvironmentVariables.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/EnvironmentVariables.h @@ -43,6 +43,7 @@ class EnvironmentVariables final inline static const shared::WSTRING CpuWallTimeSamplingRate = WStr("DD_INTERNAL_PROFILING_SAMPLING_RATE"); inline static const shared::WSTRING WalltimeThreadsThreshold = WStr("DD_INTERNAL_PROFILING_WALLTIME_THREADS_THRESHOLD"); inline static const shared::WSTRING CpuTimeThreadsThreshold = WStr("DD_INTERNAL_PROFILING_CPUTIME_THREADS_THRESHOLD"); + inline static const shared::WSTRING CodeHotspotsThreadsThreshold = WStr("DD_INTERNAL_PROFILING_CODEHOTSPOTS_THREADS_THRESHOLD"); inline static const shared::WSTRING TimestampsAsLabelEnabled = WStr("DD_INTERNAL_PROFILING_TIMESTAMPS_AS_LABEL_ENABLED"); inline static const shared::WSTRING ProfilesOutputDir = WStr("DD_INTERNAL_PROFILING_OUTPUT_DIR"); inline static const shared::WSTRING DevelopmentConfiguration = WStr("DD_INTERNAL_USE_DEVELOPMENT_CONFIGURATION"); diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IConfiguration.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IConfiguration.h index 225ab0a3e938..9441d0899c81 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IConfiguration.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IConfiguration.h @@ -49,5 +49,6 @@ class IConfiguration virtual bool IsTimestampsAsLabelEnabled() const = 0; virtual int32_t WalltimeThreadsThreshold() const = 0; virtual int32_t CpuThreadsThreshold() const = 0; + virtual int32_t CodeHotspotsThreadsThreshold() const = 0; virtual bool IsGarbageCollectionProfilingEnabled() const = 0; }; \ No newline at end of file diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IManagedThreadList.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IManagedThreadList.h index 9baee94f5fc9..4429a43b18ec 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IManagedThreadList.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/IManagedThreadList.h @@ -14,6 +14,7 @@ class IManagedThreadList : public IService { public: virtual bool GetOrCreateThread(ThreadID clrThreadId) = 0; + virtual bool RegisterThread(std::shared_ptr& pThreadInfo) = 0; virtual bool UnregisterThread(ThreadID clrThreadId, std::shared_ptr& ppThreadInfo) = 0; virtual bool SetThreadOsInfo(ThreadID clrThreadId, DWORD osThreadId, HANDLE osThreadHandle) = 0; virtual bool SetThreadName(ThreadID clrThreadId, const shared::WSTRING& threadName) = 0; diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ManagedThreadInfo.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ManagedThreadInfo.h index eb165561684d..2502a0a1b92e 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ManagedThreadInfo.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ManagedThreadInfo.h @@ -77,6 +77,7 @@ struct ManagedThreadInfo inline std::uint64_t GetLocalRootSpanId() const; inline std::uint64_t GetSpanId() const; inline bool CanReadTraceContext() const; + inline bool HasTraceContext() const; inline std::string GetProfileThreadId(); inline std::string GetProfileThreadName(); @@ -358,3 +359,15 @@ inline bool ManagedThreadInfo::CanReadTraceContext() const std::atomic_thread_fence(std::memory_order_acquire); return canReadTraceContext == 0; } + +inline bool ManagedThreadInfo::HasTraceContext() const +{ + if (CanReadTraceContext()) + { + std::uint64_t localRootSpanId = GetLocalRootSpanId(); + std::uint64_t spanId = GetSpanId(); + + return localRootSpanId != 0 && spanId != 0; + } + return false; +} diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ManagedThreadList.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ManagedThreadList.cpp index 1771f2f6e7e2..34be765588b9 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ManagedThreadList.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ManagedThreadList.cpp @@ -131,7 +131,7 @@ bool ManagedThreadList::UnregisterThread(ThreadID clrThreadId, std::shared_ptr pInfo = *i; // make a copy so it can be moved later if (pInfo->GetClrThreadId() == clrThreadId) { @@ -151,7 +151,7 @@ bool ManagedThreadList::UnregisterThread(ThreadID clrThreadId, std::shared_ptr ManagedThreadList::FindByClrId(ThreadID clrTh { return elem->second; } +} + +bool ManagedThreadList::RegisterThread(std::shared_ptr& pThreadInfo) +{ + std::lock_guard lock(_mutex); + + auto it = _lookupByClrThreadId.find(pThreadInfo->GetClrThreadId()); + if (it == _lookupByClrThreadId.cend()) + { + // not registered yet + + _threads.push_back(pThreadInfo); + _lookupByClrThreadId[pThreadInfo->GetClrThreadId()] = pThreadInfo; + _lookupByProfilerThreadInfoId[pThreadInfo->GetProfilerThreadInfoId()] = pThreadInfo; + + return true; + } + + return false; } \ No newline at end of file diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ManagedThreadList.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ManagedThreadList.h index 15b9280ecd59..06d05cdd5da7 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ManagedThreadList.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/ManagedThreadList.h @@ -31,6 +31,7 @@ class ManagedThreadList : public IManagedThreadList bool Start() override; bool Stop() override; bool GetOrCreateThread(ThreadID clrThreadId) override; + bool RegisterThread(std::shared_ptr& pThreadInfo) override; bool UnregisterThread(ThreadID clrThreadId, std::shared_ptr& ppThreadInfo) override; bool SetThreadOsInfo(ThreadID clrThreadId, DWORD osThreadId, HANDLE osThreadHandle) override; bool SetThreadName(ThreadID clrThreadId, const shared::WSTRING& threadName) override; diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/OpSysTools.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/OpSysTools.cpp index 2f7e9452f1cf..c73925cfc9dc 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/OpSysTools.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/OpSysTools.cpp @@ -383,4 +383,13 @@ bool OpSysTools::IsSafeToStartProfiler(double coresThreshold) return true; #endif -} \ No newline at end of file +} + +void OpSysTools::Sleep(std::chrono::nanoseconds duration) +{ +#ifdef _WINDOWS + ::Sleep(static_cast(duration.count() / 1000000)); +#else + usleep(duration.count() / 1000); +#endif +} diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/OpSysTools.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/OpSysTools.h index c6d8c9dfd270..e6da2c035ead 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/OpSysTools.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/OpSysTools.h @@ -72,6 +72,8 @@ class OpSysTools final static bool IsSafeToStartProfiler(double coresThreshold); static std::int64_t GetHighPrecisionTimestamp(); + static void Sleep(std::chrono::nanoseconds duration); + private: static constexpr std::int64_t NanosecondsPerSecond = 1000000000; diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/PInvoke.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/PInvoke.cpp index 76311a6838fb..4072fcace021 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/PInvoke.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/PInvoke.cpp @@ -71,6 +71,8 @@ extern "C" void* __stdcall GetPointerToNativeTraceContext() return nullptr; } + profiler->GetCodeHotspotThreadList()->RegisterThread(pCurrentThreadInfo); + // Get pointers to the relevant fields within the thread info data structure. return pCurrentThreadInfo->GetTraceContextPointer(); } diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoop.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoop.cpp index 5cd8377d1a48..9e14aa1f9d44 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoop.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoop.cpp @@ -48,6 +48,7 @@ StackSamplerLoop::StackSamplerLoop( StackSamplerLoopManager* pManager, IThreadsCpuManager* pThreadsCpuManager, IManagedThreadList* pManagedThreadList, + IManagedThreadList* pCodeHotspotThreadList, ICollector* pWallTimeCollector, ICollector* pCpuTimeCollector) : @@ -57,6 +58,7 @@ StackSamplerLoop::StackSamplerLoop( _pManager{pManager}, _pThreadsCpuManager{pThreadsCpuManager}, _pManagedThreadList{pManagedThreadList}, + _pCodeHotspotsThreadList{pCodeHotspotThreadList}, _pWallTimeCollector{pWallTimeCollector}, _pCpuTimeCollector{pCpuTimeCollector}, _pLoopThread{nullptr}, @@ -65,17 +67,20 @@ StackSamplerLoop::StackSamplerLoop( _iteratorWallTime{0}, _iteratorCpuTime{0}, _walltimeThreadsThreshold{pConfiguration->WalltimeThreadsThreshold()}, - _cpuThreadsThreshold{pConfiguration->CpuThreadsThreshold()} + _cpuThreadsThreshold{pConfiguration->CpuThreadsThreshold()}, + _codeHotspotsThreadsThreshold{pConfiguration->CodeHotspotsThreadsThreshold()} { _samplingPeriod = _pConfiguration->CpuWallTimeSamplingRate(); Log::Info("CPU and wall time sampling period = ", _samplingPeriod.count() / 1000000, " ms"); Log::Info("Wall time sampled threads = ", _walltimeThreadsThreshold); + Log::Info("Max CodeHotspots sampled threads = ", _codeHotspotsThreadsThreshold); Log::Info("Max CPU sampled threads = ", _cpuThreadsThreshold); _pCorProfilerInfo->AddRef(); _iteratorWallTime = _pManagedThreadList->CreateIterator(); _iteratorCpuTime = _pManagedThreadList->CreateIterator(); + _iteratorCodeHotspot = _pCodeHotspotsThreadList->CreateIterator(); _pLoopThread = new std::thread(&StackSamplerLoop::MainLoop, this); OpSysTools::SetNativeThreadName(_pLoopThread, ThreadName); @@ -135,7 +140,7 @@ void StackSamplerLoop::MainLoop() { try { - WaitOnePeriod(); + OpSysTools::Sleep(_samplingPeriod); MainLoopIteration(); } catch (const std::runtime_error& re) @@ -155,16 +160,14 @@ void StackSamplerLoop::MainLoop() Log::Debug("StackSamplerLoop::MainLoop has ended."); } -void StackSamplerLoop::WaitOnePeriod(void) -{ - std::this_thread::sleep_for(_samplingPeriod); -} - -void StackSamplerLoop::MainLoopIteration(void) +void StackSamplerLoop::MainLoopIteration() { // In each iteration, a few threads are sampled to compute wall time. if (_pConfiguration->IsWallTimeProfilingEnabled()) { + // First we collect threads that have trace context to increase the chance to get + CodeHotspotIteration(); + // Then we collect threads that do not have trace context WalltimeProfilingIteration(); } @@ -176,30 +179,55 @@ void StackSamplerLoop::MainLoopIteration(void) } } -void StackSamplerLoop::WalltimeProfilingIteration(void) +void StackSamplerLoop::WalltimeProfilingIteration() { int32_t managedThreadsCount = _pManagedThreadList->Count(); int32_t sampledThreadsCount = (std::min)(managedThreadsCount, _walltimeThreadsThreshold); - for (int32_t i = 0; i < sampledThreadsCount && !_shutdownRequested; i++) + int32_t i = 0; + + ManagedThreadInfo* firstThread = nullptr; + + do { _targetThread = _pManagedThreadList->LoopNext(_iteratorWallTime); - if (_targetThread != nullptr) - { - int64_t thisSampleTimestampNanosecs = OpSysTools::GetHighPrecisionTimestamp(); - int64_t prevSampleTimestampNanosecs = _targetThread->SetLastSampleHighPrecisionTimestampNanoseconds(thisSampleTimestampNanosecs); - int64_t duration = ComputeWallTime(thisSampleTimestampNanosecs, prevSampleTimestampNanosecs); - CollectOneThreadStackSample(_targetThread, thisSampleTimestampNanosecs, duration, PROFILING_TYPE::WallTime); + // either the list is empty or iterator is not in the array range + // so prefer bailing out + if (_targetThread == nullptr) + { + break; + } + if (firstThread == _targetThread.get()) + { _targetThread.reset(); + break; + } - // @ToDo: Investigate whether the OpSysTools::StartPreciseTimerServices(..) invocation made by - // the StackSamplerLoopManager ctor really ensures that this yield for 1ms or less. - // If not, we should not be yielding here. - std::this_thread::yield(); + if (firstThread == nullptr) + { + firstThread = _targetThread.get(); } - } + + // skip thread if it has a trace context + if (_targetThread->HasTraceContext()) + { + _targetThread.reset(); + continue; + } + + int64_t thisSampleTimestampNanosecs = OpSysTools::GetHighPrecisionTimestamp(); + int64_t prevSampleTimestampNanosecs = _targetThread->SetLastSampleHighPrecisionTimestampNanoseconds(thisSampleTimestampNanosecs); + int64_t duration = ComputeWallTime(thisSampleTimestampNanosecs, prevSampleTimestampNanosecs); + + CollectOneThreadStackSample(_targetThread, thisSampleTimestampNanosecs, duration, PROFILING_TYPE::WallTime); + + _targetThread.reset(); + i++; + + } while (i < sampledThreadsCount && !_shutdownRequested); + } void StackSamplerLoop::CpuProfilingIteration(void) @@ -240,12 +268,60 @@ void StackSamplerLoop::CpuProfilingIteration(void) CollectOneThreadStackSample(_targetThread, thisSampleTimestampNanosecs, cpuForSample, PROFILING_TYPE::CpuTime); } } - // don't yield until a thread to sample is found _targetThread.reset(); } } } +void StackSamplerLoop::CodeHotspotIteration() +{ + int32_t managedThreadsCount = _pManagedThreadList->Count(); + int32_t sampledThreadsCount = (std::min)(managedThreadsCount, _codeHotspotsThreadsThreshold); + + int32_t i = 0; + ManagedThreadInfo* firstThread = nullptr; + + do + { + _targetThread = _pCodeHotspotsThreadList->LoopNext(_iteratorCodeHotspot); + + // either the list is empty or iterator is not in the array range, so there is a bug + // so prefer bailing out + if (_targetThread == nullptr) + { + break; + } + + // keep track of the first seen thread, to avoid infinite loop + if (firstThread == _targetThread.get()) + { + _targetThread.reset(); + break; + } + + if (firstThread == nullptr) + { + firstThread = _targetThread.get(); + } + + // skip if it has no trace context + if (!_targetThread->HasTraceContext()) + { + _targetThread.reset(); + continue; + } + + int64_t thisSampleTimestampNanosecs = OpSysTools::GetHighPrecisionTimestamp(); + int64_t prevSampleTimestampNanosecs = _targetThread->SetLastSampleHighPrecisionTimestampNanoseconds(thisSampleTimestampNanosecs); + int64_t duration = ComputeWallTime(thisSampleTimestampNanosecs, prevSampleTimestampNanosecs); + + CollectOneThreadStackSample(_targetThread, thisSampleTimestampNanosecs, duration, PROFILING_TYPE::WallTime); + + _targetThread.reset(); + i++; + } while (i < sampledThreadsCount && !_shutdownRequested); +} + void StackSamplerLoop::CollectOneThreadStackSample( std::shared_ptr& pThreadInfo, int64_t thisSampleTimestampNanosecs, diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoop.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoop.h index 77194390eeb0..d3b466ed9706 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoop.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoop.h @@ -50,6 +50,7 @@ class StackSamplerLoop StackSamplerLoopManager* pManager, IThreadsCpuManager* pThreadsCpuManager, IManagedThreadList* pManagedThreadList, + IManagedThreadList* pCodeHotspotThreadList, ICollector* pWallTimeCollector, ICollector* pCpuTimeCollector ); @@ -67,6 +68,7 @@ class StackSamplerLoop IConfiguration* _pConfiguration; IThreadsCpuManager* _pThreadsCpuManager; IManagedThreadList* _pManagedThreadList; + IManagedThreadList* _pCodeHotspotsThreadList; ICollector* _pWallTimeCollector; ICollector* _pCpuTimeCollector; @@ -76,8 +78,10 @@ class StackSamplerLoop std::shared_ptr _targetThread; uint32_t _iteratorWallTime; uint32_t _iteratorCpuTime; + uint32_t _iteratorCodeHotspot; int32_t _walltimeThreadsThreshold; int32_t _cpuThreadsThreshold; + int32_t _codeHotspotsThreadsThreshold; private: std::unordered_map _encounteredStackSnapshotHRs; @@ -88,11 +92,11 @@ class StackSamplerLoop std::chrono::nanoseconds _samplingPeriod; private: - void MainLoop(void); - void WaitOnePeriod(void); - void MainLoopIteration(void); - void CpuProfilingIteration(void); - void WalltimeProfilingIteration(void); + void MainLoop(); + void MainLoopIteration(); + void CpuProfilingIteration(); + void WalltimeProfilingIteration(); + void CodeHotspotIteration(); void CollectOneThreadStackSample(std::shared_ptr& pThreadInfo, int64_t thisSampleTimestampNanosecs, int64_t duration, diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoopManager.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoopManager.cpp index ef955413aade..b07518a49a23 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoopManager.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoopManager.cpp @@ -30,6 +30,7 @@ StackSamplerLoopManager::StackSamplerLoopManager( IClrLifetime const* clrLifetime, IThreadsCpuManager* pThreadsCpuManager, IManagedThreadList* pManagedThreadList, + IManagedThreadList* pCodeHotspotThreadList, ICollector* pWallTimeCollector, ICollector* pCpuTimeCollector ) : @@ -52,6 +53,7 @@ StackSamplerLoopManager::StackSamplerLoopManager( _pClrLifetime{clrLifetime}, _pThreadsCpuManager{pThreadsCpuManager}, _pManagedThreadList{pManagedThreadList}, + _pCodeHotspotsThreadList{pCodeHotspotThreadList}, _pWallTimeCollector{pWallTimeCollector}, _pCpuTimeCollector{pCpuTimeCollector}, _deadlockInterventionInProgress{0} @@ -118,6 +120,7 @@ void StackSamplerLoopManager::RunStackSampling() this, _pThreadsCpuManager, _pManagedThreadList, + _pCodeHotspotsThreadList, _pWallTimeCollector, _pCpuTimeCollector ); diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoopManager.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoopManager.h index abb0084c0514..14663fdae48a 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoopManager.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/StackSamplerLoopManager.h @@ -74,6 +74,7 @@ class StackSamplerLoopManager : public IStackSamplerLoopManager IClrLifetime const* clrLifetime, IThreadsCpuManager* pThreadsCpuManager, IManagedThreadList* pManagedThreadList, + IManagedThreadList* pCodeHotspotThreadList, ICollector* pWallTimeCollector, ICollector* pCpuTimeCollector ); @@ -194,6 +195,7 @@ class StackSamplerLoopManager : public IStackSamplerLoopManager IConfiguration* _pConfiguration = nullptr; IThreadsCpuManager* _pThreadsCpuManager = nullptr; IManagedThreadList* _pManagedThreadList = nullptr; + IManagedThreadList* _pCodeHotspotsThreadList = nullptr; ICollector* _pWallTimeCollector = nullptr; ICollector* _pCpuTimeCollector = nullptr; diff --git a/profiler/test/Datadog.Profiler.IntegrationTests/CodeHotspot/CodeHotspotTest.cs b/profiler/test/Datadog.Profiler.IntegrationTests/CodeHotspot/CodeHotspotTest.cs index 6a068422cfda..4ea23bd2949e 100644 --- a/profiler/test/Datadog.Profiler.IntegrationTests/CodeHotspot/CodeHotspotTest.cs +++ b/profiler/test/Datadog.Profiler.IntegrationTests/CodeHotspot/CodeHotspotTest.cs @@ -32,11 +32,110 @@ public CodeHotspotTest(ITestOutputHelper output) } [TestAppFact("Samples.BuggyBits")] - public void CheckSpanContextAreAttached(string appName, string framework, string appAssembly) + public void CheckTraceContextAreAttachedForWalltimeProfilerHumberOfThreads(string appName, string framework, string appAssembly) + { + var runner = new TestApplicationRunner(appName, framework, appAssembly, _output, enableTracer: true, commandLine: ScenarioCodeHotspot + " --with-idle-threads 500"); + // By default, the codehotspot feature is activated + + runner.Environment.SetVariable(EnvironmentVariables.WallTimeProfilerEnabled, "1"); + runner.Environment.SetVariable(EnvironmentVariables.CpuProfilerEnabled, "0"); + + using var agent = MockDatadogAgent.CreateHttpAgent(_output); + + var profilerRuntimeIds = new List(); + agent.ProfilerRequestReceived += (object sender, EventArgs ctx) => + { + profilerRuntimeIds.Add(ExtractRuntimeIdFromProfilerRequest(ctx.Value.Request)); + }; + + var tracerRuntimeIds = new List(); + agent.TracerRequestReceived += (object sender, EventArgs ctx) => + { + tracerRuntimeIds.AddRange(ExtractRuntimeIdsFromTracerRequest(ctx.Value.Request)); + }; + + runner.Run(agent); + + Assert.True(agent.NbCallsOnProfilingEndpoint > 0); + + Assert.Single(profilerRuntimeIds.Distinct()); + Assert.Single(tracerRuntimeIds.Distinct()); + + var profilerRuntimeId = profilerRuntimeIds.First(); + Assert.NotNull(profilerRuntimeId); + Assert.NotEmpty(profilerRuntimeId); + + var tracerRuntimeId = tracerRuntimeIds.First(); + Assert.NotNull(tracerRuntimeId); + Assert.NotEmpty(tracerRuntimeId); + + Assert.Equal(profilerRuntimeId, tracerRuntimeId); + + // We cannot enumerate and check for each pprof files if it contains trace context. + // The profiler is configured to export/write pprof file every 3s, but + // depending on the machine or if it's release or debug, the first pprof file + // may not contains any trace context. + var tracingContexts = GetTracingContextsFromPprofFiles(runner.Environment.PprofDir); + Assert.NotEmpty(tracingContexts); + } + + [TestAppFact("Samples.BuggyBits")] + public void CheckSpanContextAreAttachedForWalltimeProfiler(string appName, string framework, string appAssembly) { var runner = new TestApplicationRunner(appName, framework, appAssembly, _output, enableTracer: true, commandLine: ScenarioCodeHotspot); // By default, the codehotspot feature is activated + runner.Environment.SetVariable(EnvironmentVariables.WallTimeProfilerEnabled, "1"); + runner.Environment.SetVariable(EnvironmentVariables.CpuProfilerEnabled, "0"); + + using var agent = MockDatadogAgent.CreateHttpAgent(_output); + + var profilerRuntimeIds = new List(); + agent.ProfilerRequestReceived += (object sender, EventArgs ctx) => + { + profilerRuntimeIds.Add(ExtractRuntimeIdFromProfilerRequest(ctx.Value.Request)); + }; + + var tracerRuntimeIds = new List(); + agent.TracerRequestReceived += (object sender, EventArgs ctx) => + { + tracerRuntimeIds.AddRange(ExtractRuntimeIdsFromTracerRequest(ctx.Value.Request)); + }; + + runner.Run(agent); + + Assert.True(agent.NbCallsOnProfilingEndpoint > 0); + + Assert.Single(profilerRuntimeIds.Distinct()); + Assert.Single(tracerRuntimeIds.Distinct()); + + var profilerRuntimeId = profilerRuntimeIds.First(); + Assert.NotNull(profilerRuntimeId); + Assert.NotEmpty(profilerRuntimeId); + + var tracerRuntimeId = tracerRuntimeIds.First(); + Assert.NotNull(tracerRuntimeId); + Assert.NotEmpty(tracerRuntimeId); + + Assert.Equal(profilerRuntimeId, tracerRuntimeId); + + // We cannot enumerate and check for each pprof files if it contains trace context. + // The profiler is configured to export/write pprof file every 3s, but + // depending on the machine or if it's release or debug, the first pprof file + // may not contains any trace context. + var tracingContexts = GetTracingContextsFromPprofFiles(runner.Environment.PprofDir); + Assert.NotEmpty(tracingContexts); + } + + [TestAppFact("Samples.BuggyBits")] + public void CheckSpanContextAreAttachedForCpuProfiler(string appName, string framework, string appAssembly) + { + var runner = new TestApplicationRunner(appName, framework, appAssembly, _output, enableTracer: true, commandLine: "--scenario 6"); + // By default, the codehotspot feature is activated + + runner.Environment.SetVariable(EnvironmentVariables.WallTimeProfilerEnabled, "0"); + runner.Environment.SetVariable(EnvironmentVariables.CpuProfilerEnabled, "1"); + using var agent = MockDatadogAgent.CreateHttpAgent(_output); var profilerRuntimeIds = new List(); @@ -68,8 +167,8 @@ public void CheckSpanContextAreAttached(string appName, string framework, string Assert.Equal(profilerRuntimeId, tracerRuntimeId); - var profileTracingContexts = GetTracingContextsFromPprofFiles(runner.Environment.PprofDir); - Assert.NotEmpty(profileTracingContexts); + var tracingContexts = GetTracingContextsFromPprofFiles(runner.Environment.PprofDir); + Assert.NotEmpty(tracingContexts); // In the first versions of this test, we extracted span ids from Tracer requests and ensured that the ones collected by the // profiler was a subset. But this makes the test flacky: not flushed when the application is closing. diff --git a/profiler/test/Datadog.Profiler.Native.Tests/ProfilerMockedInterface.h b/profiler/test/Datadog.Profiler.Native.Tests/ProfilerMockedInterface.h index f13aed668969..dd35e559753b 100644 --- a/profiler/test/Datadog.Profiler.Native.Tests/ProfilerMockedInterface.h +++ b/profiler/test/Datadog.Profiler.Native.Tests/ProfilerMockedInterface.h @@ -54,6 +54,7 @@ class MockConfiguration : public IConfiguration MOCK_METHOD(bool, IsTimestampsAsLabelEnabled, (), (const override)); MOCK_METHOD(int32_t, WalltimeThreadsThreshold, (), (const override)); MOCK_METHOD(int32_t, CpuThreadsThreshold, (), (const override)); + MOCK_METHOD(int32_t, CodeHotspotsThreadsThreshold, (), (const override)); MOCK_METHOD(bool, IsGarbageCollectionProfilingEnabled, (), (const override)); };