forked from tgstation/tgstation-server
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSystemDManager.cs
207 lines (170 loc) · 6.67 KB
/
SystemDManager.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
using System;
using System.Globalization;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Mono.Unix;
using Tgstation.Server.Host.Components;
using Tgstation.Server.Host.Core;
namespace Tgstation.Server.Host.System
{
/// <summary>
/// Implements the SystemD notify service protocol: reports readiness, reload/stop transitions and periodic watchdog heartbeats via sd_notify.
/// </summary>
sealed class SystemDManager : BackgroundService, IRestartHandler, IDisposable
{
    /// <summary>
    /// The sd_notify command for notifying the watchdog we are alive.
    /// </summary>
    const string SDNotifyWatchdog = "WATCHDOG=1";

    /// <summary>
    /// The <see cref="IHostApplicationLifetime"/> for the <see cref="SystemDManager"/>.
    /// </summary>
    readonly IHostApplicationLifetime applicationLifetime;

    /// <summary>
    /// The <see cref="IInstanceManager"/> for the <see cref="SystemDManager"/>.
    /// </summary>
    readonly IInstanceManager instanceManager;

    /// <summary>
    /// The <see cref="IRestartRegistration"/> for the <see cref="SystemDManager"/>.
    /// </summary>
    readonly IRestartRegistration restartRegistration;

    /// <summary>
    /// The <see cref="ILogger"/> for the <see cref="SystemDManager"/>.
    /// </summary>
    readonly ILogger<SystemDManager> logger;

    /// <summary>
    /// If TGS is going to restart.
    /// </summary>
    bool restartInProgress;

    /// <summary>
    /// Get the current value of the monotonic clock in microseconds.
    /// </summary>
    /// <returns>A <see cref="long"/> representing the clock time in microseconds.</returns>
    /// <remarks>See https://linux.die.net/man/3/clock_gettime.</remarks>
    static long GetMonotonicUsec()
    {
        // HACK: https://github.com/dotnet/runtime/blob/v8.0.0-preview.6.23329.7/src/native/libs/System.Native/pal_time.c#L84 clock_gettime_nsec_np is an OSX only thing apparently...
        const long MicrosecondsPerSecond = 1_000_000;

        // Stopwatch timestamps are expressed in Stopwatch.Frequency ticks per second, NOT microseconds, so they must be converted.
        var ticks = global::System.Diagnostics.Stopwatch.GetTimestamp();
        var frequency = global::System.Diagnostics.Stopwatch.Frequency;

        // Convert whole seconds and the sub-second remainder separately so the multiplication cannot overflow for large tick counts.
        return ((ticks / frequency) * MicrosecondsPerSecond)
            + ((ticks % frequency) * MicrosecondsPerSecond / frequency);
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="SystemDManager"/> class.
    /// </summary>
    /// <param name="applicationLifetime">The value of <see cref="applicationLifetime"/>.</param>
    /// <param name="instanceManager">The value of <see cref="instanceManager"/>.</param>
    /// <param name="serverControl">The <see cref="IServerControl"/> used to create the <see cref="restartRegistration"/>.</param>
    /// <param name="logger">The value of <see cref="logger"/>.</param>
    /// <exception cref="ArgumentNullException">If any argument is <see langword="null"/>.</exception>
    public SystemDManager(
        IHostApplicationLifetime applicationLifetime,
        IInstanceManager instanceManager,
        IServerControl serverControl,
        ILogger<SystemDManager> logger)
    {
        this.applicationLifetime = applicationLifetime ?? throw new ArgumentNullException(nameof(applicationLifetime));
        this.instanceManager = instanceManager ?? throw new ArgumentNullException(nameof(instanceManager));
        ArgumentNullException.ThrowIfNull(serverControl);
        this.logger = logger ?? throw new ArgumentNullException(nameof(logger));

        // Registered last so no registration is created if argument validation fails.
        restartRegistration = serverControl.RegisterForRestart(this);
    }

    /// <inheritdoc />
    public override void Dispose()
    {
        base.Dispose();
        restartRegistration.Dispose();
    }

    /// <inheritdoc />
    public ValueTask HandleRestart(Version? updateVersion, bool handlerMayDelayShutdownWithExtremelyLongRunningTasks, CancellationToken cancellationToken)
    {
        // If handlerMayDelayShutdownWithExtremelyLongRunningTasks is set, we know a graceful SHUTDOWN was requested rather than a restart.
        restartInProgress = !handlerMayDelayShutdownWithExtremelyLongRunningTasks;
        return ValueTask.CompletedTask;
    }

    /// <inheritdoc />
    protected override async Task ExecuteAsync(CancellationToken cancellationToken)
    {
        // The first heartbeat doubles as the probe for whether sd_notify is usable at all.
        if (!SendSDNotify(SDNotifyWatchdog))
        {
            logger.LogDebug("SystemD not detected");
            return;
        }

        logger.LogDebug("SystemD detected");

        if (applicationLifetime.ApplicationStarted.IsCancellationRequested)
            throw new InvalidOperationException("RunAsync called after application started!");

        logger.LogTrace("Installing lifetime handlers...");

        // READY=1 is sent once CheckReady has been called twice:
        // once from the ApplicationStarted callback and once after the instance manager is ready.
        var readyCounts = 0;
        void CheckReady()
        {
            if (Interlocked.Increment(ref readyCounts) < 2)
                return;

            SendSDNotify("READY=1");
        }

        // The registrations are not disposed here, so the handlers stay installed after this method returns.
        applicationLifetime.ApplicationStarted.Register(() => CheckReady());
        applicationLifetime.ApplicationStopping.Register(
            () => SendSDNotify(
                restartInProgress
                    ? $"RELOADING=1\nMONOTONIC_USEC={GetMonotonicUsec()}"
                    : "STOPPING=1"));

        try
        {
            await instanceManager.Ready.WaitAsync(cancellationToken);
            CheckReady();

            var watchdogUsec = Environment.GetEnvironmentVariable("WATCHDOG_USEC");
            if (String.IsNullOrWhiteSpace(watchdogUsec))
            {
                logger.LogDebug("WATCHDOG_USEC not present, not starting watchdog loop");
                return;
            }

            var microseconds = UInt64.Parse(watchdogUsec, CultureInfo.InvariantCulture);

            // Kept as an unsigned 64-bit value: narrowing to int could wrap into a negative interval for very large settings.
            var timeoutIntervalMillis = microseconds / 1000;
            var timeoutInterval = TimeSpan.FromMilliseconds((double)timeoutIntervalMillis);
            if (timeoutInterval <= TimeSpan.Zero)
            {
                // A zero interval would turn the loop below into a busy loop.
                logger.LogWarning("WATCHDOG_USEC value {watchdogUsec} is below one millisecond, not starting watchdog loop", watchdogUsec);
                return;
            }

            logger.LogDebug("Starting watchdog loop with interval of {timeoutInterval}ms", timeoutIntervalMillis);

            var nextExpectedTimeout = DateTimeOffset.UtcNow + timeoutInterval;
            var timeToNextExpectedTimeout = timeoutInterval;
            while (!cancellationToken.IsCancellationRequested)
            {
                // Heartbeat at half of the remaining time before the watchdog deadline.
                var delayInterval = timeToNextExpectedTimeout / 2;
                await Task.Delay(delayInterval, cancellationToken);

                var notifySuccess = SendSDNotify(SDNotifyWatchdog);
                var now = DateTimeOffset.UtcNow;
                if (notifySuccess)
                {
                    nextExpectedTimeout = now + timeoutInterval;
                    timeToNextExpectedTimeout = timeoutInterval;
                    continue;
                }

                // Failed heartbeat: retry sooner by halving whatever time is left before the deadline.
                timeToNextExpectedTimeout = nextExpectedTimeout - now;
                logger.LogWarning("Missed systemd heartbeat! Expected timeout in {timeoutMs}ms...", timeToNextExpectedTimeout.TotalMilliseconds);

                if (timeToNextExpectedTimeout <= TimeSpan.Zero)
                {
                    // The deadline has already passed. A negative remaining time must never reach Task.Delay
                    // (it either throws or waits forever), so start a fresh expectation window instead.
                    nextExpectedTimeout = now + timeoutInterval;
                    timeToNextExpectedTimeout = timeoutInterval;
                }
            }
        }
        catch (OperationCanceledException ex)
        {
            logger.LogTrace(ex, "Watchdog loop cancelled!");
        }
        catch (Exception ex)
        {
            // Includes parse/overflow failures of WATCHDOG_USEC; logged rather than propagated to the host.
            logger.LogError(ex, "Watchdog loop crashed!");
        }

        logger.LogDebug("Exited watchdog loop");
    }

    /// <summary>
    /// Send a sd_notify <paramref name="command"/>.
    /// </summary>
    /// <param name="command">The <see cref="string"/> to send via sd_notify.</param>
    /// <returns><see langword="true"/> if the command succeeded, <see langword="false"/> otherwise.</returns>
    bool SendSDNotify(string command)
    {
        logger.LogTrace("Sending sd_notify {message}...", command);

        int result;
        try
        {
            result = NativeMethods.sd_notify(0, command);
        }
        catch (Exception ex)
        {
            // Any failure to invoke the native call is reported as "not sent" instead of being thrown to the caller.
            logger.LogInformation(ex, "Exception attempting to invoke sd_notify!");
            return false;
        }

        if (result > 0)
            return true;

        if (result < 0)
        {
            // sd_notify reports failures as a negative errno-style code; UnixIOException expects the positive errno.
            logger.LogError(new UnixIOException(-result), "sd_notify {message} failed!", command);
        }
        else
            logger.LogTrace("Could not send sd_notify {message}. Socket closed!", command);

        return false;
    }
}
}