From fddf8f57004e151648f6ae5f47c9dfd540a453e6 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Wed, 10 Jan 2024 16:36:32 -0500 Subject: [PATCH 1/8] Initial stgab at improved health checking * upgrade to net8.0 * Add health check endpoint to GRPC engine proto (from serval) * Combine health reports into rich data --- dockerfile | 4 +- dockerfile.development | 2 +- .../IMachineBuilderExtensions.cs | 39 +++++++++++++------ .../SIL.Machine.AspNetCore.csproj | 3 +- .../ServalTranslationEngineServiceV1.cs | 15 ++++++- .../Utils/WriteHealthCheckResponse.cs | 34 ++++++++++++++++ ....Machine.Morphology.HermitCrab.Tool.csproj | 2 +- .../SIL.Machine.Plugin.csproj | 2 +- .../Program.cs | 1 - .../SIL.Machine.Serval.EngineServer.csproj | 4 +- src/SIL.Machine.Serval.JobServer/Program.cs | 2 - .../SIL.Machine.Serval.JobServer.csproj | 4 +- src/SIL.Machine.Tool/SIL.Machine.Tool.csproj | 2 +- .../SIL.Machine.AspNetCore.Tests.csproj | 2 +- ...Machine.Morphology.HermitCrab.Tests.csproj | 2 +- .../SIL.Machine.Tests.csproj | 2 +- ...ne.Tokenization.SentencePiece.Tests.csproj | 2 +- .../SIL.Machine.Translation.Thot.Tests.csproj | 2 +- 18 files changed, 93 insertions(+), 31 deletions(-) create mode 100644 src/SIL.Machine.AspNetCore/Utils/WriteHealthCheckResponse.cs diff --git a/dockerfile b/dockerfile index d04eb9ef1..da052071b 100644 --- a/dockerfile +++ b/dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/dotnet/sdk:6.0-jammy AS build-env +FROM mcr.microsoft.com/dotnet/sdk:8.0-jammy AS build-env WORKDIR /app RUN apt-get update && apt-get install -y g++ curl cmake @@ -12,7 +12,7 @@ RUN dotnet publish ./src/SIL.Machine.Serval.EngineServer/SIL.Machine.Serval.Engi RUN dotnet publish ./src/SIL.Machine.Serval.JobServer/SIL.Machine.Serval.JobServer.csproj -c Release -o out_job_server # Build runtime image -FROM mcr.microsoft.com/dotnet/aspnet:6.0-jammy as production +FROM mcr.microsoft.com/dotnet/aspnet:8.0-jammy as production # libgomp needed for thot RUN apt-get update && apt-get install -y libgomp1 WORKDIR /app diff --git a/dockerfile.development b/dockerfile.development index cd6847185..a395da6dd 100644 --- a/dockerfile.development +++ b/dockerfile.development @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/dotnet/sdk:6.0-jammy +FROM mcr.microsoft.com/dotnet/sdk:8.0-jammy # libgomp needed for thot RUN apt update && apt install -y unzip libgomp1 && \ curl -sSL https://aka.ms/getvsdbgsh | /bin/sh /dev/stdin -v latest -l /remote_debugger \ No newline at end of file diff --git a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs index e5e948573..ffe7cb119 100644 --- a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs +++ b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs @@ -103,10 +103,10 @@ public static IMachineBuilder AddUnigramTruecaser(this IMachineBuilder builder) public static IMachineBuilder AddClearMLService(this IMachineBuilder builder, string? connectionString = null) { - connectionString ??= builder.Configuration.GetConnectionString("ClearML"); + connectionString ??= builder.Configuration!.GetConnectionString("ClearML"); builder.Services .AddHttpClient("ClearML") - .ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString)) + .ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString!)) // Add retry policy; fail after approx. 2 + 4 + 8 = 14 seconds .AddTransientHttpErrorPolicy( b => b.WaitAndRetryAsync(3, retryAttempt => TimeSpan.FromSeconds(Math.Pow(2, retryAttempt))) @@ -120,8 +120,9 @@ public static IMachineBuilder AddClearMLService(this IMachineBuilder builder, st builder.Services .AddHttpClient("ClearML-NoRetry") - .ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString)); + .ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString!)); builder.Services.AddSingleton(); + builder.Services.AddHealthChecks().AddCheck("ClearML Health Check"); return builder; @@ -158,7 +159,7 @@ public static IMachineBuilder AddMongoHangfireJobClient( .UseSimpleAssemblyNameTypeSerializer() .UseRecommendedSerializerSettings() .UseMongoStorage( - connectionString ?? builder.Configuration.GetConnectionString("Hangfire"), + connectionString ?? builder.Configuration!.GetConnectionString("Hangfire"), new MongoStorageOptions { MigrationOptions = new MongoMigrationOptions @@ -220,9 +221,9 @@ public static IMachineBuilder AddMemoryDataAccess(this IMachineBuilder builder) public static IMachineBuilder AddMongoDataAccess(this IMachineBuilder builder, string? connectionString = null) { - connectionString ??= builder.Configuration.GetConnectionString("Mongo"); + connectionString ??= builder.Configuration!.GetConnectionString("Mongo"); builder.Services.AddMongoDataAccess( - connectionString, + connectionString!, "SIL.Machine.AspNetCore.Models", o => { @@ -257,7 +258,7 @@ await c.Indexes.CreateOrUpdateAsync( ); } ); - builder.Services.AddHealthChecks().AddMongoDb(connectionString, name: "Mongo"); + builder.Services.AddHealthChecks().AddMongoDb(connectionString!, name: "Mongo"); return builder; } @@ -271,7 +272,7 @@ public static IMachineBuilder AddServalPlatformService( builder.Services .AddGrpcClient(o => { - o.Address = new Uri(connectionString ?? builder.Configuration.GetConnectionString("Serval")); + o.Address = new Uri(connectionString ?? builder.Configuration!.GetConnectionString("Serval")!); }) .ConfigureChannel(o => { @@ -321,7 +322,7 @@ public static IMachineBuilder AddServalTranslationEngineService( options.Interceptors.Add(); options.Interceptors.Add(); }); - builder.AddServalPlatformService(connectionString ?? builder.Configuration.GetConnectionString("Serval")); + builder.AddServalPlatformService(connectionString ?? builder.Configuration!.GetConnectionString("Serval")); engineTypes ??= builder.Configuration?.GetSection("TranslationEngines").Get() ?? new[] { TranslationEngineType.SmtTransfer, TranslationEngineType.Nmt }; @@ -340,7 +341,6 @@ public static IMachineBuilder AddServalTranslationEngineService( break; } } - builder.Services.AddGrpcHealthChecks(); return builder; } @@ -359,7 +359,7 @@ Action configureOptions public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, IConfiguration config) { builder.Services.Configure(config); - var options = config.Get(); + var options = config.Get()!; return builder.AddBuildJobService(options); } @@ -368,7 +368,24 @@ public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder) if (builder.Configuration is null) builder.AddBuildJobService(o => { }); else + { builder.AddBuildJobService(builder.Configuration.GetSection(BuildJobOptions.Key)); + + string EnginesDir = builder.Configuration + .GetSection(SmtTransferEngineOptions.Key)! + .GetValue("EnginesDir")!; + + string driveLetter = Path.GetPathRoot(EnginesDir)![..1]; + // add health check for disk storage capacity + builder.Services + .AddHealthChecks() + .AddDiskStorageHealthCheck( + x => x.AddDrive(driveLetter, 2_000_000), + "SMT Engine Storage Capacity", + HealthStatus.Degraded + ); + } + return builder; } diff --git a/src/SIL.Machine.AspNetCore/SIL.Machine.AspNetCore.csproj b/src/SIL.Machine.AspNetCore/SIL.Machine.AspNetCore.csproj index ab401c602..549e009ff 100644 --- a/src/SIL.Machine.AspNetCore/SIL.Machine.AspNetCore.csproj +++ b/src/SIL.Machine.AspNetCore/SIL.Machine.AspNetCore.csproj @@ -1,7 +1,7 @@  - net6.0 + net8.0 An ASP.NET Core web API middleware for the Machine library. 1591 enable @@ -26,6 +26,7 @@ + diff --git a/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs b/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs index a0b010c61..20333359c 100644 --- a/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs +++ b/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs @@ -9,9 +9,15 @@ public class ServalTranslationEngineServiceV1 : TranslationEngineApi.Translation private readonly Dictionary _engineServices; - public ServalTranslationEngineServiceV1(IEnumerable engineServices) + private readonly HealthCheckService _healthCheckService; + + public ServalTranslationEngineServiceV1( + IEnumerable engineServices, + HealthCheckService healthCheckService + ) { _engineServices = engineServices.ToDictionary(es => es.Type); + _healthCheckService = healthCheckService; } public override async Task Create(CreateRequest request, ServerCallContext context) @@ -127,6 +133,13 @@ ServerCallContext context return new GetQueueSizeResponse { Size = await engineService.GetQueueSizeAsync(context.CancellationToken) }; } + public override async Task HealthCheck(Empty request, ServerCallContext context) + { + HealthReport healthReport = await _healthCheckService.CheckHealthAsync(); + HealthCheckResponse healthCheckResponse = WriteHealthCheckResponse.Generate(healthReport); + return healthCheckResponse; + } + private ITranslationEngineService GetEngineService(string engineTypeStr) { if (_engineServices.TryGetValue(GetEngineType(engineTypeStr), out ITranslationEngineService? service)) diff --git a/src/SIL.Machine.AspNetCore/Utils/WriteHealthCheckResponse.cs b/src/SIL.Machine.AspNetCore/Utils/WriteHealthCheckResponse.cs new file mode 100644 index 000000000..402deee4b --- /dev/null +++ b/src/SIL.Machine.AspNetCore/Utils/WriteHealthCheckResponse.cs @@ -0,0 +1,34 @@ +using Serval.Translation.V1; + +namespace SIL.Machine.AspNetCore.Utils; + +public class WriteHealthCheckResponse +{ + public static HealthCheckResponse Generate(HealthReport healthReport) + { + Dictionary healthCheckResultData = []; + string? healthCheckResultException = null; + + // Combine data and exceptions from all health checks + foreach (KeyValuePair entry in healthReport.Entries) + { + healthCheckResultData.Add(entry.Key, $"{entry.Value.Status}: {entry.Value.Description ?? ""}"); + if ((entry.Value.Exception?.ToString() ?? "") != "") + if(healthCheckResultException is null) + healthCheckResultException = $"{entry.Key}: {entry.Value.Exception}"; + else + healthCheckResultException += $"\n{entry.Key}: {entry.Value.Exception}"; + } + // Assemble response + HealthCheckResponse healthCheckReponse = new HealthCheckResponse{ + Status = (HealthCheckStatus)healthReport.Status, + Duration = healthReport.TotalDuration.ToString(), + Exception = healthCheckResultException + }; + foreach (KeyValuePair entry in healthCheckResultData) + { + healthCheckReponse.Data.Add(entry.Key, entry.Value); + } + return healthCheckReponse; + } +} \ No newline at end of file diff --git a/src/SIL.Machine.Morphology.HermitCrab.Tool/SIL.Machine.Morphology.HermitCrab.Tool.csproj b/src/SIL.Machine.Morphology.HermitCrab.Tool/SIL.Machine.Morphology.HermitCrab.Tool.csproj index 9517bc14e..4b8769225 100644 --- a/src/SIL.Machine.Morphology.HermitCrab.Tool/SIL.Machine.Morphology.HermitCrab.Tool.csproj +++ b/src/SIL.Machine.Morphology.HermitCrab.Tool/SIL.Machine.Morphology.HermitCrab.Tool.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 SIL.Machine.Morphology.HermitCrab true hc diff --git a/src/SIL.Machine.Plugin/SIL.Machine.Plugin.csproj b/src/SIL.Machine.Plugin/SIL.Machine.Plugin.csproj index edc7ad063..7b503e74d 100644 --- a/src/SIL.Machine.Plugin/SIL.Machine.Plugin.csproj +++ b/src/SIL.Machine.Plugin/SIL.Machine.Plugin.csproj @@ -1,7 +1,7 @@ - net6.0 + net8.0 A plugin framework for the Machine library. diff --git a/src/SIL.Machine.Serval.EngineServer/Program.cs b/src/SIL.Machine.Serval.EngineServer/Program.cs index fa843e3ad..50c389b71 100644 --- a/src/SIL.Machine.Serval.EngineServer/Program.cs +++ b/src/SIL.Machine.Serval.EngineServer/Program.cs @@ -29,7 +29,6 @@ app.UseHttpsRedirection(); app.MapServalTranslationEngineService(); -app.MapGrpcHealthChecksService(); app.MapHangfireDashboard(); app.Run(); diff --git a/src/SIL.Machine.Serval.EngineServer/SIL.Machine.Serval.EngineServer.csproj b/src/SIL.Machine.Serval.EngineServer/SIL.Machine.Serval.EngineServer.csproj index 9a6962da2..244245baa 100644 --- a/src/SIL.Machine.Serval.EngineServer/SIL.Machine.Serval.EngineServer.csproj +++ b/src/SIL.Machine.Serval.EngineServer/SIL.Machine.Serval.EngineServer.csproj @@ -1,7 +1,7 @@  - net6.0 + net8.0 enable enable 34e222a9-ef76-48f9-869e-338547f9bd25 @@ -23,7 +23,7 @@ - + icu.net.dll.config diff --git a/src/SIL.Machine.Serval.JobServer/Program.cs b/src/SIL.Machine.Serval.JobServer/Program.cs index 30c2fa593..38b147243 100644 --- a/src/SIL.Machine.Serval.JobServer/Program.cs +++ b/src/SIL.Machine.Serval.JobServer/Program.cs @@ -25,6 +25,4 @@ var app = builder.Build(); -app.MapHealthChecks("/health"); - app.Run(); diff --git a/src/SIL.Machine.Serval.JobServer/SIL.Machine.Serval.JobServer.csproj b/src/SIL.Machine.Serval.JobServer/SIL.Machine.Serval.JobServer.csproj index 0e6059cc0..949f3e0b2 100644 --- a/src/SIL.Machine.Serval.JobServer/SIL.Machine.Serval.JobServer.csproj +++ b/src/SIL.Machine.Serval.JobServer/SIL.Machine.Serval.JobServer.csproj @@ -1,7 +1,7 @@ - net6.0 + net8.0 enable enable aa9e7440-5a04-4de6-ba51-bab9ef4a62e1 @@ -25,7 +25,7 @@ - + icu.net.dll.config diff --git a/src/SIL.Machine.Tool/SIL.Machine.Tool.csproj b/src/SIL.Machine.Tool/SIL.Machine.Tool.csproj index 61a0d22ca..25e5634d7 100644 --- a/src/SIL.Machine.Tool/SIL.Machine.Tool.csproj +++ b/src/SIL.Machine.Tool/SIL.Machine.Tool.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 SIL.Machine true machine diff --git a/tests/SIL.Machine.AspNetCore.Tests/SIL.Machine.AspNetCore.Tests.csproj b/tests/SIL.Machine.AspNetCore.Tests/SIL.Machine.AspNetCore.Tests.csproj index dd09631e7..7a2923cf6 100644 --- a/tests/SIL.Machine.AspNetCore.Tests/SIL.Machine.AspNetCore.Tests.csproj +++ b/tests/SIL.Machine.AspNetCore.Tests/SIL.Machine.AspNetCore.Tests.csproj @@ -1,7 +1,7 @@  - net6.0 + net8.0 SIL.Machine.AspNetCore enable enable diff --git a/tests/SIL.Machine.Morphology.HermitCrab.Tests/SIL.Machine.Morphology.HermitCrab.Tests.csproj b/tests/SIL.Machine.Morphology.HermitCrab.Tests/SIL.Machine.Morphology.HermitCrab.Tests.csproj index dbea327e6..c419d0f9a 100644 --- a/tests/SIL.Machine.Morphology.HermitCrab.Tests/SIL.Machine.Morphology.HermitCrab.Tests.csproj +++ b/tests/SIL.Machine.Morphology.HermitCrab.Tests/SIL.Machine.Morphology.HermitCrab.Tests.csproj @@ -1,7 +1,7 @@  - net6.0 + net8.0 SIL.Machine.Morphology.HermitCrab enable enable diff --git a/tests/SIL.Machine.Tests/SIL.Machine.Tests.csproj b/tests/SIL.Machine.Tests/SIL.Machine.Tests.csproj index ed2c113fe..beb8a730b 100644 --- a/tests/SIL.Machine.Tests/SIL.Machine.Tests.csproj +++ b/tests/SIL.Machine.Tests/SIL.Machine.Tests.csproj @@ -1,7 +1,7 @@  - net6.0 + net8.0 SIL.Machine enable enable diff --git a/tests/SIL.Machine.Tokenization.SentencePiece.Tests/SIL.Machine.Tokenization.SentencePiece.Tests.csproj b/tests/SIL.Machine.Tokenization.SentencePiece.Tests/SIL.Machine.Tokenization.SentencePiece.Tests.csproj index 4e875a4b4..7f64c54bf 100644 --- a/tests/SIL.Machine.Tokenization.SentencePiece.Tests/SIL.Machine.Tokenization.SentencePiece.Tests.csproj +++ b/tests/SIL.Machine.Tokenization.SentencePiece.Tests/SIL.Machine.Tokenization.SentencePiece.Tests.csproj @@ -1,7 +1,7 @@ - net6.0 + net8.0 enable enable false diff --git a/tests/SIL.Machine.Translation.Thot.Tests/SIL.Machine.Translation.Thot.Tests.csproj b/tests/SIL.Machine.Translation.Thot.Tests/SIL.Machine.Translation.Thot.Tests.csproj index 4ebd167f9..a79528cc3 100644 --- a/tests/SIL.Machine.Translation.Thot.Tests/SIL.Machine.Translation.Thot.Tests.csproj +++ b/tests/SIL.Machine.Translation.Thot.Tests/SIL.Machine.Translation.Thot.Tests.csproj @@ -1,7 +1,7 @@  - net6.0 + net8.0 SIL.Machine.Translation.Thot enable enable From cc532fe219fdee4c759b6919e81c312a6f194207 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Thu, 11 Jan 2024 09:27:46 -0500 Subject: [PATCH 2/8] Clean up fixes --- .../ServalTranslationEngineServiceV1.cs | 2 +- .../Utils/WriteHealthCheckResponse.cs | 34 ------------------- 2 files changed, 1 insertion(+), 35 deletions(-) delete mode 100644 src/SIL.Machine.AspNetCore/Utils/WriteHealthCheckResponse.cs diff --git a/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs b/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs index 20333359c..fb87853c6 100644 --- a/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs +++ b/src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs @@ -136,7 +136,7 @@ ServerCallContext context public override async Task HealthCheck(Empty request, ServerCallContext context) { HealthReport healthReport = await _healthCheckService.CheckHealthAsync(); - HealthCheckResponse healthCheckResponse = WriteHealthCheckResponse.Generate(healthReport); + HealthCheckResponse healthCheckResponse = WriteGrpcHealthCheckResponse.Generate(healthReport); return healthCheckResponse; } diff --git a/src/SIL.Machine.AspNetCore/Utils/WriteHealthCheckResponse.cs b/src/SIL.Machine.AspNetCore/Utils/WriteHealthCheckResponse.cs deleted file mode 100644 index 402deee4b..000000000 --- a/src/SIL.Machine.AspNetCore/Utils/WriteHealthCheckResponse.cs +++ /dev/null @@ -1,34 +0,0 @@ -using Serval.Translation.V1; - -namespace SIL.Machine.AspNetCore.Utils; - -public class WriteHealthCheckResponse -{ - public static HealthCheckResponse Generate(HealthReport healthReport) - { - Dictionary healthCheckResultData = []; - string? healthCheckResultException = null; - - // Combine data and exceptions from all health checks - foreach (KeyValuePair entry in healthReport.Entries) - { - healthCheckResultData.Add(entry.Key, $"{entry.Value.Status}: {entry.Value.Description ?? ""}"); - if ((entry.Value.Exception?.ToString() ?? "") != "") - if(healthCheckResultException is null) - healthCheckResultException = $"{entry.Key}: {entry.Value.Exception}"; - else - healthCheckResultException += $"\n{entry.Key}: {entry.Value.Exception}"; - } - // Assemble response - HealthCheckResponse healthCheckReponse = new HealthCheckResponse{ - Status = (HealthCheckStatus)healthReport.Status, - Duration = healthReport.TotalDuration.ToString(), - Exception = healthCheckResultException - }; - foreach (KeyValuePair entry in healthCheckResultData) - { - healthCheckReponse.Data.Add(entry.Key, entry.Value); - } - return healthCheckReponse; - } -} \ No newline at end of file From 4058b1af2010db8c5446326165e1eb8cb4028d04 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Thu, 11 Jan 2024 12:59:36 -0500 Subject: [PATCH 3/8] Clean up ClearML authentication error reporting. --- .../Services/ClearMLAuthenticationService.cs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/SIL.Machine.AspNetCore/Services/ClearMLAuthenticationService.cs b/src/SIL.Machine.AspNetCore/Services/ClearMLAuthenticationService.cs index 3efc2a81b..172c9b772 100644 --- a/src/SIL.Machine.AspNetCore/Services/ClearMLAuthenticationService.cs +++ b/src/SIL.Machine.AspNetCore/Services/ClearMLAuthenticationService.cs @@ -10,7 +10,7 @@ public class ClearMLAuthenticationService : RecurrentTask, IClearMLAuthenticatio // technically, the token should be good for 30 days, but let's refresh each hour // to know well ahead of time if something is wrong. private static readonly TimeSpan RefreshPeriod = TimeSpan.FromSeconds(3600); - private string _authToken = ""; + private string? _authToken = ""; public ClearMLAuthenticationService( IServiceProvider services, @@ -29,14 +29,14 @@ public async Task GetAuthTokenAsync(CancellationToken cancellationToken { using (await _lock.LockAsync(cancellationToken)) { - if (_authToken is "") + if (_authToken is null || _authToken is "") { //Should only happen once, so no different in cost than previous solution _logger.LogInformation("Token was empty; refreshing"); await AuthorizeAsync(cancellationToken); } } - return _authToken; + return _authToken ?? throw new Exception("ClearML authentication token not found in response."); } protected override async Task DoWorkAsync(IServiceScope scope, CancellationToken cancellationToken) @@ -49,6 +49,9 @@ protected override async Task DoWorkAsync(IServiceScope scope, CancellationToken catch (Exception e) { _logger.LogError(e, "Error occurred while refreshing ClearML authentication token."); + if (_authToken is null || _authToken is "") + // The ClearML token never was set. We can't continue without it. + throw; } } @@ -64,6 +67,8 @@ private async Task AuthorizeAsync(CancellationToken cancellationToken) HttpResponseMessage response = await _httpClient.SendAsync(request, cancellationToken); string result = await response.Content.ReadAsStringAsync(cancellationToken); _authToken = (string)((JsonObject?)JsonNode.Parse(result))?["data"]?["token"]!; + if (_authToken is null || _authToken is "") + throw new Exception($"ClearML authentication failed - {response.StatusCode}: {response.ReasonPhrase}"); _logger.LogInformation("ClearML Authentication Token Refresh Successful."); } } From 9f5734a9db53f0b0e1c353639f7fd42bd13fdb34 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Thu, 11 Jan 2024 13:01:31 -0500 Subject: [PATCH 4/8] Update danger level to 1GB hard drive space left. --- .../Configuration/IMachineBuilderExtensions.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs index ffe7cb119..321e8ce3d 100644 --- a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs +++ b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs @@ -380,7 +380,7 @@ public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder) builder.Services .AddHealthChecks() .AddDiskStorageHealthCheck( - x => x.AddDrive(driveLetter, 2_000_000), + x => x.AddDrive(driveLetter, 1_000), // 1GB "SMT Engine Storage Capacity", HealthStatus.Degraded ); From 43008f2e6ec924bf23ad525aafd83e2b802580e8 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Thu, 11 Jan 2024 16:04:59 -0500 Subject: [PATCH 5/8] Remove null forgiving operators. --- .../IMachineBuilderExtensions.cs | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs index 321e8ce3d..d37630873 100644 --- a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs +++ b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs @@ -103,7 +103,10 @@ public static IMachineBuilder AddUnigramTruecaser(this IMachineBuilder builder) public static IMachineBuilder AddClearMLService(this IMachineBuilder builder, string? connectionString = null) { - connectionString ??= builder.Configuration!.GetConnectionString("ClearML"); + connectionString ??= builder.Configuration?.GetConnectionString("ClearML"); + if (connectionString is null) + throw new InvalidOperationException("ClearML connection string is required"); + builder.Services .AddHttpClient("ClearML") .ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString!)) @@ -153,13 +156,17 @@ public static IMachineBuilder AddMongoHangfireJobClient( string? connectionString = null ) { + connectionString ??= builder.Configuration?.GetConnectionString("Hangfire"); + if (connectionString is null) + throw new InvalidOperationException("Hangfire connection string is required"); + builder.Services.AddHangfire( c => c.SetDataCompatibilityLevel(CompatibilityLevel.Version_170) .UseSimpleAssemblyNameTypeSerializer() .UseRecommendedSerializerSettings() .UseMongoStorage( - connectionString ?? builder.Configuration!.GetConnectionString("Hangfire"), + connectionString, new MongoStorageOptions { MigrationOptions = new MongoMigrationOptions @@ -184,7 +191,7 @@ public static IMachineBuilder AddHangfireJobServer( { engineTypes ??= builder.Configuration?.GetSection("TranslationEngines").Get() - ?? new[] { TranslationEngineType.SmtTransfer, TranslationEngineType.Nmt }; + ?? [TranslationEngineType.SmtTransfer, TranslationEngineType.Nmt]; var queues = new List(); foreach (TranslationEngineType engineType in engineTypes.Distinct()) { @@ -221,7 +228,9 @@ public static IMachineBuilder AddMemoryDataAccess(this IMachineBuilder builder) public static IMachineBuilder AddMongoDataAccess(this IMachineBuilder builder, string? connectionString = null) { - connectionString ??= builder.Configuration!.GetConnectionString("Mongo"); + connectionString ??= builder.Configuration?.GetConnectionString("Mongo"); + if (connectionString is null) + throw new InvalidOperationException("Mongo connection string is required"); builder.Services.AddMongoDataAccess( connectionString!, "SIL.Machine.AspNetCore.Models", @@ -263,16 +272,13 @@ await c.Indexes.CreateOrUpdateAsync( return builder; } - public static IMachineBuilder AddServalPlatformService( - this IMachineBuilder builder, - string? connectionString = null - ) + public static IMachineBuilder AddServalPlatformService(this IMachineBuilder builder, string connectionString) { builder.Services.AddScoped(); builder.Services .AddGrpcClient(o => { - o.Address = new Uri(connectionString ?? builder.Configuration!.GetConnectionString("Serval")!); + o.Address = new Uri(connectionString); }) .ConfigureChannel(o => { @@ -322,10 +328,13 @@ public static IMachineBuilder AddServalTranslationEngineService( options.Interceptors.Add(); options.Interceptors.Add(); }); - builder.AddServalPlatformService(connectionString ?? builder.Configuration!.GetConnectionString("Serval")); + connectionString ??= builder.Configuration?.GetConnectionString("Serval"); + if (connectionString is null) + throw new InvalidOperationException("Serval connection string is required"); + builder.AddServalPlatformService(connectionString); engineTypes ??= builder.Configuration?.GetSection("TranslationEngines").Get() - ?? new[] { TranslationEngineType.SmtTransfer, TranslationEngineType.Nmt }; + ?? [TranslationEngineType.SmtTransfer, TranslationEngineType.Nmt]; foreach (TranslationEngineType engineType in engineTypes.Distinct()) { switch (engineType) @@ -359,7 +368,7 @@ Action configureOptions public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, IConfiguration config) { builder.Services.Configure(config); - var options = config.Get()!; + var options = config.Get() ?? throw new InvalidOperationException("BuildJobOPtions are required"); return builder.AddBuildJobService(options); } @@ -373,7 +382,7 @@ public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder) string EnginesDir = builder.Configuration .GetSection(SmtTransferEngineOptions.Key)! - .GetValue("EnginesDir")!; + .GetValue("EnginesDir") ?? throw new InvalidOperationException("EnginesDir is required"); string driveLetter = Path.GetPathRoot(EnginesDir)![..1]; // add health check for disk storage capacity From bc6080d70fbc81908e3ec8687fb53dd7e90de146 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Thu, 11 Jan 2024 16:07:42 -0500 Subject: [PATCH 6/8] Fix build error --- .../Configuration/IMachineBuilderExtensions.cs | 9 +++++---- .../SimplexModelWeightTuner.cs | 1 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs index d37630873..9e80a4e95 100644 --- a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs +++ b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs @@ -272,8 +272,12 @@ await c.Indexes.CreateOrUpdateAsync( return builder; } - public static IMachineBuilder AddServalPlatformService(this IMachineBuilder builder, string connectionString) + public static IMachineBuilder AddServalPlatformService(this IMachineBuilder builder, string? connectionString = null) { + connectionString ??= builder.Configuration?.GetConnectionString("Serval"); + if (connectionString is null) + throw new InvalidOperationException("Serval connection string is required"); + builder.Services.AddScoped(); builder.Services .AddGrpcClient(o => @@ -328,9 +332,6 @@ public static IMachineBuilder AddServalTranslationEngineService( options.Interceptors.Add(); options.Interceptors.Add(); }); - connectionString ??= builder.Configuration?.GetConnectionString("Serval"); - if (connectionString is null) - throw new InvalidOperationException("Serval connection string is required"); builder.AddServalPlatformService(connectionString); engineTypes ??= builder.Configuration?.GetSection("TranslationEngines").Get() diff --git a/src/SIL.Machine.Translation.Thot/SimplexModelWeightTuner.cs b/src/SIL.Machine.Translation.Thot/SimplexModelWeightTuner.cs index 32b28a2a4..6880e80eb 100644 --- a/src/SIL.Machine.Translation.Thot/SimplexModelWeightTuner.cs +++ b/src/SIL.Machine.Translation.Thot/SimplexModelWeightTuner.cs @@ -44,7 +44,6 @@ double Evaluate(Vector weights, int evalCount) } return quality; } - ; progress.Report(new ProgressStatus(0, MaxProgressFunctionEvaluations)); var simplex = new NelderMeadSimplex(ConvergenceTolerance, MaxFunctionEvaluations, 1.0); MinimizationResult result = simplex.FindMinimum( From 4e2f40357ada23a282e47f52e784393da21673c4 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Thu, 11 Jan 2024 21:41:41 -0500 Subject: [PATCH 7/8] Updates from review --- .../Configuration/IMachineBuilderExtensions.cs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs index 9e80a4e95..47a585be6 100644 --- a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs +++ b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs @@ -369,8 +369,9 @@ Action configureOptions public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, IConfiguration config) { builder.Services.Configure(config); - var options = config.Get() ?? throw new InvalidOperationException("BuildJobOPtions are required"); - return builder.AddBuildJobService(options); + var buildJobOptions = new BuildJobOptions(); + config.GetSection(BuildJobOptions.Key).Bind(buildJobOptions); + return builder.AddBuildJobService(buildJobOptions); } public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder) @@ -381,11 +382,9 @@ public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder) { builder.AddBuildJobService(builder.Configuration.GetSection(BuildJobOptions.Key)); - string EnginesDir = builder.Configuration - .GetSection(SmtTransferEngineOptions.Key)! - .GetValue("EnginesDir") ?? throw new InvalidOperationException("EnginesDir is required"); - - string driveLetter = Path.GetPathRoot(EnginesDir)![..1]; + var smtTransferEngineOptions = new SmtTransferEngineOptions(); + builder.Configuration.GetSection(SmtTransferEngineOptions.Key).Bind(smtTransferEngineOptions); + string driveLetter = Path.GetPathRoot(smtTransferEngineOptions.EnginesDir)![..1]; // add health check for disk storage capacity builder.Services .AddHealthChecks() From a14b3d8aa24a6e8bbf66ee5a7fb10608f67f6942 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Tue, 16 Jan 2024 10:01:23 -0500 Subject: [PATCH 8/8] reviewer comments. --- .../IMachineBuilderExtensions.cs | 4 +- .../Services/ClearMLAuthenticationService.cs | 45 +++++++++---------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs index 47a585be6..ade0347d9 100644 --- a/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs +++ b/src/SIL.Machine.AspNetCore/Configuration/IMachineBuilderExtensions.cs @@ -384,7 +384,9 @@ public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder) var smtTransferEngineOptions = new SmtTransferEngineOptions(); builder.Configuration.GetSection(SmtTransferEngineOptions.Key).Bind(smtTransferEngineOptions); - string driveLetter = Path.GetPathRoot(smtTransferEngineOptions.EnginesDir)![..1]; + string? driveLetter = Path.GetPathRoot(smtTransferEngineOptions.EnginesDir)?[..1]; + if(driveLetter is null) + throw new InvalidOperationException("SMT Engine directory is required"); // add health check for disk storage capacity builder.Services .AddHealthChecks() diff --git a/src/SIL.Machine.AspNetCore/Services/ClearMLAuthenticationService.cs b/src/SIL.Machine.AspNetCore/Services/ClearMLAuthenticationService.cs index 172c9b772..721d68868 100644 --- a/src/SIL.Machine.AspNetCore/Services/ClearMLAuthenticationService.cs +++ b/src/SIL.Machine.AspNetCore/Services/ClearMLAuthenticationService.cs @@ -1,42 +1,34 @@ namespace SIL.Machine.AspNetCore.Services; -public class ClearMLAuthenticationService : RecurrentTask, IClearMLAuthenticationService +public class ClearMLAuthenticationService( + IServiceProvider services, + IHttpClientFactory httpClientFactory, + IOptionsMonitor options, + ILogger logger + ) : RecurrentTask("ClearML authentication service", services, RefreshPeriod, logger), IClearMLAuthenticationService { - private readonly HttpClient _httpClient; - private readonly IOptionsMonitor _options; - private readonly ILogger _logger; + private readonly HttpClient _httpClient = httpClientFactory.CreateClient("ClearML"); + private readonly IOptionsMonitor _options = options; + private readonly ILogger _logger = logger; private readonly AsyncLock _lock = new(); // technically, the token should be good for 30 days, but let's refresh each hour // to know well ahead of time if something is wrong. private static readonly TimeSpan RefreshPeriod = TimeSpan.FromSeconds(3600); - private string? _authToken = ""; - - public ClearMLAuthenticationService( - IServiceProvider services, - IHttpClientFactory httpClientFactory, - IOptionsMonitor options, - ILogger logger - ) - : base("ClearML authentication service", services, RefreshPeriod, logger) - { - _httpClient = httpClientFactory.CreateClient("ClearML"); - _options = options; - _logger = logger; - } + private string _authToken = ""; public async Task GetAuthTokenAsync(CancellationToken cancellationToken = default) { using (await _lock.LockAsync(cancellationToken)) { - if (_authToken is null || _authToken is "") + if (_authToken is "") { //Should only happen once, so no different in cost than previous solution _logger.LogInformation("Token was empty; refreshing"); await AuthorizeAsync(cancellationToken); } } - return _authToken ?? throw new Exception("ClearML authentication token not found in response."); + return _authToken; } protected override async Task DoWorkAsync(IServiceScope scope, CancellationToken cancellationToken) @@ -48,10 +40,14 @@ protected override async Task DoWorkAsync(IServiceScope scope, CancellationToken } catch (Exception e) { - _logger.LogError(e, "Error occurred while refreshing ClearML authentication token."); - if (_authToken is null || _authToken is "") + if (_authToken is ""){ + _logger.LogError(e, "Error occurred while aquiring ClearML authentication token for the first time."); // The ClearML token never was set. We can't continue without it. throw; + } + else + _logger.LogError(e, "Error occurred while refreshing ClearML authentication token."); + } } @@ -66,9 +62,10 @@ private async Task AuthorizeAsync(CancellationToken cancellationToken) request.Headers.Add("Authorization", $"Basic {base64EncodedAuthenticationString}"); HttpResponseMessage response = await _httpClient.SendAsync(request, cancellationToken); string result = await response.Content.ReadAsStringAsync(cancellationToken); - _authToken = (string)((JsonObject?)JsonNode.Parse(result))?["data"]?["token"]!; - if (_authToken is null || _authToken is "") + string? refreshedToken = (string?)((JsonObject?)JsonNode.Parse(result))?["data"]?["token"]; + if (refreshedToken is null || refreshedToken is "") throw new Exception($"ClearML authentication failed - {response.StatusCode}: {response.ReasonPhrase}"); + _authToken = refreshedToken; _logger.LogInformation("ClearML Authentication Token Refresh Successful."); } }