Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drive health #156

Merged
merged 8 commits into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM mcr.microsoft.com/dotnet/sdk:6.0-jammy AS build-env
FROM mcr.microsoft.com/dotnet/sdk:8.0-jammy AS build-env
WORKDIR /app

RUN apt-get update && apt-get install -y g++ curl cmake
Expand All @@ -12,7 +12,7 @@ RUN dotnet publish ./src/SIL.Machine.Serval.EngineServer/SIL.Machine.Serval.Engi
RUN dotnet publish ./src/SIL.Machine.Serval.JobServer/SIL.Machine.Serval.JobServer.csproj -c Release -o out_job_server

# Build runtime image
FROM mcr.microsoft.com/dotnet/aspnet:6.0-jammy as production
FROM mcr.microsoft.com/dotnet/aspnet:8.0-jammy as production
# libgomp needed for thot
RUN apt-get update && apt-get install -y libgomp1
WORKDIR /app
Expand Down
2 changes: 1 addition & 1 deletion dockerfile.development
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM mcr.microsoft.com/dotnet/sdk:6.0-jammy
FROM mcr.microsoft.com/dotnet/sdk:8.0-jammy
# libgomp needed for thot
RUN apt update && apt install -y unzip libgomp1 && \
curl -sSL https://aka.ms/getvsdbgsh | /bin/sh /dev/stdin -v latest -l /remote_debugger
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,13 @@ public static IMachineBuilder AddUnigramTruecaser(this IMachineBuilder builder)

public static IMachineBuilder AddClearMLService(this IMachineBuilder builder, string? connectionString = null)
{
connectionString ??= builder.Configuration.GetConnectionString("ClearML");
connectionString ??= builder.Configuration?.GetConnectionString("ClearML");
if (connectionString is null)
throw new InvalidOperationException("ClearML connection string is required");

builder.Services
.AddHttpClient("ClearML")
.ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString))
.ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString!))
// Add retry policy; fail after approx. 2 + 4 + 8 = 14 seconds
.AddTransientHttpErrorPolicy(
b => b.WaitAndRetryAsync(3, retryAttempt => TimeSpan.FromSeconds(Math.Pow(2, retryAttempt)))
Expand All @@ -120,8 +123,9 @@ public static IMachineBuilder AddClearMLService(this IMachineBuilder builder, st

builder.Services
.AddHttpClient("ClearML-NoRetry")
.ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString));
.ConfigureHttpClient(httpClient => httpClient.BaseAddress = new Uri(connectionString!));
builder.Services.AddSingleton<ClearMLHealthCheck>();

builder.Services.AddHealthChecks().AddCheck<ClearMLHealthCheck>("ClearML Health Check");

return builder;
Expand Down Expand Up @@ -152,13 +156,17 @@ public static IMachineBuilder AddMongoHangfireJobClient(
string? connectionString = null
)
{
connectionString ??= builder.Configuration?.GetConnectionString("Hangfire");
if (connectionString is null)
throw new InvalidOperationException("Hangfire connection string is required");

builder.Services.AddHangfire(
c =>
c.SetDataCompatibilityLevel(CompatibilityLevel.Version_170)
.UseSimpleAssemblyNameTypeSerializer()
.UseRecommendedSerializerSettings()
.UseMongoStorage(
connectionString ?? builder.Configuration.GetConnectionString("Hangfire"),
connectionString,
new MongoStorageOptions
{
MigrationOptions = new MongoMigrationOptions
Expand All @@ -183,7 +191,7 @@ public static IMachineBuilder AddHangfireJobServer(
{
engineTypes ??=
builder.Configuration?.GetSection("TranslationEngines").Get<TranslationEngineType[]?>()
?? new[] { TranslationEngineType.SmtTransfer, TranslationEngineType.Nmt };
?? [TranslationEngineType.SmtTransfer, TranslationEngineType.Nmt];
var queues = new List<string>();
foreach (TranslationEngineType engineType in engineTypes.Distinct())
{
Expand Down Expand Up @@ -220,9 +228,11 @@ public static IMachineBuilder AddMemoryDataAccess(this IMachineBuilder builder)

public static IMachineBuilder AddMongoDataAccess(this IMachineBuilder builder, string? connectionString = null)
{
connectionString ??= builder.Configuration.GetConnectionString("Mongo");
connectionString ??= builder.Configuration?.GetConnectionString("Mongo");
if (connectionString is null)
throw new InvalidOperationException("Mongo connection string is required");
builder.Services.AddMongoDataAccess(
connectionString,
connectionString!,
"SIL.Machine.AspNetCore.Models",
o =>
{
Expand Down Expand Up @@ -257,21 +267,22 @@ await c.Indexes.CreateOrUpdateAsync(
);
}
);
builder.Services.AddHealthChecks().AddMongoDb(connectionString, name: "Mongo");
builder.Services.AddHealthChecks().AddMongoDb(connectionString!, name: "Mongo");

return builder;
}

public static IMachineBuilder AddServalPlatformService(
this IMachineBuilder builder,
string? connectionString = null
)
public static IMachineBuilder AddServalPlatformService(this IMachineBuilder builder, string? connectionString = null)
{
connectionString ??= builder.Configuration?.GetConnectionString("Serval");
if (connectionString is null)
throw new InvalidOperationException("Serval connection string is required");

builder.Services.AddScoped<IPlatformService, ServalPlatformService>();
builder.Services
.AddGrpcClient<TranslationPlatformApi.TranslationPlatformApiClient>(o =>
{
o.Address = new Uri(connectionString ?? builder.Configuration.GetConnectionString("Serval"));
o.Address = new Uri(connectionString);
})
.ConfigureChannel(o =>
{
Expand Down Expand Up @@ -321,10 +332,10 @@ public static IMachineBuilder AddServalTranslationEngineService(
options.Interceptors.Add<CancellationInterceptor>();
options.Interceptors.Add<UnimplementedInterceptor>();
});
builder.AddServalPlatformService(connectionString ?? builder.Configuration.GetConnectionString("Serval"));
builder.AddServalPlatformService(connectionString);
engineTypes ??=
builder.Configuration?.GetSection("TranslationEngines").Get<TranslationEngineType[]?>()
?? new[] { TranslationEngineType.SmtTransfer, TranslationEngineType.Nmt };
?? [TranslationEngineType.SmtTransfer, TranslationEngineType.Nmt];
foreach (TranslationEngineType engineType in engineTypes.Distinct())
{
switch (engineType)
Expand All @@ -340,7 +351,6 @@ public static IMachineBuilder AddServalTranslationEngineService(
break;
}
}
builder.Services.AddGrpcHealthChecks();

return builder;
}
Expand All @@ -359,16 +369,34 @@ Action<BuildJobOptions> configureOptions
public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder, IConfiguration config)
{
builder.Services.Configure<BuildJobOptions>(config);
var options = config.Get<BuildJobOptions>();
return builder.AddBuildJobService(options);
var buildJobOptions = new BuildJobOptions();
config.GetSection(BuildJobOptions.Key).Bind(buildJobOptions);
return builder.AddBuildJobService(buildJobOptions);
}

/// <summary>
/// Registers the build job service from the builder's own configuration and, when a configuration is
/// present, adds a disk storage health check for the drive that holds the SMT engines directory.
/// </summary>
/// <param name="builder">The machine builder whose service collection is extended.</param>
/// <returns>The same <paramref name="builder"/> instance, so calls can be chained.</returns>
/// <exception cref="InvalidOperationException">
/// The configured SMT engines directory is missing, or is not a rooted path, so no drive can be derived from it.
/// </exception>
public static IMachineBuilder AddBuildJobService(this IMachineBuilder builder)
{
    if (builder.Configuration is null)
    {
        // Nothing to bind: register with default options and skip the storage health check.
        builder.AddBuildJobService(o => { });
        return builder;
    }

    builder.AddBuildJobService(builder.Configuration.GetSection(BuildJobOptions.Key));

    var smtTransferEngineOptions = new SmtTransferEngineOptions();
    builder.Configuration.GetSection(SmtTransferEngineOptions.Key).Bind(smtTransferEngineOptions);

    // Path.GetPathRoot returns null for a null/empty path and an empty string for a path without root
    // information (e.g. a relative path). Slicing an empty string with [..1] would throw an
    // ArgumentOutOfRangeException, so both cases are rejected explicitly before taking the first character.
    string? pathRoot = Path.GetPathRoot(smtTransferEngineOptions.EnginesDir);
    if (pathRoot is null)
        throw new InvalidOperationException("SMT Engine directory is required");
    if (pathRoot.Length == 0)
        throw new InvalidOperationException("SMT Engine directory must be a rooted path");
    string driveLetter = pathRoot[..1];

    // add health check for disk storage capacity
    const long MinimumFreeMegabytes = 1_000; // 1GB
    builder.Services
        .AddHealthChecks()
        .AddDiskStorageHealthCheck(
            x => x.AddDrive(driveLetter, MinimumFreeMegabytes),
            "SMT Engine Storage Capacity",
            HealthStatus.Degraded
        );

    return builder;
}

Expand Down
3 changes: 2 additions & 1 deletion src/SIL.Machine.AspNetCore/SIL.Machine.AspNetCore.csproj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<Description>An ASP.NET Core web API middleware for the Machine library.</Description>
<NoWarn>1591</NoWarn>
<ImplicitUsings>enable</ImplicitUsings>
Expand All @@ -26,6 +26,7 @@

<ItemGroup>
<PackageReference Include="AspNetCore.HealthChecks.MongoDb" Version="6.0.2" />
<PackageReference Include="AspNetCore.HealthChecks.System" Version="6.0.2" />
<PackageReference Include="AWSSDK.S3" Version="3.7.205.8" />
<PackageReference Include="Grpc.AspNetCore" Version="2.57.0" />
<PackageReference Include="Grpc.AspNetCore.HealthChecks" Version="2.57.0" />
Expand Down
40 changes: 21 additions & 19 deletions src/SIL.Machine.AspNetCore/Services/ClearMLAuthenticationService.cs
Original file line number Diff line number Diff line change
@@ -1,30 +1,22 @@
namespace SIL.Machine.AspNetCore.Services;

public class ClearMLAuthenticationService : RecurrentTask, IClearMLAuthenticationService
public class ClearMLAuthenticationService(
IServiceProvider services,
IHttpClientFactory httpClientFactory,
IOptionsMonitor<ClearMLOptions> options,
ILogger<ClearMLAuthenticationService> logger
) : RecurrentTask("ClearML authentication service", services, RefreshPeriod, logger), IClearMLAuthenticationService
{
private readonly HttpClient _httpClient;
private readonly IOptionsMonitor<ClearMLOptions> _options;
private readonly ILogger<ClearMLAuthenticationService> _logger;
private readonly HttpClient _httpClient = httpClientFactory.CreateClient("ClearML");
private readonly IOptionsMonitor<ClearMLOptions> _options = options;
private readonly ILogger<ClearMLAuthenticationService> _logger = logger;
private readonly AsyncLock _lock = new();

// technically, the token should be good for 30 days, but let's refresh each hour
// to know well ahead of time if something is wrong.
private static readonly TimeSpan RefreshPeriod = TimeSpan.FromSeconds(3600);
private string _authToken = "";

public ClearMLAuthenticationService(
IServiceProvider services,
IHttpClientFactory httpClientFactory,
IOptionsMonitor<ClearMLOptions> options,
ILogger<ClearMLAuthenticationService> logger
)
: base("ClearML authentication service", services, RefreshPeriod, logger)
{
_httpClient = httpClientFactory.CreateClient("ClearML");
_options = options;
_logger = logger;
}

public async Task<string> GetAuthTokenAsync(CancellationToken cancellationToken = default)
{
using (await _lock.LockAsync(cancellationToken))
Expand All @@ -48,7 +40,14 @@ protected override async Task DoWorkAsync(IServiceScope scope, CancellationToken
}
catch (Exception e)
{
_logger.LogError(e, "Error occurred while refreshing ClearML authentication token.");
if (_authToken is ""){
_logger.LogError(e, "Error occurred while aquiring ClearML authentication token for the first time.");
// The ClearML token was never set. We can't continue without it.
throw;
}
else
_logger.LogError(e, "Error occurred while refreshing ClearML authentication token.");

}
}

Expand All @@ -63,7 +62,10 @@ private async Task AuthorizeAsync(CancellationToken cancellationToken)
request.Headers.Add("Authorization", $"Basic {base64EncodedAuthenticationString}");
HttpResponseMessage response = await _httpClient.SendAsync(request, cancellationToken);
string result = await response.Content.ReadAsStringAsync(cancellationToken);
_authToken = (string)((JsonObject?)JsonNode.Parse(result))?["data"]?["token"]!;
string? refreshedToken = (string?)((JsonObject?)JsonNode.Parse(result))?["data"]?["token"];
if (refreshedToken is null || refreshedToken is "")
throw new Exception($"ClearML authentication failed - {response.StatusCode}: {response.ReasonPhrase}");
_authToken = refreshedToken;
_logger.LogInformation("ClearML Authentication Token Refresh Successful.");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,15 @@

private readonly Dictionary<TranslationEngineType, ITranslationEngineService> _engineServices;

public ServalTranslationEngineServiceV1(IEnumerable<ITranslationEngineService> engineServices)
private readonly HealthCheckService _healthCheckService;

public ServalTranslationEngineServiceV1(
IEnumerable<ITranslationEngineService> engineServices,
HealthCheckService healthCheckService
)
{
_engineServices = engineServices.ToDictionary(es => es.Type);
_healthCheckService = healthCheckService;
}

public override async Task<Empty> Create(CreateRequest request, ServerCallContext context)
Expand Down Expand Up @@ -127,6 +133,13 @@
return new GetQueueSizeResponse { Size = await engineService.GetQueueSizeAsync(context.CancellationToken) };
}

public override async Task<HealthCheckResponse> HealthCheck(Empty request, ServerCallContext context)

Check failure on line 136 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

The type or namespace name 'HealthCheckResponse' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 136 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

'ServalTranslationEngineServiceV1.HealthCheck(Empty, ServerCallContext)': no suitable method found to override

Check failure on line 136 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

The type or namespace name 'HealthCheckResponse' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 136 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on ubuntu-20.04

'ServalTranslationEngineServiceV1.HealthCheck(Empty, ServerCallContext)': no suitable method found to override

Check failure on line 136 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

The type or namespace name 'HealthCheckResponse' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 136 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

'ServalTranslationEngineServiceV1.HealthCheck(Empty, ServerCallContext)': no suitable method found to override

Check failure on line 136 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

The type or namespace name 'HealthCheckResponse' could not be found (are you missing a using directive or an assembly reference?)

Check failure on line 136 in src/SIL.Machine.AspNetCore/Services/ServalTranslationEngineServiceV1.cs

View workflow job for this annotation

GitHub Actions / Build on windows-latest

'ServalTranslationEngineServiceV1.HealthCheck(Empty, ServerCallContext)': no suitable method found to override
{
HealthReport healthReport = await _healthCheckService.CheckHealthAsync();
HealthCheckResponse healthCheckResponse = WriteGrpcHealthCheckResponse.Generate(healthReport);
return healthCheckResponse;
}

private ITranslationEngineService GetEngineService(string engineTypeStr)
{
if (_engineServices.TryGetValue(GetEngineType(engineTypeStr), out ITranslationEngineService? service))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>SIL.Machine.Morphology.HermitCrab</RootNamespace>
<PackAsTool>true</PackAsTool>
<ToolCommandName>hc</ToolCommandName>
Expand Down
2 changes: 1 addition & 1 deletion src/SIL.Machine.Plugin/SIL.Machine.Plugin.csproj
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<Description>A plugin framework for the Machine library.</Description>
</PropertyGroup>

Expand Down
1 change: 0 additions & 1 deletion src/SIL.Machine.Serval.EngineServer/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
app.UseHttpsRedirection();

app.MapServalTranslationEngineService();
app.MapGrpcHealthChecksService();
app.MapHangfireDashboard();

app.Run();
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk.Web">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<UserSecretsId>34e222a9-ef76-48f9-869e-338547f9bd25</UserSecretsId>
Expand All @@ -23,7 +23,7 @@

<!-- Include icu.net.dll.config - which is only available after the package is built -->
<ItemGroup>
<ResolvedFileToPublish Include=".\bin\Release\net6.0\icu.net.dll.config">
<ResolvedFileToPublish Include=".\bin\Release\net8.0\icu.net.dll.config">
<RelativePath>icu.net.dll.config</RelativePath>
</ResolvedFileToPublish>
</ItemGroup>
Expand Down
2 changes: 0 additions & 2 deletions src/SIL.Machine.Serval.JobServer/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,4 @@

var app = builder.Build();

app.MapHealthChecks("/health");

app.Run();
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk.Web">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<UserSecretsId>aa9e7440-5a04-4de6-ba51-bab9ef4a62e1</UserSecretsId>
Expand All @@ -25,7 +25,7 @@

<!-- Include icu.net.dll.config - which is only available after the package is built -->
<ItemGroup>
<ResolvedFileToPublish Include=".\bin\Release\net6.0\icu.net.dll.config">
<ResolvedFileToPublish Include=".\bin\Release\net8.0\icu.net.dll.config">
<RelativePath>icu.net.dll.config</RelativePath>
</ResolvedFileToPublish>
</ItemGroup>
Expand Down
2 changes: 1 addition & 1 deletion src/SIL.Machine.Tool/SIL.Machine.Tool.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>SIL.Machine</RootNamespace>
<PackAsTool>true</PackAsTool>
<ToolCommandName>machine</ToolCommandName>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ double Evaluate(Vector weights, int evalCount)
}
return quality;
}
;
progress.Report(new ProgressStatus(0, MaxProgressFunctionEvaluations));
var simplex = new NelderMeadSimplex(ConvergenceTolerance, MaxFunctionEvaluations, 1.0);
MinimizationResult result = simplex.FindMinimum(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>SIL.Machine.AspNetCore</RootNamespace>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFramework>net8.0</TargetFramework>
<RootNamespace>SIL.Machine.Morphology.HermitCrab</RootNamespace>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
Expand Down
Loading
Loading