Skip to content

Commit

Permalink
Try to fix reset-project Playwright tests (#789)
Browse files Browse the repository at this point in the history
We believe the failures in the Playwright tests are due to NFS caching
the directory entry, and thus not noticing that the directory has new
contents, resulting in Mercurial thinking the directory is still empty
for some time after the zip file upload has completed. It is possible to
force NFS to refresh its cache of a directory by doing opendir() and
closedir() on that directory. We will add an `invalidatedircache`
command to our hgweb command runner which will run `ls` (which, among
other system calls, calls opendir() and closedir()), and call that
command after the lexbox pod changes the repo.

The other piece of the puzzle appears to be caching on the side of the
LexBox API pod's NFS client. According to the `nfs(5)` manpage, NFS
writes are cached until one of the following events occurs:

* Memory pressure forces reclamation of system memory resources.
* An application flushes file data explicitly with sync, msync, or fsync.
* An application closes a file with close.
* The file is locked/unlocked via fcntl.

So doing an `opendir/closedir` on the hgweb pod is not enough; we also
need to force the lexbox pod's NFS client to send its data to the NFS
server. I've implemented that by doing the equivalent of running
`mkdir random-name; rmdir random-name` inside the Mercurial repo after
the LexBox API pod touches it (e.g. a project reset or FinishReset
operation), followed by a call to `ls` on the hgweb side.
  • Loading branch information
rmunn authored May 30, 2024
1 parent 3c62210 commit 20ffa08
Show file tree
Hide file tree
Showing 9 changed files with 111 additions and 51 deletions.
69 changes: 69 additions & 0 deletions backend/LexBoxApi/Services/HgService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ public partial class HgService : IHgService, IHostedService
private const string DELETED_REPO_FOLDER = "_____deleted_____";
private const string TEMP_REPO_FOLDER = "_____temp_____";

private const string AllZeroHash = "0000000000000000000000000000000000000000";

private readonly IOptions<HgConfig> _options;
private readonly Lazy<HttpClient> _hgClient;
private readonly ILogger<HgService> _logger;
Expand Down Expand Up @@ -67,6 +69,8 @@ await Task.Run(() =>
{
InitRepoAt(new DirectoryInfo(PrefixRepoFilePath(code)));
});
await InvalidateDirCache(code);
await WaitForRepoEmptyState(code, RepoEmptyState.Empty);
}

private void InitRepoAt(DirectoryInfo repoDirectory)
Expand Down Expand Up @@ -104,6 +108,8 @@ public async Task ResetRepo(string code)
await SoftDeleteRepo(code, $"{FileUtils.ToTimestamp(DateTimeOffset.UtcNow)}__reset");
//we must init the repo as uploading a zip is optional
tmpRepo.MoveTo(PrefixRepoFilePath(code));
await InvalidateDirCache(code);
await WaitForRepoEmptyState(code, RepoEmptyState.Empty);
}

public async Task FinishReset(string code, Stream zipFile)
Expand Down Expand Up @@ -137,6 +143,11 @@ await Task.Run(() =>
// Now we're ready to move the new repo into place, replacing the old one
await DeleteRepo(code);
tempRepo.MoveTo(PrefixRepoFilePath(code));
await InvalidateDirCache(code);
// If someone uploaded an *empty* repo, we don't want to wait forever for a non-empty state
var changelogPath = Path.Join(PrefixRepoFilePath(code), ".hg", "store", "00changelog.i");
var expectedState = File.Exists(changelogPath) ? RepoEmptyState.NonEmpty : RepoEmptyState.Empty;
await WaitForRepoEmptyState(code, expectedState);
}

/// <summary>
Expand Down Expand Up @@ -262,6 +273,58 @@ public async Task<HttpContent> ExecuteHgRecover(string code, CancellationToken t
return response;
}

public Task<HttpContent> InvalidateDirCache(string code, CancellationToken token = default)
{
var repoPath = Path.Join(PrefixRepoFilePath(code));
if (Directory.Exists(repoPath))
{
// Invalidate NFS directory cache by forcing a write and re-read of the repo directory
var randomPath = Path.Join(repoPath, Path.GetRandomFileName());
while (File.Exists(randomPath) || Directory.Exists(randomPath)) { randomPath = Path.Join(repoPath, Path.GetRandomFileName()); }
try
{
// Create and delete a directory since that's slightly safer than a file
var d = Directory.CreateDirectory(randomPath);
d.Delete();
}
catch (Exception) { }
}
var result = ExecuteHgCommandServerCommand(code, "invalidatedircache", token);
return result;
}

public async Task<string> GetTipHash(string code, CancellationToken token = default)
{
var content = await ExecuteHgCommandServerCommand(code, "tip", token);
return await content.ReadAsStringAsync();
}

private async Task WaitForRepoEmptyState(string code, RepoEmptyState expectedState, int timeoutMs = 30_000, CancellationToken token = default)
{
// Set timeout so unforeseen errors can't cause an infinite loop
using var timeoutSource = CancellationTokenSource.CreateLinkedTokenSource(token);
timeoutSource.CancelAfter(timeoutMs);
var done = false;
try
{
while (!done && !timeoutSource.IsCancellationRequested)
{
var hash = await GetTipHash(code, timeoutSource.Token);
var isEmpty = hash == AllZeroHash;
done = expectedState switch

Check warning on line 314 in backend/LexBoxApi/Services/HgService.cs

View workflow job for this annotation

GitHub Actions / Integration tests / Test self-hosted for Mercurial 6 on develop

The switch expression does not handle some values of its input type (it is not exhaustive) involving an unnamed enum value. For example, the pattern '(LexBoxApi.Services.RepoEmptyState)2' is not covered.
{
RepoEmptyState.Empty => isEmpty,
RepoEmptyState.NonEmpty => !isEmpty
};
if (!done) await Task.Delay(2500, timeoutSource.Token);
}
}
// We don't want to actually throw if we hit the timeout, because the operation *will* succeed eventually
// once the NFS caches synchronize, so we don't want to propagate an error message to the end user. So
// even if the timeout is hit, return as if we succeeded.
catch (OperationCanceledException) { }
}

public async Task<int?> GetLexEntryCount(string code, ProjectType projectType)
{
var command = projectType switch
Expand Down Expand Up @@ -408,3 +471,9 @@ public class BrowseResponse
{
public BrowseFilesResponse[]? Files { get; set; }
}

public enum RepoEmptyState
{
Empty,
NonEmpty
}
8 changes: 8 additions & 0 deletions backend/LexBoxApi/Services/ProjectService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ public async Task FinishReset(string code, Stream? zipFile = null)
await hgService.FinishReset(code, zipFile);
await UpdateProjectMetadata(project);
}
else
{
await hgService.InvalidateDirCache(code);
}
project.ResetStatus = ResetStatus.None;
project.UpdateUpdatedDate();
await dbContext.SaveChangesAsync();
Expand Down Expand Up @@ -146,6 +150,10 @@ public async Task UpdateProjectMetadata(Project project)
project.FlexProjectMetadata.LexEntryCount = count;
}
}
else
{
await hgService.InvalidateDirCache(project.Code);
}

project.LastCommit = await hgService.GetLastCommitTimeFromHg(project.Code);
// Caller is responsible for caling dbContext.SaveChangesAsync()
Expand Down
2 changes: 2 additions & 0 deletions backend/LexCore/ServiceInterfaces/IHgService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@ public interface IHgService
Task ResetRepo(string code);
Task FinishReset(string code, Stream zipFile);
Task<HttpContent> VerifyRepo(string code, CancellationToken token);
Task<string> GetTipHash(string code, CancellationToken token = default);
Task<int?> GetLexEntryCount(string code, ProjectType projectType);
Task<string?> GetRepositoryIdentifier(Project project);
Task<HttpContent> ExecuteHgRecover(string code, CancellationToken token);
Task<HttpContent> InvalidateDirCache(string code, CancellationToken token = default);
bool HasAbandonedTransactions(string projectCode);
Task<string> HgCommandHealth();
}
11 changes: 10 additions & 1 deletion backend/Testing/LexCore/Services/HgServiceTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
using Microsoft.Extensions.Logging.Abstractions;
using Microsoft.Extensions.Options;
using Moq;
using Moq.Contrib.HttpClient;
using Shouldly;
using Testing.Fixtures;

Expand All @@ -30,8 +31,16 @@ public HgServiceTests()
HgResumableUrl = LexboxResumable,
SendReceiveDomain = LexboxHgWeb
};
var handler = new Mock<HttpMessageHandler>(MockBehavior.Strict);

// This may need to become more sophisticated if our FinishReset tests are changed to include
// a Mercurial repo with actual commits in it, but this is good enough at the moment.
var AllZeroHash = "0000000000000000000000000000000000000000";
handler.SetupAnyRequest().ReturnsResponse(AllZeroHash);

var mockFactory = handler.CreateClientFactory();
_hgService = new HgService(new OptionsWrapper<HgConfig>(_hgConfig),
Mock.Of<IHttpClientFactory>(),
mockFactory,
NullLogger<HgService>.Instance);
CleanUpTempDir();
}
Expand Down
5 changes: 0 additions & 5 deletions backend/Testing/Services/Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,6 @@ public static async Task WaitForHgRefreshIntervalAsync()
await Task.Delay(TestingEnvironmentVariables.HgRefreshInterval);
}

public static async Task WaitForLexboxMetadataUpdateAsync()
{
await Task.Delay(3000);
}

private static string GetNewProjectDir(string projectCode,
[CallerMemberName] string projectName = "")
{
Expand Down
12 changes: 0 additions & 12 deletions backend/Testing/SyncReverseProxy/SendReceiveServiceTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,10 @@ public async Task ModifyProjectData(HgProtocol protocol)
var projectConfig = _srFixture.InitLocalFlexProjectWithRepo();
await using var project = await RegisterProjectInLexBox(projectConfig, _adminApiTester);

await WaitForHgRefreshIntervalAsync();

// Push the project to the server
var sendReceiveParams = new SendReceiveParams(protocol, projectConfig);
_sendReceiveService.SendReceiveProject(sendReceiveParams, AdminAuth);

await WaitForLexboxMetadataUpdateAsync();

// Verify pushed and store last commit
var lastCommitDate = await _adminApiTester.GetProjectLastCommit(projectConfig.Code);
lastCommitDate.ShouldNotBeNullOrEmpty();
Expand All @@ -101,8 +97,6 @@ public async Task ModifyProjectData(HgProtocol protocol)
// Push changes
_sendReceiveService.SendReceiveProject(sendReceiveParams, AdminAuth, "Modify project data automated test");

await WaitForLexboxMetadataUpdateAsync();

// Verify the push updated the last commit date
var lastCommitDateAfter = await _adminApiTester.GetProjectLastCommit(projectConfig.Code);
lastCommitDateAfter.ShouldBeGreaterThan(lastCommitDate);
Expand All @@ -117,8 +111,6 @@ public async Task SendReceiveAfterProjectReset(HgProtocol protocol)
var projectConfig = _srFixture.InitLocalFlexProjectWithRepo(protocol, "SR_AfterReset");
await using var project = await RegisterProjectInLexBox(projectConfig, _adminApiTester);

await WaitForHgRefreshIntervalAsync();

var sendReceiveParams = new SendReceiveParams(protocol, projectConfig);
var srResult = _sendReceiveService.SendReceiveProject(sendReceiveParams, AdminAuth);

Expand All @@ -144,8 +136,6 @@ public async Task SendReceiveAfterProjectReset(HgProtocol protocol)
await _adminApiTester.HttpClient.PostAsync($"{_adminApiTester.BaseUrl}/api/project/resetProject/{projectConfig.Code}", null);
await _adminApiTester.HttpClient.PostAsync($"{_adminApiTester.BaseUrl}/api/project/finishResetProject/{projectConfig.Code}", null);

await WaitForHgRefreshIntervalAsync(); // TODO 765: Remove this

// Step 2: verify project is now empty, i.e. tip is "0000000..."
response = await _adminApiTester.HttpClient.GetAsync(tipUri.Uri);
jsonResult = await response.Content.ReadFromJsonAsync<JsonObject>();
Expand All @@ -169,8 +159,6 @@ public async Task SendReceiveAfterProjectReset(HgProtocol protocol)
var srResultStep3 = _sendReceiveService.SendReceiveProject(sendReceiveParams, AdminAuth);
_output.WriteLine(srResultStep3);

await WaitForHgRefreshIntervalAsync(); // TODO 765: Remove this

// Step 4: verify project tip is same hash as original project tip
response = await _adminApiTester.HttpClient.GetAsync(tipUri.Uri);
jsonResult = await response.Content.ReadFromJsonAsync<JsonObject>();
Expand Down
1 change: 1 addition & 0 deletions backend/Testing/Testing.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
<Choose>
<When Condition=" '$(MercurialVersion)' == '6' ">
<ItemGroup>
<PackageReference Include="Moq.Contrib.HttpClient" Version="1.4.0" />
<PackageReference Include="SIL.Chorus.LibChorus" Version="6.0.0-beta0048" />
<PackageReference Include="SIL.Chorus.Mercurial" Version="6.*" />
</ItemGroup>
Expand Down
48 changes: 16 additions & 32 deletions frontend/tests/resetProject.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,14 @@ test('reset project and upload .zip file', async ({ page, tempProject, tempDir }
await resetProjectModel.assertGone();

// Step 2: Get tip hash and file list from hgweb, check some known values
// It can take a while for the server to pick up the new repo
let beforeResetJson: HgWebJson;
await expect(async () => {
const beforeResetResponse = await page.request.get(`${testEnv.serverBaseUrl}/hg/${tempProject.code}/file/tip?style=json-lex`);
beforeResetJson = await beforeResetResponse.json() as HgWebJson;
expect(beforeResetJson).toHaveProperty('node');
expect(beforeResetJson.node).not.toEqual(allZeroHash);
expect(beforeResetJson).toHaveProperty('files');
expect(beforeResetJson.files).toHaveLength(1);
expect(beforeResetJson.files[0]).toHaveProperty('basename');
expect(beforeResetJson.files[0].basename).toBe('hello.txt');
}).toPass({
intervals: [1_000, 3_000, 5_000],
});
const beforeResetResponse = await page.request.get(`${testEnv.serverBaseUrl}/hg/${tempProject.code}/file/tip?style=json-lex`);
const beforeResetJson = await beforeResetResponse.json() as HgWebJson;
expect(beforeResetJson).toHaveProperty('node');
expect(beforeResetJson.node).not.toEqual(allZeroHash);
expect(beforeResetJson).toHaveProperty('files');
expect(beforeResetJson.files).toHaveLength(1);
expect(beforeResetJson.files[0]).toHaveProperty('basename');
expect(beforeResetJson.files[0].basename).toBe('hello.txt');

// Step 3: reset project, do not upload zip file
await projectPage.goto();
Expand All @@ -65,16 +59,11 @@ test('reset project and upload .zip file', async ({ page, tempProject, tempDir }
await resetProjectModel.assertGone();

// Step 4: confirm it's empty now
// It can take a while for the server to pick up the new repo
await expect(async () => {
const afterResetResponse = await page.request.get(`${testEnv.serverBaseUrl}/hg/${tempProject.code}/file/tip?style=json-lex`);
const afterResetJson = await afterResetResponse.json() as HgWebJson;
expect(afterResetJson.node).toEqual(allZeroHash);
expect(afterResetJson).toHaveProperty('files');
expect(afterResetJson.files).toHaveLength(0);
}).toPass({
intervals: [1_000, 3_000, 5_000],
});
const afterResetResponse = await page.request.get(`${testEnv.serverBaseUrl}/hg/${tempProject.code}/file/tip?style=json-lex`);
const afterResetJson = await afterResetResponse.json() as HgWebJson;
expect(afterResetJson.node).toEqual(allZeroHash);
expect(afterResetJson).toHaveProperty('files');
expect(afterResetJson.files).toHaveLength(0);

// Step 5: reset project again, uploading zip file downloaded from step 1
await projectPage.goto();
Expand All @@ -88,12 +77,7 @@ test('reset project and upload .zip file', async ({ page, tempProject, tempDir }
await resetProjectModel.assertGone();

// Step 6: confirm tip hash and contents are same as before reset
// It can take a while for the server to pick up the new repo
await expect(async () => {
const afterUploadResponse = await page.request.get(`${testEnv.serverBaseUrl}/hg/${tempProject.code}/file/tip?style=json-lex`);
const afterResetJSon = await afterUploadResponse.json() as HgWebJson;
expect(afterResetJSon).toEqual(beforeResetJson); // NOT .toBe(), which would check that they're the same object.
}).toPass({
intervals: [1_000, 3_000, 5_000],
});
const afterUploadResponse = await page.request.get(`${testEnv.serverBaseUrl}/hg/${tempProject.code}/file/tip?style=json-lex`);
const afterResetJSon = await afterUploadResponse.json() as HgWebJson;
expect(afterResetJSon).toEqual(beforeResetJson); // NOT .toBe(), which would check that they're the same object.
});
6 changes: 5 additions & 1 deletion hgweb/command-runner.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash

# Define the list of allowed commands
allowed_commands=("verify" "tip" "wesaylexentrycount" "lexentrycount" "recover" "healthz")
allowed_commands=("verify" "tip" "wesaylexentrycount" "lexentrycount" "recover" "healthz" "invalidatedircache")

# Get the project code and command name from the URL
IFS='/' read -ra PATH_SEGMENTS <<< "$PATH_INFO"
Expand Down Expand Up @@ -44,6 +44,10 @@ echo ""

# Run the hg command, simply output to stdout
first_char=$(echo $project_code | cut -c1)
# Ensure NFS cache is refreshed in case project repo changed in another pod (e.g., project reset)
ls /var/hg/repos/$first_char/$project_code/.hg >/dev/null 2>/dev/null # Don't need output; this is enough to refresh NFS dir cache
# Sometimes invalidatedircache is called after deleting a project, so the cd would fail. So exit fast in that case.
[ "x$command_name" = "xinvalidatedircache" ] && exit 0
cd /var/hg/repos/$first_char/$project_code
case $command_name in

Expand Down

0 comments on commit 20ffa08

Please sign in to comment.