Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract language tags from FLEx projects and store in DB metadata #952

Merged
merged 26 commits into from
Jul 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
ac43b18
Initial work extracting language tags
rmunn Jul 11, 2024
bda54e1
Load all four ws lists from .langproj file
rmunn Jul 12, 2024
2c9bbe8
Remove language-guessing logic for now
rmunn Jul 12, 2024
ea91cfd
Add new DB columns for FLEx writing systems
rmunn Jul 12, 2024
6534353
Attempt to configure EF for WritingSystems
rmunn Jul 12, 2024
2fab0b2
Better return type for ws-parsing method
rmunn Jul 12, 2024
49b954f
add debug logging to testing services
hahn-kev Jul 12, 2024
eb90a00
refactor xml parsing into HgService, add UpdateProjectLangTags to Pro…
hahn-kev Jul 12, 2024
8306ea0
create migration to add writing systems to flex project metadata
hahn-kev Jul 12, 2024
dd2ffa7
write test for writing language tags into flex project metadata
hahn-kev Jul 12, 2024
c8e7f4d
Add IsDefault field to ws tags in project data
rmunn Jul 15, 2024
ecbc444
Add command to extract LangProject GUID as well
rmunn Jul 15, 2024
90d62a0
Add LangProjectId column to FlexProjectMetadata
rmunn Jul 15, 2024
54a140a
Explain grep regex in a comment
rmunn Jul 15, 2024
cb663b8
Fix typo in language tag parsing
rmunn Jul 15, 2024
6744abb
Make writing system ordering match what FLEx does
rmunn Jul 15, 2024
ef228fd
Add VERY basic UI for writing systems on proj page
rmunn Jul 15, 2024
f8008cc
Translate label for writing system list
rmunn Jul 15, 2024
7886e8f
Improve UI for writing system list
rmunn Jul 15, 2024
0f3b465
Add button for admins to refresh language list
rmunn Jul 15, 2024
d272e5e
Improve types for WritingSystemList component
rmunn Jul 15, 2024
b356f76
add project controller actions to update missing langprojectid and wr…
hahn-kev Jul 16, 2024
27699a0
Use helper method to get lang tags from XML
rmunn Jul 18, 2024
3cbc6fd
Remove migration that does nothing
rmunn Jul 18, 2024
47117b4
Show loading indicator at correct time
rmunn Jul 18, 2024
388e474
Fix failing unit tests
rmunn Jul 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 66 additions & 4 deletions backend/LexBoxApi/Controllers/ProjectController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ public class ProjectController(
ISchedulerFactory scheduler)
: ControllerBase
{

[HttpPost("refreshProjectLastChanged")]
public async Task<ActionResult> RefreshProjectLastChanged(string projectCode)
{
Expand Down Expand Up @@ -55,6 +54,7 @@ public async Task<ActionResult> UpdateAllRepoCommitDates(bool onlyUnknown)
{
project.LastCommit = await hgService.GetLastCommitTimeFromHg(project.Code);
}

await lexBoxDbContext.SaveChangesAsync();

return Ok();
Expand All @@ -73,14 +73,18 @@ public async Task<ActionResult<Project>> UpdateProjectType(Guid id)
project.Type = await hgService.DetermineProjectType(project.Code);
await lexBoxDbContext.SaveChangesAsync();
}

return project;
}

/// <summary>
/// Sets the type of the project with the given code. Projects whose type is already known
/// (anything other than <see cref="ProjectType.Unknown"/>) are only changed when
/// <paramref name="overrideKnown"/> is true.
/// </summary>
/// <param name="projectCode">Code of the project to update.</param>
/// <param name="projectType">The type to store.</param>
/// <param name="overrideKnown">When true, replace an already-known project type as well.</param>
/// <returns>Always 200 OK; no distinction is made between "updated" and "no matching row".</returns>
[HttpPost("setProjectType")]
[AdminRequired]
public async Task<ActionResult> SetProjectType(string projectCode,
    ProjectType projectType,
    bool overrideKnown = false)
{
    // The filter and the update run as a single bulk statement; no entities are loaded.
    var matchingProjects = lexBoxDbContext.Projects
        .Where(p => p.Code == projectCode && (p.Type == ProjectType.Unknown || overrideKnown));
    await matchingProjects.ExecuteUpdateAsync(u => u.SetProperty(p => p.Type, projectType));
    return Ok();
}
Expand All @@ -107,7 +111,9 @@ public async Task<ActionResult<ProjectType>> DetermineProjectType(Guid id)
[ProducesResponseType(StatusCodes.Status200OK)]
[ProducesResponseType(StatusCodes.Status404NotFound)]
[AdminRequired]
public async Task<ActionResult<Dictionary<string, ProjectType>>> UpdateProjectTypesForUnknownProjects(int limit = 50, int offset = 0)
public async Task<ActionResult<Dictionary<string, ProjectType>>> UpdateProjectTypesForUnknownProjects(int limit =
50,
int offset = 0)
{
var projects = lexBoxDbContext.Projects
.Where(p => p.Type == ProjectType.Unknown)
Expand All @@ -121,6 +127,7 @@ public async Task<ActionResult<Dictionary<string, ProjectType>>> UpdateProjectTy
project.Type = await hgService.DetermineProjectType(project.Code);
result.Add(project.Code, project.Type);
}

await lexBoxDbContext.SaveChangesAsync();
return result;
}
Expand Down Expand Up @@ -176,6 +183,7 @@ public async Task<ActionResult<Project>> DeleteProject(Guid id)
}

/// <summary>
/// Record wrapping a single response string.
/// NOTE(review): presumably the text output of an hg command, going by the name — confirm against usages.
/// </summary>
public record HgCommandResponse(string Response);

[HttpGet("hgVerify/{code}")]
[AdminRequired]
[ProducesResponseType(StatusCodes.Status200OK)]
Expand All @@ -191,6 +199,7 @@ public async Task HgVerify(string code)
await Response.CompleteAsync();
return;
}

var result = await hgService.VerifyRepo(code, HttpContext.RequestAborted);
await StreamHttpResponse(result);
}
Expand All @@ -210,6 +219,7 @@ public async Task HgRecover(string code)
await Response.CompleteAsync();
return;
}

var result = await hgService.ExecuteHgRecover(code, HttpContext.RequestAborted);
await StreamHttpResponse(result);
}
Expand All @@ -232,6 +242,58 @@ public async Task<ActionResult<int>> UpdateLexEntryCount(string code)
return result is null ? NotFound() : result;
}

/// <summary>
/// Re-reads the writing systems of the project identified by <paramref name="code"/> and
/// stores them in its FLEx project metadata.
/// </summary>
/// <param name="code">Project code taken from the route.</param>
/// <remarks>
/// NOTE(review): a 404 response is declared below, but this action returns a bare
/// <see cref="Task"/> and has no branch that produces one — confirm whether an unknown
/// code should be reported to the caller.
/// </remarks>
[HttpPost("updateLanguageList/{code}")]
[ProducesResponseType(StatusCodes.Status200OK)]
[ProducesResponseType(StatusCodes.Status404NotFound)]
[ProducesDefaultResponseType]
public async Task UpdateLanguageList(string code)
{
    // Resolve the code to an id first, then delegate the actual refresh to the project service.
    await projectService.UpdateProjectLangTags(await projectService.LookupProjectId(code));
}

/// <summary>
/// Back-fills writing systems for up to <paramref name="limit"/> FLEx projects that have
/// at least one commit but no writing systems recorded yet.
/// </summary>
/// <param name="limit">Maximum number of projects to process in one call.</param>
/// <returns>The codes of every project that was visited, whether or not writing systems were found for it.</returns>
[HttpPost("updateMissingLanguageList")]
public async Task<ActionResult<string[]>> UpdateMissingLanguageList(int limit = 10)
{
    var candidates = lexBoxDbContext.Projects
        .Include(p => p.FlexProjectMetadata)
        .Where(p => p.Type == ProjectType.FLEx
                    && p.LastCommit != null
                    && p.FlexProjectMetadata!.WritingSystems == null)
        .Take(limit)
        .AsAsyncEnumerable();

    var visitedCodes = new List<string>(limit);
    await foreach (var flexProject in candidates)
    {
        visitedCodes.Add(flexProject.Code);
        // A project may have no metadata row at all yet; create one on demand.
        flexProject.FlexProjectMetadata ??= new FlexProjectMetadata();
        flexProject.FlexProjectMetadata.WritingSystems =
            await hgService.GetProjectWritingSystems(flexProject.Code);
    }

    // All changes are persisted together once every candidate has been visited.
    await lexBoxDbContext.SaveChangesAsync();

    return Ok(visitedCodes);
}

/// <summary>
/// Back-fills the LangProject GUID for up to <paramref name="limit"/> FLEx projects that
/// have at least one commit but no LangProject id recorded yet.
/// </summary>
/// <param name="limit">Maximum number of projects to process in one call.</param>
/// <returns>The codes of every project that was visited, whether or not an id was found for it.</returns>
[HttpPost("updateMissingLangProjectId")]
public async Task<ActionResult<string[]>> UpdateMissingLangProjectId(int limit = 10)
{
    var candidates = lexBoxDbContext.Projects
        .Include(p => p.FlexProjectMetadata)
        .Where(p => p.Type == ProjectType.FLEx
                    && p.LastCommit != null
                    && p.FlexProjectMetadata!.LangProjectId == null)
        .Take(limit)
        .AsAsyncEnumerable();

    var visitedCodes = new List<string>(limit);
    await foreach (var flexProject in candidates)
    {
        visitedCodes.Add(flexProject.Code);
        // A project may have no metadata row at all yet; create one on demand.
        flexProject.FlexProjectMetadata ??= new FlexProjectMetadata();
        flexProject.FlexProjectMetadata.LangProjectId =
            await hgService.GetProjectIdOfFlexProject(flexProject.Code);
    }

    // All changes are persisted together once every candidate has been visited.
    await lexBoxDbContext.SaveChangesAsync();

    return Ok(visitedCodes);
}

[HttpPost("queueUpdateProjectMetadataTask")]
public async Task<ActionResult> QueueUpdateProjectMetadataTask(string projectCode)
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using HotChocolate.Data.Sorting;
using LexCore.Entities;

namespace LexBoxApi.GraphQL.CustomTypes;

/// <summary>
/// GraphQL sort-input configuration for <see cref="FlexProjectMetadata"/> that leaves the
/// <see cref="FlexProjectMetadata.WritingSystems"/> field out of the generated sort input.
/// </summary>
public class FlexProjectMetadataGqlSortConfiguration : SortInputType<FlexProjectMetadata>
{
    /// <summary>Excludes the writing-systems field from the sort input type.</summary>
    protected override void Configure(ISortInputTypeDescriptor<FlexProjectMetadata> descriptor) =>
        descriptor.Field(metadata => metadata.WritingSystems).Ignore();
}
54 changes: 53 additions & 1 deletion backend/LexBoxApi/Services/HgService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,58 @@ await Task.Run(() =>
});
}

/// <summary>
/// Runs the "flexwritingsystems" hg command for the project and wraps its output in a single
/// root <c>LangTags</c> element so it can be parsed as one XML document.
/// The wrapped children are expected to be AnalysisWss, CurAnalysisWss, VernWss, CurVernWss and
/// CurPronunWss, each holding a single <c>Uni</c> element whose text is a space-separated tag list.
/// </summary>
/// <param name="code">Code of the project whose repository is queried.</param>
/// <param name="token">Cancels the command and the read of its response.</param>
/// <returns>The wrapped XML as a string, or an empty string when the command produced no output.</returns>
private async Task<string> GetLangTagsAsXml(ProjectCode code, CancellationToken token = default)
{
    var response = await ExecuteHgCommandServerCommand(code, "flexwritingsystems", token);
    var body = await response.ReadAsStringAsync(token);
    if (body is { Length: > 0 })
    {
        return $"<LangTags>{body}</LangTags>";
    }

    return string.Empty;
}

/// <summary>
/// Reads the whitespace-separated writing-system tags stored under
/// <c>&lt;tagName&gt;&lt;Uni&gt;…&lt;/Uni&gt;&lt;/tagName&gt;</c> directly beneath <paramref name="root"/>.
/// </summary>
/// <param name="root">Element whose direct children hold the writing-system lists.</param>
/// <param name="tagName">Name of the child element to read, e.g. "VernWss".</param>
/// <returns>The tags in document order; empty when the element or its <c>Uni</c> child is missing.</returns>
private string[] GetWsList(System.Xml.XmlElement root, string tagName)
{
    var uniNode = root[tagName]?["Uni"];
    var rawTags = uniNode?.InnerText ?? string.Empty;
    // A null separator array makes Split break on any whitespace; typing the variable as
    // char[] selects that overload rather than the string[] one.
    char[]? anyWhitespace = null;
    return rawTags.Split(anyWhitespace, StringSplitOptions.RemoveEmptyEntries);
}

/// <summary>
/// Extracts the vernacular and analysis writing systems of a FLEx project from its repository.
/// </summary>
/// <param name="code">Code of the project whose repository is queried.</param>
/// <param name="token">Cancels the underlying hg command.</param>
/// <returns>
/// The project's writing systems, or null when the hg command returned nothing or the XML has no
/// root element. In each list the current writing systems come first (the first of them flagged
/// as default), followed by the remaining, non-current ones.
/// </returns>
public async Task<ProjectWritingSystems?> GetProjectWritingSystems(ProjectCode code, CancellationToken token = default)
{
    var xml = await GetLangTagsAsXml(code, token);
    if (string.IsNullOrEmpty(xml)) return null;

    var document = new System.Xml.XmlDocument();
    document.LoadXml(xml);
    if (document.DocumentElement is not { } root) return null;

    // Builds one ordered list: every *current* writing system first, then every *non-current*
    // one. That ordering matters because it matches how FLEx handles things.
    List<FLExWsId> OrderedWsIds(string allTagName, string currentTagName)
    {
        var allTags = GetWsList(root, allTagName);
        var currentTags = GetWsList(root, currentTagName);
        var currentSet = currentTags.ToHashSet();

        var ids = new List<FLExWsId>();
        for (var i = 0; i < currentTags.Length; i++)
        {
            // The first current writing system is the default one.
            ids.Add(new FLExWsId { Tag = currentTags[i], IsActive = true, IsDefault = i == 0 });
        }

        foreach (var tag in allTags)
        {
            if (currentSet.Contains(tag)) continue;
            ids.Add(new FLExWsId { Tag = tag, IsActive = false, IsDefault = false });
        }

        return ids;
    }

    return new ProjectWritingSystems
    {
        VernacularWss = OrderedWsIds("VernWss", "CurVernWss"),
        AnalysisWss = OrderedWsIds("AnalysisWss", "CurAnalysisWss")
    };
}

/// <summary>
/// Runs the "flexprojectid" hg command for the project and parses its output as a GUID.
/// </summary>
/// <param name="code">Code of the project whose repository is queried.</param>
/// <param name="token">Cancels the command and the read of its response.</param>
/// <returns>The parsed GUID, or null when the output is not a valid GUID.</returns>
public async Task<Guid?> GetProjectIdOfFlexProject(ProjectCode code, CancellationToken token = default)
{
    var response = await ExecuteHgCommandServerCommand(code, "flexprojectid", token);
    var body = await response.ReadAsStringAsync(token);
    return Guid.TryParse(body, out var parsed) ? parsed : (Guid?)null;
}

public Task RevertRepo(ProjectCode code, string revHash)
{
Expand Down Expand Up @@ -254,11 +306,11 @@ public async Task<Changeset[]> GetChangesets(ProjectCode projectCode)
return logResponse?.Changesets ?? Array.Empty<Changeset>();
}


/// <summary>
/// Runs the "verify" hg command against the project's repository.
/// </summary>
/// <param name="code">Code of the project whose repository is verified.</param>
/// <param name="token">Cancels the command.</param>
/// <returns>The content of the command's response.</returns>
public Task<HttpContent> VerifyRepo(ProjectCode code, CancellationToken token) =>
    ExecuteHgCommandServerCommand(code, "verify", token);

public async Task<HttpContent> ExecuteHgRecover(ProjectCode code, CancellationToken token)
{
var response = await ExecuteHgCommandServerCommand(code, "recover", token);
Expand Down
23 changes: 23 additions & 0 deletions backend/LexBoxApi/Services/ProjectService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,29 @@ public async Task<Guid> CreateProject(CreateProjectInput input)
return projectId;
}

/// <summary>
/// Refreshes the stored writing systems of a FLEx project from its repository.
/// Does nothing when the project does not exist, is not a FLEx project, or no writing
/// systems could be read.
/// </summary>
/// <param name="projectId">Id of the project to refresh.</param>
public async Task UpdateProjectLangTags(Guid projectId)
{
    var project = await dbContext.Projects.FindAsync(projectId);
    if (project is not { Type: ProjectType.FLEx }) return;

    // FindAsync does not load navigations, so fetch the metadata row explicitly.
    await dbContext.Entry(project).Reference(p => p.FlexProjectMetadata).LoadAsync();

    var writingSystems = await hgService.GetProjectWritingSystems(project.Code);
    if (writingSystems is null) return;

    var metadata = project.FlexProjectMetadata ??= new FlexProjectMetadata();
    metadata.WritingSystems = writingSystems;
    await dbContext.SaveChangesAsync();
}

/// <summary>
/// Refreshes the stored LangProject GUID of a FLEx project from its repository.
/// Does nothing when the project does not exist, is not a FLEx project, or no GUID
/// could be read from the repository.
/// </summary>
/// <param name="projectId">Id of the project to refresh.</param>
public async Task UpdateProjectLangProjectId(Guid projectId)
{
    var project = await dbContext.Projects.FindAsync(projectId);
    if (project is null || project.Type != ProjectType.FLEx) return;
    // FindAsync does not load navigations, so fetch the metadata row explicitly.
    await dbContext.Entry(project).Reference(p => p.FlexProjectMetadata).LoadAsync();
    var langProjGuid = await hgService.GetProjectIdOfFlexProject(project.Code);
    // Same guard as UpdateProjectLangTags: when the lookup yields nothing, keep whatever is
    // already stored rather than overwriting it with null or creating an empty metadata row.
    if (langProjGuid is null) return;
    project.FlexProjectMetadata ??= new FlexProjectMetadata();
    project.FlexProjectMetadata.LangProjectId = langProjGuid;
    await dbContext.SaveChangesAsync();
}

public async Task<Guid> CreateDraftProject(CreateProjectInput input)
{
// No need for a transaction if we're just saving a single item
Expand Down
18 changes: 18 additions & 0 deletions backend/LexCore/Entities/FlexProjectMetadata.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,22 @@ public class FlexProjectMetadata
{
public Guid ProjectId { get; set; }
public int? LexEntryCount { get; set; }
/// <summary>
/// GUID from the LangProject element, which is not the same as the ID of the LexBox project
/// </summary>
public Guid? LangProjectId { get; set; }
public ProjectWritingSystems? WritingSystems { get; set; }
}

/// <summary>
/// The vernacular and analysis writing systems of a FLEx project, stored as part of
/// <see cref="FlexProjectMetadata"/>.
/// </summary>
public class ProjectWritingSystems
{
/// <summary>
/// Vernacular writing systems. As filled in by HgService.GetProjectWritingSystems, the current
/// writing systems come first, followed by the non-current ones.
/// </summary>
public required List<FLExWsId> VernacularWss { get; set; } = [];
/// <summary>
/// Analysis writing systems. As filled in by HgService.GetProjectWritingSystems, the current
/// writing systems come first, followed by the non-current ones.
/// </summary>
public required List<FLExWsId> AnalysisWss { get; set; } = [];
}

/// <summary>
/// One writing system of a FLEx project, identified by its language tag.
/// </summary>
public class FLExWsId
{
/// <summary>The writing system's tag, as read from the project's writing-system lists.</summary>
public required string Tag { get; set; }
/// <summary>True when the tag appears in the project's list of current writing systems.</summary>
public bool IsActive { get; set; }
/// <summary>True for the first entry of the current writing systems list, false for all others.</summary>
public bool IsDefault { get; set; }
}
2 changes: 2 additions & 0 deletions backend/LexCore/ServiceInterfaces/IHgService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ public interface IHgService
Task<ProjectType> DetermineProjectType(ProjectCode projectCode);
Task DeleteRepo(ProjectCode code);
Task SoftDeleteRepo(ProjectCode code, string deletedRepoSuffix);
Task<ProjectWritingSystems?> GetProjectWritingSystems(ProjectCode code, CancellationToken token = default);
Task<Guid?> GetProjectIdOfFlexProject(ProjectCode code, CancellationToken token = default);
BackupExecutor? BackupRepo(ProjectCode code);
Task ResetRepo(ProjectCode code);
Task FinishReset(ProjectCode code, Stream zipFile);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,11 @@ public class FlexProjectMetadataEntityConfiguration: IEntityTypeConfiguration<Fl
/// <summary>
/// Configures the EF Core mapping for <see cref="FlexProjectMetadata"/>: the project id is the
/// primary key, and the writing systems are an owned type mapped to JSON together with their
/// two nested lists.
/// </summary>
/// <param name="builder">Builder for the <see cref="FlexProjectMetadata"/> entity type.</param>
public virtual void Configure(EntityTypeBuilder<FlexProjectMetadata> builder)
{
    builder.HasKey(metadata => metadata.ProjectId);
    builder.OwnsOne(metadata => metadata.WritingSystems, writingSystems =>
    {
        writingSystems.ToJson();
        // Both lists are owned collections nested inside the same JSON mapping.
        writingSystems.OwnsMany(ws => ws.AnalysisWss);
        writingSystems.OwnsMany(ws => ws.VernacularWss);
    });
}
}
Loading
Loading