From 67378902cf270604e17eab2bab59dc1433c7d181 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Tue, 16 Jul 2024 19:52:35 -0600 Subject: [PATCH] improve import performance (#953) * batch import entries as a single commit. Import Semantic domains, Parts of Speech. * pull common code between FwDataMiniLcmHub.cs and CrdtMiniLcmApiHub.cs into MiniLcmApiHubBase.cs * fix copy function on writing system order. * enable LcmCrdt.Tests to log to debug with sensitive data from EF * change home buttons into proper links * configure ef to ignore Entity.Senses and Sense.ExampleSentences * search filters crdt entries by gloss * correct headword code to trim and switch to lexeme form when citation form is empty * make GetPartsOfSpeech and GetSemanticDomains required methods on ILexboxApi * delete crdt db if project creation throws exception * setup bulk import of semantic domains to speed up importing large projects * add some logging to give import feedback when doing bulk import * display 2 units of precision in import complete log * add explanation of workaround for Senses property in Entry --- .../Api/FwDataMiniLcmApi.cs | 13 +- .../Changes/JsonPatchChangeTests.cs | 7 +- .../FwLite/LcmCrdt.Tests/LcmCrdt.Tests.csproj | 1 + .../FwLite/LcmCrdt.Tests/LexboxApiTests.cs | 10 ++ .../LcmCrdt.Tests/SerializationTests.cs | 16 +-- backend/FwLite/LcmCrdt/CrdtLexboxApi.cs | 52 +++++++- backend/FwLite/LcmCrdt/LcmCrdtKernel.cs | 8 +- backend/FwLite/LcmCrdt/Objects/Entry.cs | 21 ++- backend/FwLite/LcmCrdt/Objects/Sense.cs | 14 ++ .../FwLite/LcmCrdt/Objects/WritingSystem.cs | 3 +- backend/FwLite/LcmCrdt/ProjectsService.cs | 28 +++- .../LocalWebApp/Hubs/CrdtMiniLcmApiHub.cs | 86 +++---------- .../LocalWebApp/Hubs/FwDataMiniLcmHub.cs | 115 +---------------- .../LocalWebApp/Hubs/MiniLcmApiHubBase.cs | 120 ++++++++++++++++++ backend/FwLite/LocalWebApp/LocalWebApp.csproj | 1 + .../Services/ImportFwdataService.cs | 66 +++++++--- backend/FwLite/MiniLcm/ILexboxApi.cs | 14 +- backend/FwLite/MiniLcm/InMemoryApi.cs | 19 +++ backend/LfClassicData/LfClassicLexboxApi.cs | 10 ++ frontend/viewer/src/HomeView.svelte | 19 +-- 20 files changed, 382 insertions(+), 241 deletions(-) create mode 100644 backend/FwLite/LocalWebApp/Hubs/MiniLcmApiHubBase.cs diff --git a/backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs b/backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs index beb449c27..4cb2468b5 100644 --- a/backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs +++ b/backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs @@ -58,6 +58,7 @@ internal int GetWritingSystemHandle(WritingSystemId ws, WritingSystemType? type return lcmWs.Handle; } + internal CoreWritingSystemDefinition? GetLcmWritingSystem(WritingSystemId ws, WritingSystemType? type = null) { if (ws == "default") @@ -156,6 +157,11 @@ public async IAsyncEnumerable GetPartsOfSpeech() } } + public async Task CreatePartOfSpeech(PartOfSpeech partOfSpeech) + { + throw new NotImplementedException(); + } + public async IAsyncEnumerable GetSemanticDomains() { foreach (var semanticDomain in SemanticDomainRepository.AllInstances().OrderBy(p => p.Name.BestAnalysisAlternative.Text)) @@ -164,11 +170,16 @@ public async IAsyncEnumerable GetSemanticDomains() { Id = semanticDomain.Guid, Name = FromLcmMultiString(semanticDomain.Name), - Code = semanticDomain.OcmCodes + Code = semanticDomain.OcmCodes ?? "" }; } } + public async Task CreateSemanticDomain(SemanticDomain semanticDomain) + { + throw new NotImplementedException(); + } + internal ICmSemanticDomain GetLcmSemanticDomain(Guid semanticDomainId) { return SemanticDomainRepository.GetObject(semanticDomainId); diff --git a/backend/FwLite/LcmCrdt.Tests/Changes/JsonPatchChangeTests.cs b/backend/FwLite/LcmCrdt.Tests/Changes/JsonPatchChangeTests.cs index 80a00aff9..327eeaa29 100644 --- a/backend/FwLite/LcmCrdt.Tests/Changes/JsonPatchChangeTests.cs +++ b/backend/FwLite/LcmCrdt.Tests/Changes/JsonPatchChangeTests.cs @@ -3,6 +3,7 @@ using LcmCrdt.Changes; using LcmCrdt.Objects; using SystemTextJsonPatch; +using SystemTextJsonPatch.Operations; namespace LcmCrdt.Tests.Changes; @@ -14,7 +15,7 @@ public void NewChangeAction_ThrowsForRemoveAtIndex() var act = () => new JsonPatchChange(Guid.NewGuid(), patch => { - patch.Remove(entry => entry.Senses, 1); + patch.Operations.Add(new Operation("remove", "/senses/1", null, null)); }); act.Should().Throw(); } @@ -23,7 +24,7 @@ public void NewChangeAction_ThrowsForRemoveAtIndex() public void NewChangeDirect_ThrowsForRemoveAtIndex() { var patch = new JsonPatchDocument(); - patch.Remove(entry => entry.Senses, 1); + patch.Operations.Add(new Operation("remove", "/senses/1", null, null)); var act = () => new JsonPatchChange(Guid.NewGuid(), patch); act.Should().Throw(); } @@ -32,7 +33,7 @@ public void NewChangeDirect_ThrowsForRemoveAtIndex() public void NewChangeIPatchDoc_ThrowsForRemoveAtIndex() { var patch = new JsonPatchDocument(); - patch.Remove(entry => entry.Senses, 1); + patch.Operations.Add(new Operation("remove", "/senses/1", null, null)); var act = () => new JsonPatchChange(Guid.NewGuid(), patch, JsonSerializerOptions.Default); act.Should().Throw(); } diff --git a/backend/FwLite/LcmCrdt.Tests/LcmCrdt.Tests.csproj b/backend/FwLite/LcmCrdt.Tests/LcmCrdt.Tests.csproj index 050688bc2..6f666fa18 100644 --- a/backend/FwLite/LcmCrdt.Tests/LcmCrdt.Tests.csproj +++ b/backend/FwLite/LcmCrdt.Tests/LcmCrdt.Tests.csproj @@ -11,6 +11,7 @@ + diff --git a/backend/FwLite/LcmCrdt.Tests/LexboxApiTests.cs b/backend/FwLite/LcmCrdt.Tests/LexboxApiTests.cs index bc077b51e..ee45455de 100644 --- a/backend/FwLite/LcmCrdt.Tests/LexboxApiTests.cs +++ b/backend/FwLite/LcmCrdt.Tests/LexboxApiTests.cs @@ -5,6 +5,7 @@ using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection.Extensions; +using Microsoft.Extensions.Logging; using MiniLcm; using Entry = MiniLcm.Entry; using ExampleSentence = MiniLcm.ExampleSentence; @@ -26,6 +27,7 @@ public BasicApiTests() { var services = new ServiceCollection() .AddLcmCrdtClient() + .AddLogging(builder => builder.AddDebug()) .RemoveAll(typeof(ProjectContext)) .AddSingleton(new MockProjectContext(new CrdtProject("sena-3", ":memory:"))) .BuildServiceProvider(); @@ -146,6 +148,14 @@ public async Task GetWritingSystems() writingSystems.Analysis.Should().NotBeEmpty(); } + [Fact] + public async Task CreatingMultipleWritingSystems_DoesNotHaveDuplicateOrders() + { + await _api.CreateWritingSystem(WritingSystemType.Vernacular, new WritingSystem() { Id = "test-2", Name = "test", Abbreviation = "test", Font = "Arial", Exemplars = new[] { "test" } }); + var writingSystems = await DataModel.GetLatestObjects().Where(ws => ws.Type == WritingSystemType.Vernacular).ToArrayAsync(); + writingSystems.GroupBy(ws => ws.Order).Should().NotContain(g => g.Count() > 1); + } + [Fact] public async Task GetEntriesByExemplar() { diff --git a/backend/FwLite/LcmCrdt.Tests/SerializationTests.cs b/backend/FwLite/LcmCrdt.Tests/SerializationTests.cs index 9d1527fb7..7016cc082 100644 --- a/backend/FwLite/LcmCrdt.Tests/SerializationTests.cs +++ b/backend/FwLite/LcmCrdt.Tests/SerializationTests.cs @@ -19,14 +19,14 @@ public void CanSerializeEntry() LexemeForm = { Values = { { "en", "test" } } }, CitationForm = { Values = { { "en", "test" } } }, Senses = - { + [ new Sense { Id = Guid.NewGuid(), EntryId = entryId, Gloss = { Values = { { "en", "test" } } } } - } + ] }; var act = () => JsonSerializer.Serialize(entry); var json = act.Should().NotThrow().Subject; @@ -44,7 +44,7 @@ public void CanDeserializeEntry() LexemeForm = { Values = { { "en", "test" } } }, CitationForm = { Values = { { "en", "test" } } }, Senses = - { + [ new Sense { Id = senseId, @@ -60,7 +60,7 @@ public void CanDeserializeEntry() } } } - } + ] }; var json = JsonSerializer.Serialize(entry); var act = () => JsonSerializer.Deserialize(json); @@ -94,14 +94,14 @@ public void EqualityTest() LexemeForm = { Values = { { "en", "test" } } }, CitationForm = { Values = { { "en", "test" } } }, Senses = - { + [ new Sense { Id = senseId, EntryId = entryId, Gloss = { Values = { { "en", "test" } } } } - } + ] }; var entryCopy = new Entry() { @@ -109,14 +109,14 @@ public void EqualityTest() LexemeForm = { Values = { { "en", "test" } } }, CitationForm = { Values = { { "en", "test" } } }, Senses = - { + [ new Sense { Id = senseId, EntryId = entryId, Gloss = { Values = { { "en", "test" } } } } - } + ] }; entry.Should().BeEquivalentTo(entryCopy); } diff --git a/backend/FwLite/LcmCrdt/CrdtLexboxApi.cs b/backend/FwLite/LcmCrdt/CrdtLexboxApi.cs index daa0db55a..f22346f75 100644 --- a/backend/FwLite/LcmCrdt/CrdtLexboxApi.cs +++ b/backend/FwLite/LcmCrdt/CrdtLexboxApi.cs @@ -73,11 +73,26 @@ public IAsyncEnumerable GetPartsOfSpeech() return PartsOfSpeech.AsAsyncEnumerable(); } + public async Task CreatePartOfSpeech(PartOfSpeech partOfSpeech) + { + await dataModel.AddChange(ClientId, new CreatePartOfSpeechChange(partOfSpeech.Id, partOfSpeech.Name, false)); + } + public IAsyncEnumerable GetSemanticDomains() { return SemanticDomains.AsAsyncEnumerable(); } + public async Task CreateSemanticDomain(MiniLcm.SemanticDomain semanticDomain) + { + await dataModel.AddChange(ClientId, new CreateSemanticDomainChange(semanticDomain.Id, semanticDomain.Name, semanticDomain.Code)); + } + + public async Task BulkImportSemanticDomains(IEnumerable semanticDomains) + { + await dataModel.AddChanges(ClientId, semanticDomains.Select(sd => new CreateSemanticDomainChange(sd.Id, sd.Name, sd.Code))); + } + public IAsyncEnumerable GetEntries(QueryOptions? options = null) { return GetEntriesAsyncEnum(predicate: null, options); @@ -86,7 +101,12 @@ public IAsyncEnumerable GetPartsOfSpeech() public IAsyncEnumerable SearchEntries(string? query, QueryOptions? options = null) { if (string.IsNullOrEmpty(query)) return GetEntriesAsyncEnum(null, options); - return GetEntriesAsyncEnum(e => e.LexemeForm.SearchValue(query) || e.CitationForm.SearchValue(query), options); + + return GetEntriesAsyncEnum(e => e.LexemeForm.SearchValue(query) + || e.CitationForm.SearchValue(query) + || e.Senses.Any(s => s.Gloss.SearchValue(query)) + + , options); } private async IAsyncEnumerable GetEntriesAsyncEnum( @@ -188,6 +208,35 @@ await dataModel.AddChanges(ClientId, ], deferCommit: true); } + public async Task BulkCreateEntries(IAsyncEnumerable entries) + { + var semanticDomains = await SemanticDomains.ToDictionaryAsync(sd => sd.Id, sd => sd); + var partsOfSpeech = await PartsOfSpeech.ToDictionaryAsync(p => p.Id, p => p); + await dataModel.AddChanges(ClientId, entries.ToBlockingEnumerable().SelectMany(entry => CreateEntryChanges(entry, semanticDomains, partsOfSpeech))); + } + + private IEnumerable CreateEntryChanges(MiniLcm.Entry entry, Dictionary semanticDomains, Dictionary partsOfSpeech) + { + yield return new CreateEntryChange(entry); + foreach (var sense in entry.Senses) + { + sense.SemanticDomains = sense.SemanticDomains + .Select(sd => semanticDomains.TryGetValue(sd.Id, out var selectedSd) ? selectedSd : null) + .OfType() + .ToList(); + if (sense.PartOfSpeechId is not null && partsOfSpeech.TryGetValue(sense.PartOfSpeechId.Value, out var partOfSpeech)) + { + sense.PartOfSpeechId = partOfSpeech.Id; + sense.PartOfSpeech = partOfSpeech.Name["en"] ?? string.Empty; + } + yield return new CreateSenseChange(sense, entry.Id); + foreach (var exampleSentence in sense.ExampleSentences) + { + yield return new CreateExampleSentenceChange(exampleSentence, sense.Id); + } + } + } + public async Task CreateEntry(MiniLcm.Entry entry) { await dataModel.AddChanges(ClientId, @@ -226,7 +275,6 @@ private async IAsyncEnumerable CreateSenseChanges(Guid entryId, MiniLcm sense.PartOfSpeech = partOfSpeech?.Name["en"] ?? string.Empty; } - yield return new CreateSenseChange(sense, entryId); foreach (var change in sense.ExampleSentences.Select(sentence => new CreateExampleSentenceChange(sentence, sense.Id))) diff --git a/backend/FwLite/LcmCrdt/LcmCrdtKernel.cs b/backend/FwLite/LcmCrdt/LcmCrdtKernel.cs index f61aa16cb..0155ec8bf 100644 --- a/backend/FwLite/LcmCrdt/LcmCrdtKernel.cs +++ b/backend/FwLite/LcmCrdt/LcmCrdtKernel.cs @@ -17,7 +17,6 @@ namespace LcmCrdt; - public static class LcmCrdtKernel { public static IServiceCollection AddLcmCrdtClient(this IServiceCollection services) @@ -40,6 +39,9 @@ private static void ConfigureDbOptions(IServiceProvider provider, DbContextOptio { var projectContext = provider.GetRequiredService(); if (projectContext.Project is null) throw new NullReferenceException("Project is null"); +#if DEBUG + builder.EnableSensitiveDataLogging(); +#endif builder.UseSqlite($"Data Source={projectContext.Project.DbPath}") .UseLinqToDB(optionsBuilder => { @@ -48,6 +50,8 @@ private static void ConfigureDbOptions(IServiceProvider provider, DbContextOptio nameof(Commit.HybridDateTime) + "." + nameof(HybridDateTime.DateTime))) .HasAttribute(new ColumnAttribute(nameof(HybridDateTime.Counter), nameof(Commit.HybridDateTime) + "." + nameof(HybridDateTime.Counter))) + .Entity().Property(e => e.Id) + .Association(e => (e.Senses as IEnumerable)!, e => e.Id, s => s.EntryId) .Build(); mappingSchema.SetConvertExpression((MiniLcm.WritingSystemId id) => new DataParameter { Value = id.Code, DataType = DataType.Text }); @@ -64,6 +68,7 @@ private static void ConfigureCrdt(CrdtConfig config) config.ObjectTypeListBuilder .Add(builder => { + builder.Ignore(e => e.Senses); // builder.OwnsOne(e => e.Note, n => n.ToJson()); // builder.OwnsOne(e => e.LexemeForm, n => n.ToJson()); // builder.OwnsOne(e => e.CitationForm, n => n.ToJson()); @@ -71,6 +76,7 @@ private static void ConfigureCrdt(CrdtConfig config) }) .Add(builder => { + builder.Ignore(s => s.ExampleSentences); builder.HasOne() .WithMany() .HasForeignKey(sense => sense.EntryId); diff --git a/backend/FwLite/LcmCrdt/Objects/Entry.cs b/backend/FwLite/LcmCrdt/Objects/Entry.cs index d30cfcd98..8d72976e9 100644 --- a/backend/FwLite/LcmCrdt/Objects/Entry.cs +++ b/backend/FwLite/LcmCrdt/Objects/Entry.cs @@ -1,4 +1,5 @@ using System.Linq.Expressions; +using System.Text.Json.Serialization; using Crdt; using Crdt.Entities; using LinqToDB; @@ -8,7 +9,6 @@ namespace LcmCrdt.Objects; public class Entry : MiniLcm.Entry, IObjectBase { - Guid IObjectBase.Id { get => Id; @@ -17,17 +17,32 @@ Guid IObjectBase.Id public DateTimeOffset? DeletedAt { get; set; } + /// + /// This is a bit of a hack, we want to be able to reference senses when running a query, and they must be CrdtSenses + /// however we only want to store the senses in the entry as MiniLcmSenses, so we need to convert them back to CrdtSenses + /// Note, even though this is JsonIgnored, the Senses property in the base class is still serialized + /// + [JsonIgnore] + public new IReadOnlyList Senses + { + get + { + return [..base.Senses.Select(s => s as Sense ?? Sense.FromMiniLcm(s, Id))]; + } + set { base.Senses = [..value]; } + } + [ExpressionMethod(nameof(HeadwordExpression))] public string Headword(WritingSystemId ws) { var word = CitationForm[ws]; if (string.IsNullOrEmpty(word)) word = LexemeForm[ws]; - return word; + return word.Trim(); } protected static Expression> HeadwordExpression() => - (e, ws) => Json.Value(e.CitationForm, ms => ms[ws]) ?? Json.Value(e.LexemeForm, ms => ms[ws]); + (e, ws) => (string.IsNullOrEmpty(Json.Value(e.CitationForm, ms => ms[ws])) ? Json.Value(e.LexemeForm, ms => ms[ws]) : Json.Value(e.CitationForm, ms => ms[ws]))!.Trim(); public Guid[] GetReferences() { diff --git a/backend/FwLite/LcmCrdt/Objects/Sense.cs b/backend/FwLite/LcmCrdt/Objects/Sense.cs index d23538d03..dcc46d282 100644 --- a/backend/FwLite/LcmCrdt/Objects/Sense.cs +++ b/backend/FwLite/LcmCrdt/Objects/Sense.cs @@ -11,6 +11,20 @@ namespace LcmCrdt.Objects; public class Sense : MiniLcm.Sense, IObjectBase { + public static Sense FromMiniLcm(MiniLcm.Sense sense, Guid entryId) + { + return new Sense + { + Id = sense.Id, + Definition = sense.Definition, + Gloss = sense.Gloss, + PartOfSpeech = sense.PartOfSpeech, + PartOfSpeechId = sense.PartOfSpeechId, + SemanticDomains = sense.SemanticDomains, + ExampleSentences = sense.ExampleSentences, + EntryId = entryId + }; + } public static IEnumerable ChangesFromJsonPatch(Sense sense, JsonPatchDocument patch) { foreach (var rewriteChange in patch.RewriteChanges(s => s.PartOfSpeechId, diff --git a/backend/FwLite/LcmCrdt/Objects/WritingSystem.cs b/backend/FwLite/LcmCrdt/Objects/WritingSystem.cs index d0f3a1441..4fd96b397 100644 --- a/backend/FwLite/LcmCrdt/Objects/WritingSystem.cs +++ b/backend/FwLite/LcmCrdt/Objects/WritingSystem.cs @@ -48,7 +48,8 @@ public IObjectBase Copy() Font = Font, Exemplars = Exemplars, DeletedAt = DeletedAt, - Type = Type + Type = Type, + Order = Order }; } diff --git a/backend/FwLite/LcmCrdt/ProjectsService.cs b/backend/FwLite/LcmCrdt/ProjectsService.cs index 80d9df4ce..798eda4b3 100644 --- a/backend/FwLite/LcmCrdt/ProjectsService.cs +++ b/backend/FwLite/LcmCrdt/ProjectsService.cs @@ -1,12 +1,14 @@ using Crdt; using Crdt.Db; +using LcmCrdt.Utils; using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; using MiniLcm; using PartOfSpeech = LcmCrdt.Objects.PartOfSpeech; namespace LcmCrdt; -public class ProjectsService(IServiceProvider provider, ProjectContext projectContext) +public class ProjectsService(IServiceProvider provider, ProjectContext projectContext, ILogger logger) { public Task ListProjects() { @@ -34,16 +36,30 @@ public async Task CreateProject(string name, Uri? domain = null, Func? afterCreate = null) { + //poor man's sanitation + name = Path.GetFileName(name); var sqliteFile = $"{name}.sqlite"; if (File.Exists(sqliteFile)) throw new InvalidOperationException("Project already exists"); var crdtProject = new CrdtProject(name, sqliteFile); await using var serviceScope = CreateProjectScope(crdtProject); var db = serviceScope.ServiceProvider.GetRequiredService(); - var projectData = new ProjectData(name, id ?? Guid.NewGuid(), ProjectData.GetOriginDomain(domain), Guid.NewGuid()); - await InitProjectDb(db, projectData); - await serviceScope.ServiceProvider.GetRequiredService().PopulateProjectDataCache(); - await SeedSystemData(serviceScope.ServiceProvider.GetRequiredService(), projectData.ClientId); - await (afterCreate?.Invoke(serviceScope.ServiceProvider, crdtProject) ?? Task.CompletedTask); + try + { + var projectData = new ProjectData(name, + id ?? Guid.NewGuid(), + ProjectData.GetOriginDomain(domain), + Guid.NewGuid()); + await InitProjectDb(db, projectData); + await serviceScope.ServiceProvider.GetRequiredService().PopulateProjectDataCache(); + await SeedSystemData(serviceScope.ServiceProvider.GetRequiredService(), projectData.ClientId); + await (afterCreate?.Invoke(serviceScope.ServiceProvider, crdtProject) ?? Task.CompletedTask); + } + catch + { + logger.LogError("Failed to create project {Project}, deleting database", crdtProject.Name); + await db.Database.EnsureDeletedAsync(); + throw; + } return crdtProject; } diff --git a/backend/FwLite/LocalWebApp/Hubs/CrdtMiniLcmApiHub.cs b/backend/FwLite/LocalWebApp/Hubs/CrdtMiniLcmApiHub.cs index 759067d7c..3a25acd1c 100644 --- a/backend/FwLite/LocalWebApp/Hubs/CrdtMiniLcmApiHub.cs +++ b/backend/FwLite/LocalWebApp/Hubs/CrdtMiniLcmApiHub.cs @@ -1,6 +1,4 @@ -using Microsoft.AspNetCore.Http.Json; -using Microsoft.AspNetCore.SignalR; -using Microsoft.Extensions.Options; +using Microsoft.AspNetCore.SignalR; using MiniLcm; using SystemTextJsonPatch; @@ -8,9 +6,8 @@ namespace LocalWebApp.Hubs; public class CrdtMiniLcmApiHub( ILexboxApi lexboxApi, - IOptions jsonOptions, BackgroundSyncService backgroundSyncService, - SyncService syncService) : Hub + SyncService syncService) : MiniLcmApiHubBase(lexboxApi) { public const string ProjectRouteKey = "project"; public override async Task OnConnectedAsync() @@ -18,113 +15,70 @@ public override async Task OnConnectedAsync() await syncService.ExecuteSync(); } - public async Task GetWritingSystems() + public override async Task CreateWritingSystem(WritingSystemType type, WritingSystem writingSystem) { - return await lexboxApi.GetWritingSystems(); - } - - public async Task CreateWritingSystem(WritingSystemType type, WritingSystem writingSystem) - { - var newWritingSystem = await lexboxApi.CreateWritingSystem(type, writingSystem); + var newWritingSystem = await base.CreateWritingSystem(type, writingSystem); backgroundSyncService.TriggerSync(); return newWritingSystem; } - public async Task UpdateWritingSystem(WritingSystemId id, WritingSystemType type, JsonPatchDocument update) + public override async Task UpdateWritingSystem(WritingSystemId id, WritingSystemType type, JsonPatchDocument update) { - var writingSystem = await lexboxApi.UpdateWritingSystem(id, type, new JsonPatchUpdateInput(update)); + var writingSystem = await base.UpdateWritingSystem(id, type, update); backgroundSyncService.TriggerSync(); return writingSystem; } - public IAsyncEnumerable GetEntriesForExemplar(string exemplar, QueryOptions? options = null) - { - throw new NotImplementedException(); - } - - public IAsyncEnumerable GetEntries(QueryOptions? options = null) - { - return lexboxApi.GetEntries(options); - } - - public IAsyncEnumerable SearchEntries(string query, QueryOptions? options = null) - { - return lexboxApi.SearchEntries(query, options); - } - - public async Task GetEntry(Guid id) + public override async Task CreateEntry(Entry entry) { - return await lexboxApi.GetEntry(id); - } - - public async Task CreateEntry(Entry entry) - { - var newEntry = await lexboxApi.CreateEntry(entry); + var newEntry = await base.CreateEntry(entry); await NotifyEntryUpdated(newEntry); return newEntry; } - public async Task UpdateEntry(Guid id, JsonPatchDocument update) + public override async Task UpdateEntry(Guid id, JsonPatchDocument update) { - var entry = await lexboxApi.UpdateEntry(id, new JsonPatchUpdateInput(update)); + var entry = await base.UpdateEntry(id, update); await NotifyEntryUpdated(entry); return entry; } - public async Task DeleteEntry(Guid id) - { - await lexboxApi.DeleteEntry(id); - } - - public async Task CreateSense(Guid entryId, Sense sense) + public override async Task CreateSense(Guid entryId, Sense sense) { - var createdSense = await lexboxApi.CreateSense(entryId, sense); + var createdSense = await base.CreateSense(entryId, sense); backgroundSyncService.TriggerSync(); return createdSense; } - public async Task UpdateSense(Guid entryId, Guid senseId, JsonPatchDocument update) + public override async Task UpdateSense(Guid entryId, Guid senseId, JsonPatchDocument update) { - var sense = await lexboxApi.UpdateSense(entryId, senseId, new JsonPatchUpdateInput(update)); + var sense = await base.UpdateSense(entryId, senseId, update); backgroundSyncService.TriggerSync(); return sense; } - public async Task DeleteSense(Guid entryId, Guid senseId) - { - await lexboxApi.DeleteSense(entryId, senseId); - } - - public async Task CreateExampleSentence(Guid entryId, + public override async Task CreateExampleSentence(Guid entryId, Guid senseId, ExampleSentence exampleSentence) { - var createdSentence = await lexboxApi.CreateExampleSentence(entryId, senseId, exampleSentence); + var createdSentence = await base.CreateExampleSentence(entryId, senseId, exampleSentence); backgroundSyncService.TriggerSync(); return createdSentence; } - public async Task UpdateExampleSentence(Guid entryId, + public override async Task UpdateExampleSentence(Guid entryId, Guid senseId, Guid exampleSentenceId, JsonPatchDocument update) { - var sentence = await lexboxApi.UpdateExampleSentence(entryId, - senseId, - exampleSentenceId, - new JsonPatchUpdateInput(update)); + var sentence = await base.UpdateExampleSentence(entryId, senseId, exampleSentenceId, update); backgroundSyncService.TriggerSync(); return sentence; } - public async Task DeleteExampleSentence(Guid entryId, Guid senseId, Guid exampleSentenceId) - { - await lexboxApi.DeleteExampleSentence(entryId, senseId, exampleSentenceId); - } - - private async Task NotifyEntryUpdated(Entry entry) + protected override async Task NotifyEntryUpdated(Entry entry) { - await Clients.Others.OnEntryUpdated(entry); + await base.NotifyEntryUpdated(entry); backgroundSyncService.TriggerSync(); } } diff --git a/backend/FwLite/LocalWebApp/Hubs/FwDataMiniLcmHub.cs b/backend/FwLite/LocalWebApp/Hubs/FwDataMiniLcmHub.cs index 736faf46b..8cccf6306 100644 --- a/backend/FwLite/LocalWebApp/Hubs/FwDataMiniLcmHub.cs +++ b/backend/FwLite/LocalWebApp/Hubs/FwDataMiniLcmHub.cs @@ -8,7 +8,7 @@ namespace LocalWebApp.Hubs; public class FwDataMiniLcmHub([FromKeyedServices(FwDataBridgeKernel.FwDataApiKey)] ILexboxApi lexboxApi, FwDataFactory fwDataFactory, - FwDataProjectContext context) : Hub + FwDataProjectContext context) : MiniLcmApiHubBase(lexboxApi) { public const string ProjectRouteKey = "fwdata"; public override async Task OnConnectedAsync() @@ -41,117 +41,4 @@ public override async Task OnDisconnectedAsync(Exception? exception) } await Groups.RemoveFromGroupAsync(Context.ConnectionId, project.Name); } - - public async Task GetWritingSystems() - { - return await lexboxApi.GetWritingSystems(); - } - - public async Task CreateWritingSystem(WritingSystemType type, WritingSystem writingSystem) - { - var newWritingSystem = await lexboxApi.CreateWritingSystem(type, writingSystem); - return newWritingSystem; - } - - public async Task UpdateWritingSystem(WritingSystemId id, WritingSystemType type, JsonPatchDocument update) - { - var writingSystem = await lexboxApi.UpdateWritingSystem(id, type, new JsonPatchUpdateInput(update)); - return writingSystem; - } - - public IAsyncEnumerable GetPartsOfSpeech() - { - return lexboxApi.GetPartsOfSpeech(); - } - - public IAsyncEnumerable GetSemanticDomains() - { - return lexboxApi.GetSemanticDomains(); - } - - public IAsyncEnumerable GetEntriesForExemplar(string exemplar, QueryOptions? options = null) - { - throw new NotImplementedException(); - } - - public IAsyncEnumerable GetEntries(QueryOptions? options = null) - { - return lexboxApi.GetEntries(options); - } - - public IAsyncEnumerable SearchEntries(string query, QueryOptions? options = null) - { - return lexboxApi.SearchEntries(query, options); - } - - public async Task GetEntry(Guid id) - { - return await lexboxApi.GetEntry(id); - } - - public async Task CreateEntry(Entry entry) - { - var newEntry = await lexboxApi.CreateEntry(entry); - await NotifyEntryUpdated(newEntry); - return newEntry; - } - - public async Task UpdateEntry(Guid id, JsonPatchDocument update) - { - var entry = await lexboxApi.UpdateEntry(id, new JsonPatchUpdateInput(update)); - await NotifyEntryUpdated(entry); - return entry; - } - - public async Task DeleteEntry(Guid id) - { - await lexboxApi.DeleteEntry(id); - } - - public async Task CreateSense(Guid entryId, Sense sense) - { - var createdSense = await lexboxApi.CreateSense(entryId, sense); - return createdSense; - } - - public async Task UpdateSense(Guid entryId, Guid senseId, JsonPatchDocument update) - { - var sense = await lexboxApi.UpdateSense(entryId, senseId, new JsonPatchUpdateInput(update)); - return sense; - } - - public async Task DeleteSense(Guid entryId, Guid senseId) - { - await lexboxApi.DeleteSense(entryId, senseId); - } - - public async Task CreateExampleSentence(Guid entryId, - Guid senseId, - ExampleSentence exampleSentence) - { - var createdSentence = await lexboxApi.CreateExampleSentence(entryId, senseId, exampleSentence); - return createdSentence; - } - - public async Task UpdateExampleSentence(Guid entryId, - Guid senseId, - Guid exampleSentenceId, - JsonPatchDocument update) - { - var sentence = await lexboxApi.UpdateExampleSentence(entryId, - senseId, - exampleSentenceId, - new JsonPatchUpdateInput(update)); - return sentence; - } - - public async Task DeleteExampleSentence(Guid entryId, Guid senseId, Guid exampleSentenceId) - { - await lexboxApi.DeleteExampleSentence(entryId, senseId, exampleSentenceId); - } - - private async Task NotifyEntryUpdated(Entry entry) - { - await Clients.Others.OnEntryUpdated(entry); - } } diff --git a/backend/FwLite/LocalWebApp/Hubs/MiniLcmApiHubBase.cs b/backend/FwLite/LocalWebApp/Hubs/MiniLcmApiHubBase.cs new file mode 100644 index 000000000..361283314 --- /dev/null +++ b/backend/FwLite/LocalWebApp/Hubs/MiniLcmApiHubBase.cs @@ -0,0 +1,120 @@ +using Microsoft.AspNetCore.SignalR; +using Microsoft.Extensions.Options; +using MiniLcm; +using SystemTextJsonPatch; + +namespace LocalWebApp.Hubs; + +public abstract class MiniLcmApiHubBase(ILexboxApi lexboxApi) : Hub +{ + public async Task GetWritingSystems() + { + return await lexboxApi.GetWritingSystems(); + } + + public virtual async Task CreateWritingSystem(WritingSystemType type, WritingSystem writingSystem) + { + var newWritingSystem = await lexboxApi.CreateWritingSystem(type, writingSystem); + return newWritingSystem; + } + + public virtual async Task UpdateWritingSystem(WritingSystemId id, + WritingSystemType type, + JsonPatchDocument update) + { + var writingSystem = + await lexboxApi.UpdateWritingSystem(id, type, new JsonPatchUpdateInput(update)); + return writingSystem; + } + + public IAsyncEnumerable GetPartsOfSpeech() + { + return lexboxApi.GetPartsOfSpeech(); + } + + public IAsyncEnumerable GetSemanticDomains() + { + return lexboxApi.GetSemanticDomains(); + } + + public IAsyncEnumerable GetEntries(QueryOptions? options = null) + { + return lexboxApi.GetEntries(options); + } + + public IAsyncEnumerable SearchEntries(string query, QueryOptions? options = null) + { + return lexboxApi.SearchEntries(query, options); + } + + public async Task GetEntry(Guid id) + { + return await lexboxApi.GetEntry(id); + } + + public virtual async Task CreateEntry(Entry entry) + { + var newEntry = await lexboxApi.CreateEntry(entry); + await NotifyEntryUpdated(newEntry); + return newEntry; + } + + public virtual async Task UpdateEntry(Guid id, JsonPatchDocument update) + { + var entry = await lexboxApi.UpdateEntry(id, new JsonPatchUpdateInput(update)); + await NotifyEntryUpdated(entry); + return entry; + } + + public async Task DeleteEntry(Guid id) + { + await lexboxApi.DeleteEntry(id); + } + + public virtual async Task CreateSense(Guid entryId, Sense sense) + { + var createdSense = await lexboxApi.CreateSense(entryId, sense); + return createdSense; + } + + public virtual async Task UpdateSense(Guid entryId, Guid senseId, JsonPatchDocument update) + { + var sense = await lexboxApi.UpdateSense(entryId, senseId, new JsonPatchUpdateInput(update)); + return sense; + } + + public async Task DeleteSense(Guid entryId, Guid senseId) + { + await lexboxApi.DeleteSense(entryId, senseId); + } + + public virtual async Task CreateExampleSentence(Guid entryId, + Guid senseId, + ExampleSentence exampleSentence) + { + var createdSentence = await lexboxApi.CreateExampleSentence(entryId, senseId, exampleSentence); + return createdSentence; + } + + public virtual async Task UpdateExampleSentence(Guid entryId, + Guid senseId, + Guid exampleSentenceId, + JsonPatchDocument update) + { + var sentence = await lexboxApi.UpdateExampleSentence(entryId, + senseId, + exampleSentenceId, + new JsonPatchUpdateInput(update)); + return sentence; + } + + public async Task DeleteExampleSentence(Guid entryId, Guid senseId, Guid exampleSentenceId) + { + await lexboxApi.DeleteExampleSentence(entryId, senseId, exampleSentenceId); + } + + protected virtual async Task NotifyEntryUpdated(Entry entry) + { + await Clients.Others.OnEntryUpdated(entry); + } +} diff --git a/backend/FwLite/LocalWebApp/LocalWebApp.csproj b/backend/FwLite/LocalWebApp/LocalWebApp.csproj index 13ad4392f..b60f0832b 100644 --- a/backend/FwLite/LocalWebApp/LocalWebApp.csproj +++ b/backend/FwLite/LocalWebApp/LocalWebApp.csproj @@ -16,6 +16,7 @@ + diff --git a/backend/FwLite/LocalWebApp/Services/ImportFwdataService.cs b/backend/FwLite/LocalWebApp/Services/ImportFwdataService.cs index 5b9cb3b0a..a1cb8b062 100644 --- a/backend/FwLite/LocalWebApp/Services/ImportFwdataService.cs +++ b/backend/FwLite/LocalWebApp/Services/ImportFwdataService.cs @@ -1,6 +1,6 @@ -using FwDataMiniLcmBridge; -using FwDataMiniLcmBridge.Api; -using FwDataMiniLcmBridge.LcmUtils; +using System.Diagnostics; +using FwDataMiniLcmBridge; +using Humanizer; using LcmCrdt; using MiniLcm; @@ -10,23 +10,33 @@ public class ImportFwdataService(ProjectsService projectsService, ILogger Import(string projectName) { + var startTime = Stopwatch.GetTimestamp(); var fwDataProject = FieldWorksProjectList.GetProject(projectName); if (fwDataProject is null) { throw new InvalidOperationException($"Project {projectName} not found."); } using var fwDataApi = fwDataFactory.GetFwDataMiniLcmApi(fwDataProject, false); - var project = await projectsService.CreateProject(fwDataProject.Name, - afterCreate: async (provider, project) => - { - var crdtApi = provider.GetRequiredService(); - await ImportProject(crdtApi, fwDataApi, fwDataApi.EntryCount); - }); - logger.LogInformation("Import of {ProjectName} complete!", fwDataApi.Project.Name); - return project; + try + { + var project = await projectsService.CreateProject(fwDataProject.Name, + afterCreate: async (provider, project) => + { + var crdtApi = provider.GetRequiredService(); + await ImportProject(crdtApi, fwDataApi, fwDataApi.EntryCount); + }); + var timeSpent = Stopwatch.GetElapsedTime(startTime); + logger.LogInformation("Import of {ProjectName} complete, took {TimeSpend}", fwDataApi.Project.Name, timeSpent.Humanize(2)); + return project; + } + catch + { + logger.LogError("Import of {ProjectName} failed, deleting project", fwDataApi.Project.Name); + throw; + } } - async Task ImportProject(ILexboxApi importTo, ILexboxApi importFrom, int entryCount) + private async Task ImportProject(ILexboxApi importTo, ILexboxApi importFrom, int entryCount) { var writingSystems = await importFrom.GetWritingSystems(); foreach (var ws in writingSystems.Analysis) @@ -41,19 +51,37 @@ async Task ImportProject(ILexboxApi importTo, ILexboxApi importFrom, int entryCo logger.LogInformation("Imported ws {WsId}", ws.Id); } - var index = 0; - await foreach (var entry in importFrom.GetEntries(new QueryOptions(Count: 100_000, Offset: 0))) + await foreach (var partOfSpeech in importFrom.GetPartsOfSpeech()) { - if (importTo is CrdtLexboxApi crdtLexboxApi) + await importTo.CreatePartOfSpeech(partOfSpeech); + logger.LogInformation("Imported part of speech {Id}", partOfSpeech.Id); + } + + + var semanticDomains = importFrom.GetSemanticDomains(); + var entries = importFrom.GetEntries(new QueryOptions(Count: 100_000, Offset: 0)); + if (importTo is CrdtLexboxApi crdtLexboxApi) + { + logger.LogInformation("Importing semantic domains"); + await crdtLexboxApi.BulkImportSemanticDomains(semanticDomains.ToBlockingEnumerable()); + logger.LogInformation("Importing {Count} entries", entryCount); + await crdtLexboxApi.BulkCreateEntries(entries); + } + else + { + await foreach (var semanticDomain in semanticDomains) { - await crdtLexboxApi.CreateEntryLite(entry); + await importTo.CreateSemanticDomain(semanticDomain); + logger.LogTrace("Imported semantic domain {Id}", semanticDomain.Id); } - else + + var index = 0; + await foreach (var entry in entries) { await importTo.CreateEntry(entry); + logger.LogTrace("Imported entry, {Index} of {Count} {Id}", index++, entryCount, entry.Id); } - - logger.LogInformation("Imported entry, {Index} of {Count} {Id}", index++, entryCount, entry.Id); } + logger.LogInformation("Imported {Count} entries", entryCount); } } diff --git a/backend/FwLite/MiniLcm/ILexboxApi.cs b/backend/FwLite/MiniLcm/ILexboxApi.cs index 971840434..39186d87c 100644 --- a/backend/FwLite/MiniLcm/ILexboxApi.cs +++ b/backend/FwLite/MiniLcm/ILexboxApi.cs @@ -12,14 +12,12 @@ Task UpdateWritingSystem(WritingSystemId id, WritingSystemType type, UpdateObjectInput update); - IAsyncEnumerable GetPartsOfSpeech() - { - throw new NotImplementedException(); - } - IAsyncEnumerable GetSemanticDomains() - { - throw new NotImplementedException(); - } + IAsyncEnumerable GetPartsOfSpeech(); + Task CreatePartOfSpeech(PartOfSpeech partOfSpeech); + IAsyncEnumerable GetSemanticDomains(); + + Task CreateSemanticDomain(SemanticDomain semanticDomain); + IAsyncEnumerable GetEntries(QueryOptions? options = null); IAsyncEnumerable SearchEntries(string query, QueryOptions? options = null); Task GetEntry(Guid id); diff --git a/backend/FwLite/MiniLcm/InMemoryApi.cs b/backend/FwLite/MiniLcm/InMemoryApi.cs index 07ffec4ae..856981264 100644 --- a/backend/FwLite/MiniLcm/InMemoryApi.cs +++ b/backend/FwLite/MiniLcm/InMemoryApi.cs @@ -138,6 +138,16 @@ public Task UpdateWritingSystem(WritingSystemId id, WritingSystem return Task.FromResult(ws); } + public IAsyncEnumerable GetPartsOfSpeech() + { + throw new NotImplementedException(); + } + + public IAsyncEnumerable GetSemanticDomains() + { + throw new NotImplementedException(); + } + private readonly string[] _exemplars = Enumerable.Range('a', 'z').Select(c => ((char)c).ToString()).ToArray(); public Task CreateEntry(Entry entry) @@ -164,6 +174,15 @@ public Task CreateSense(Guid entryId, Sense sense) return Task.FromResult(sense); } + public async Task CreatePartOfSpeech(PartOfSpeech partOfSpeech) + { + throw new NotImplementedException(); + } + + public async Task CreateSemanticDomain(SemanticDomain semanticDomain) + { + throw new NotImplementedException(); + } public Task DeleteEntry(Guid id) { diff --git a/backend/LfClassicData/LfClassicLexboxApi.cs b/backend/LfClassicData/LfClassicLexboxApi.cs index b1ce36a04..698c0e224 100644 --- a/backend/LfClassicData/LfClassicLexboxApi.cs +++ b/backend/LfClassicData/LfClassicLexboxApi.cs @@ -59,6 +59,16 @@ public async IAsyncEnumerable GetPartsOfSpeech() } } + public async Task CreatePartOfSpeech(PartOfSpeech partOfSpeech) + { + throw new NotSupportedException(); + } + + public async Task CreateSemanticDomain(SemanticDomain semanticDomain) + { + throw new NotSupportedException(); + } + public IAsyncEnumerable GetSemanticDomains() { return AsyncEnumerable.Empty(); diff --git a/frontend/viewer/src/HomeView.svelte b/frontend/viewer/src/HomeView.svelte index 34ef4e02e..91ce7d93c 100644 --- a/frontend/viewer/src/HomeView.svelte +++ b/frontend/viewer/src/HomeView.svelte @@ -8,7 +8,7 @@ mdiBookSyncOutline, mdiTestTube, } from '@mdi/js'; - import { navigate } from 'svelte-routing'; + import { links } from 'svelte-routing'; import { Button, Card, type ColumnDef, ListItem, Table, TextField, tableCell, Icon } from 'svelte-ux'; import flexLogo from './lib/assets/flex-logo.png'; import DevContent, { isDev } from './lib/layout/DevContent.svelte'; @@ -40,16 +40,16 @@ projectsPromise = fetchProjects(); } - let loading = ''; + let importing = ''; async function importFwDataProject(name: string) { - loading = name; + importing = name; await fetch(`/api/import/fwdata/${name}`, { method: 'POST', }); projectsPromise = fetchProjects(); await projectsPromise; - loading = ''; + importing = ''; } let downloading = ''; @@ -152,10 +152,10 @@ {#each data ?? [] as rowData, rowIndex} {#each columns as column (column.name)} - + {#if column.name === 'fwdata'} {#if rowData.fwdata} - @@ -187,13 +187,14 @@ {:else if rowData.fwdata}