From 134e55514e48238e766ff778457138e112f48682 Mon Sep 17 00:00:00 2001 From: Jake Oliver Date: Thu, 13 Jun 2024 16:33:56 -0400 Subject: [PATCH] LT-21760: Prevent Find and Fix from dropping homograph numbers --- src/SIL.LCModel.FixData/HomographFixer.cs | 31 ++-- .../FwDataFixerTests.cs | 45 +++++ .../SIL.LCModel.FixData.Tests.csproj | 4 + .../TestData/HomographDrops/Test.fwdata | 157 ++++++++++++++++++ 4 files changed, 225 insertions(+), 12 deletions(-) create mode 100644 tests/SIL.LCModel.FixData.Tests/TestData/HomographDrops/Test.fwdata diff --git a/src/SIL.LCModel.FixData/HomographFixer.cs b/src/SIL.LCModel.FixData/HomographFixer.cs index 999d60a6..b2549edf 100644 --- a/src/SIL.LCModel.FixData/HomographFixer.cs +++ b/src/SIL.LCModel.FixData/HomographFixer.cs @@ -1,4 +1,4 @@ -// Copyright (c) 2012-2018 SIL International +// Copyright (c) 2012-2018 SIL International // This software is licensed under the LGPL, version 2.1 or later // (http://www.gnu.org/licenses/lgpl-2.1.html) @@ -78,7 +78,10 @@ internal override void InspectElement(XElement rt) var citationForm = rt.Element("CitationForm"); if (citationForm != null) { - entriesWithCitationForm.Add(guid, citationForm); + if (citationForm.Elements("AUni").Any(e => e.Attribute("ws")?.Value == m_homographWs)) + { + entriesWithCitationForm.Add(guid, citationForm); + } } break; case "LangProject": @@ -127,6 +130,7 @@ internal override void FinalFixerInitialization(Dictionary owners, H var rtForm = rtElem.Element("Form"); if (rtForm == null) continue; + rtFormText = GetStringInHomographWritingSystem(rtForm); if (string.IsNullOrWhiteSpace(rtFormText)) continue; // entries with no lexeme form are not considered homographs. @@ -142,17 +146,20 @@ internal override void FinalFixerInitialization(Dictionary owners, H // if there was a citation form which matches the form of this MoStemAllomorph the MorphType // is not important to the homograph determination. - var key = m_Homographs.ContainsKey(rtFormText) ? rtFormText : rtFormText + m_MorphTypeSort[new Guid(guid)]; - - var ownerguid = new Guid(rtElem.Attribute("ownerguid").Value); - if (m_Homographs.TryGetValue(key, out guidsForHomograph)) - { - guidsForHomograph.Add(ownerguid); - } - else + if (m_Homographs.ContainsKey(rtFormText) || m_MorphTypeSort.Any()) { - guidsForHomograph = new List { ownerguid }; - m_Homographs.Add(key, guidsForHomograph); + var key = m_Homographs.ContainsKey(rtFormText) ? rtFormText : rtFormText + m_MorphTypeSort[new Guid(guid)]; + + var ownerguid = new Guid(rtElem.Attribute("ownerguid").Value); + if (m_Homographs.TryGetValue(key, out guidsForHomograph)) + { + guidsForHomograph.Add(ownerguid); + } + else + { + guidsForHomograph = new List { ownerguid }; + m_Homographs.Add(key, guidsForHomograph); + } } } diff --git a/tests/SIL.LCModel.FixData.Tests/FwDataFixerTests.cs b/tests/SIL.LCModel.FixData.Tests/FwDataFixerTests.cs index cee8b43f..1fe4f72c 100644 --- a/tests/SIL.LCModel.FixData.Tests/FwDataFixerTests.cs +++ b/tests/SIL.LCModel.FixData.Tests/FwDataFixerTests.cs @@ -862,6 +862,51 @@ public void GenericDateFixup() AssertThatXmlIn.File(fileLoc).HasAtLeastOneMatchForXpath("//rt[@class='RnGenericRec']/DateOfEvent[@val='0']"); } + /// + /// LT-21760 Find and Fix errors is losing homograph number + /// + [Test] + public void HomographNumberDrops() + { + var testPath = Path.Combine(_basePath, "HomographDrops"); + const string lexEntry_ric1Guid = "5c7a2684-97dc-4cac-8b31-4e0db5855b27"; + const string lexEntry_ric2Guid = "7dcf9363-29e6-47fb-aa1e-eb8dd1f7afe5"; + const string lexEntry_ric3Guid = "74ce3bd5-455b-48ca-986f-e325f8cf96d5"; + + var testFile = Path.Combine(testPath, "Test.fwdata"); + + AssertThatXmlIn.File(testFile).HasSpecifiedNumberOfMatchesForXpath( + "//rt[@class=\"LexEntry\" and @guid=\"" + lexEntry_ric1Guid + "\"]", 1); + AssertThatXmlIn.File(testFile).HasSpecifiedNumberOfMatchesForXpath( + "//rt[@class=\"LexEntry\" and @guid=\"" + lexEntry_ric2Guid + "\"]", 1); + AssertThatXmlIn.File(testFile).HasSpecifiedNumberOfMatchesForXpath( + "//rt[@class=\"LexEntry\" and @guid=\"" + lexEntry_ric3Guid + "\"]", 1); + + AssertThatXmlIn.File(testFile).HasSpecifiedNumberOfMatchesForXpath( + "//HomographNumber[@val='1']", 1); + AssertThatXmlIn.File(testFile).HasSpecifiedNumberOfMatchesForXpath( + "//HomographNumber[@val='2']", 1); + AssertThatXmlIn.File(testFile).HasSpecifiedNumberOfMatchesForXpath( + "//HomographNumber[@val='3']", 1); + + _errors.Clear(); + Assert.DoesNotThrow(() => + { + var data = new FwDataFixer(testFile, new DummyProgressDlg(), + LogErrors, ErrorCount); + + // SUT + data.FixErrorsAndSave(); + }, "Exception running the data fixer on the sequence test data."); + + AssertThatXmlIn.File(testFile).HasSpecifiedNumberOfMatchesForXpath( + "//HomographNumber[@val='1']", 1); + AssertThatXmlIn.File(testFile).HasSpecifiedNumberOfMatchesForXpath( + "//HomographNumber[@val='2']", 1); + AssertThatXmlIn.File(testFile).HasSpecifiedNumberOfMatchesForXpath( + "//HomographNumber[@val='3']", 1); + } + /// /// LT-13509 Identical entries homograph numbering inconsistency. /// diff --git a/tests/SIL.LCModel.FixData.Tests/SIL.LCModel.FixData.Tests.csproj b/tests/SIL.LCModel.FixData.Tests/SIL.LCModel.FixData.Tests.csproj index 98e1ce72..1e54d007 100644 --- a/tests/SIL.LCModel.FixData.Tests/SIL.LCModel.FixData.Tests.csproj +++ b/tests/SIL.LCModel.FixData.Tests/SIL.LCModel.FixData.Tests.csproj @@ -20,4 +20,8 @@ + + + + diff --git a/tests/SIL.LCModel.FixData.Tests/TestData/HomographDrops/Test.fwdata b/tests/SIL.LCModel.FixData.Tests/TestData/HomographDrops/Test.fwdata new file mode 100644 index 00000000..74f4adde --- /dev/null +++ b/tests/SIL.LCModel.FixData.Tests/TestData/HomographDrops/Test.fwdata @@ -0,0 +1,157 @@ + + + + +enh + + +en ru enh-x-source en-x-ref en-x-geo enh + + +en ru enh-x-source en-x-ref en-x-geo enh + + +enh + + +enh enh-x-source + + + + +enh + + + + + +%proj%\LinkedFiles + + +enh enh-x-source + + + + ++ + + + + + + + + + + + +
+riɔ +
+ + + + +
+ + +sfx + + + + + + +A suffix is an affix that is attached to the end of a root or stem. + + + + + + +suffix + + +- + + + + + + + + + + + + + + + + + +
+riɔ +
+ + + + +
+ + + + + + + + + + + +
+riɔ +
+ + + + +
+ + +MjeTyp + + + + + + + + + + + +Morpheme Types + + + + + + + + + + + + + + + + + + + +Dictionary + + +
\ No newline at end of file