Skip to content

Commit

Permalink
LT-21760: Prevent Find and Fix from dropping homograph numbers
Browse files Browse the repository at this point in the history
  • Loading branch information
JakeOliver28 committed Jun 13, 2024
1 parent f8406cd commit 134e555
Show file tree
Hide file tree
Showing 4 changed files with 225 additions and 12 deletions.
31 changes: 19 additions & 12 deletions src/SIL.LCModel.FixData/HomographFixer.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2012-2018 SIL International
// Copyright (c) 2012-2018 SIL International
// This software is licensed under the LGPL, version 2.1 or later
// (http://www.gnu.org/licenses/lgpl-2.1.html)

Expand Down Expand Up @@ -78,7 +78,10 @@ internal override void InspectElement(XElement rt)
var citationForm = rt.Element("CitationForm");
if (citationForm != null)
{
entriesWithCitationForm.Add(guid, citationForm);
if (citationForm.Elements("AUni").Any(e => e.Attribute("ws")?.Value == m_homographWs))
{
entriesWithCitationForm.Add(guid, citationForm);
}
}
break;
case "LangProject":
Expand Down Expand Up @@ -127,6 +130,7 @@ internal override void FinalFixerInitialization(Dictionary<Guid, Guid> owners, H
var rtForm = rtElem.Element("Form");
if (rtForm == null)
continue;

rtFormText = GetStringInHomographWritingSystem(rtForm);
if (string.IsNullOrWhiteSpace(rtFormText))
continue; // entries with no lexeme form are not considered homographs.
Expand All @@ -142,17 +146,20 @@ internal override void FinalFixerInitialization(Dictionary<Guid, Guid> owners, H

// if there was a citation form which matches the form of this MoStemAllomorph the MorphType
// is not important to the homograph determination.
var key = m_Homographs.ContainsKey(rtFormText) ? rtFormText : rtFormText + m_MorphTypeSort[new Guid(guid)];

var ownerguid = new Guid(rtElem.Attribute("ownerguid").Value);
if (m_Homographs.TryGetValue(key, out guidsForHomograph))
{
guidsForHomograph.Add(ownerguid);
}
else
if (m_Homographs.ContainsKey(rtFormText) || m_MorphTypeSort.Any())
{
guidsForHomograph = new List<Guid> { ownerguid };
m_Homographs.Add(key, guidsForHomograph);
var key = m_Homographs.ContainsKey(rtFormText) ? rtFormText : rtFormText + m_MorphTypeSort[new Guid(guid)];

var ownerguid = new Guid(rtElem.Attribute("ownerguid").Value);
if (m_Homographs.TryGetValue(key, out guidsForHomograph))
{
guidsForHomograph.Add(ownerguid);
}
else
{
guidsForHomograph = new List<Guid> { ownerguid };
m_Homographs.Add(key, guidsForHomograph);
}
}
}

Expand Down
45 changes: 45 additions & 0 deletions tests/SIL.LCModel.FixData.Tests/FwDataFixerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -862,6 +862,51 @@ public void GenericDateFixup()
AssertThatXmlIn.File(fileLoc).HasAtLeastOneMatchForXpath("//rt[@class='RnGenericRec']/DateOfEvent[@val='0']");
}

/// <summary>
/// LT-21760: running Find and Fix must not drop existing homograph numbers.
/// The HomographDrops fixture is expected to contain three specific LexEntry
/// records and exactly one HomographNumber each of 1, 2 and 3; after the fixer
/// has run, each of those three homograph numbers must still occur exactly once.
/// </summary>
[Test]
public void HomographNumberDrops()
{
	var testPath = Path.Combine(_basePath, "HomographDrops");
	var testFile = Path.Combine(testPath, "Test.fwdata");

	// The three entries that make up the homograph set in the fixture.
	var lexEntryGuids = new[]
	{
		"5c7a2684-97dc-4cac-8b31-4e0db5855b27",
		"7dcf9363-29e6-47fb-aa1e-eb8dd1f7afe5",
		"74ce3bd5-455b-48ca-986f-e325f8cf96d5"
	};
	var homographNumbers = new[] { "1", "2", "3" };

	// Sanity-check the fixture first, so a failure after the fix can only be
	// blamed on the fixer and not on a damaged test file.
	foreach (var entryGuid in lexEntryGuids)
	{
		AssertThatXmlIn.File(testFile).HasSpecifiedNumberOfMatchesForXpath(
			"//rt[@class=\"LexEntry\" and @guid=\"" + entryGuid + "\"]", 1);
	}
	foreach (var number in homographNumbers)
	{
		AssertThatXmlIn.File(testFile).HasSpecifiedNumberOfMatchesForXpath(
			"//HomographNumber[@val='" + number + "']", 1);
	}

	_errors.Clear();
	Assert.DoesNotThrow(() =>
	{
		var data = new FwDataFixer(testFile, new DummyProgressDlg(),
			LogErrors, ErrorCount);
		// SUT
		data.FixErrorsAndSave();
	}, "Exception running the data fixer on the homograph drops test data.");

	// Every homograph number present before the fix must still be there afterwards.
	foreach (var number in homographNumbers)
	{
		AssertThatXmlIn.File(testFile).HasSpecifiedNumberOfMatchesForXpath(
			"//HomographNumber[@val='" + number + "']", 1);
	}
}

/// <summary>
/// LT-13509 Identical entries homograph numbering inconsistency.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,8 @@
<ProjectReference Include="..\SIL.LCModel.Tests\SIL.LCModel.Tests.csproj" />
</ItemGroup>

<ItemGroup>
<Folder Include="TestData\HomographDrops\" />
</ItemGroup>

</Project>
157 changes: 157 additions & 0 deletions tests/SIL.LCModel.FixData.Tests/TestData/HomographDrops/Test.fwdata
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
<?xml version="1.0" encoding="utf-8"?>
<languageproject version="7000051">
<rt class="LangProject" guid="5c937611-0e49-491b-ad5e-b61dd01a45d4">
<HomographWs>
<Uni>enh</Uni>
</HomographWs>
<AnalysisWss>
<Uni>en ru enh-x-source en-x-ref en-x-geo enh</Uni>
</AnalysisWss>
<CurAnalysisWss>
<Uni>en ru enh-x-source en-x-ref en-x-geo enh</Uni>
</CurAnalysisWss>
<CurPronunWss>
<Uni>enh</Uni>
</CurPronunWss>
<CurVernWss>
<Uni>enh enh-x-source</Uni>
</CurVernWss>
<DateCreated val="2021-03-16 21:26:31.627" />
<DateModified val="2024-03-14 17:37:44.271" />
<HomographWs>
<Uni>enh</Uni>
</HomographWs>
<LexDb>
<objsur guid="af26d792-ea5e-11de-8f7e-0013722f8dec" t="o" />
</LexDb>
<LinkedFilesRootDir>
<Uni>%proj%\LinkedFiles</Uni>
</LinkedFilesRootDir>
<VernWss>
<Uni>enh enh-x-source</Uni>
</VernWss>
</rt>
<rt class="LexEntry" guid="5c7a2684-97dc-4cac-8b31-4e0db5855b27">
<CitationForm>
<AUni ws="enh-x-source">+</AUni>
</CitationForm>
<DateCreated val="2024-02-12 12:57:59.009" />
<DateModified val="2024-03-14 15:59:21.262" />
<DoNotUseForParsing val="False" />
<HomographNumber val="3" />
<LexemeForm>
<objsur guid="76510343-5ba9-4455-bf15-4edd6bcf8fdc" t="o" />
</LexemeForm>
<MorphoSyntaxAnalyses />
</rt>
<rt class="MoAffixAllomorph" guid="76510343-5ba9-4455-bf15-4edd6bcf8fdc" ownerguid="5c7a2684-97dc-4cac-8b31-4e0db5855b27">
<Form>
<AUni ws="enh">riɔ</AUni>
</Form>
<IsAbstract val="False" />
<MorphType>
<objsur guid="d7f713dd-e8cf-11d3-9764-00c04f186933" t="r" />
</MorphType>
</rt>
<rt class="MoMorphType" guid="d7f713dd-e8cf-11d3-9764-00c04f186933" ownerguid="d7f713d8-e8cf-11d3-9764-00c04f186933">
<Abbreviation>
<AUni ws="en">sfx</AUni>
</Abbreviation>
<BackColor val="0" />
<DateCreated val="2021-03-16 21:26:31.760" />
<DateModified val="2021-03-16 21:26:31.760" />
<Description>
<AStr ws="en">
<Run ws="en">A suffix is an affix that is attached to the end of a root or stem.</Run>
</AStr>
</Description>
<ForeColor val="0" />
<Hidden val="False" />
<IsProtected val="True" />
<Name>
<AUni ws="en">suffix</AUni>
</Name>
<Prefix>
<Uni>-</Uni>
</Prefix>
<SecondaryOrder val="70" />
<SortSpec val="0" />
<UnderColor val="0" />
<UnderStyle val="0" />
</rt>
<rt class="LexEntry" guid="74ce3bd5-455b-48ca-986f-e325f8cf96d5">
<DateCreated val="2124-03-12 16:20:00.000" />
<DateModified val="2024-03-13 17:15:53.695" />
<DoNotUseForParsing val="False" />
<HomographNumber val="2" />
<LexemeForm>
<objsur guid="ead4f61a-2bb8-4413-8102-bc936f916988" t="o" />
</LexemeForm>
<MorphoSyntaxAnalyses />
</rt>
<rt class="MoAffixAllomorph" guid="ead4f61a-2bb8-4413-8102-bc936f916988" ownerguid="74ce3bd5-455b-48ca-986f-e325f8cf96d5">
<Form>
<AUni ws="enh">riɔ</AUni>
</Form>
<IsAbstract val="False" />
<MorphType>
<objsur guid="d7f713dd-e8cf-11d3-9764-00c04f186933" t="r" />
</MorphType>
</rt>
<rt class="LexEntry" guid="7dcf9363-29e6-47fb-aa1e-eb8dd1f7afe5">
<DateCreated val="2124-03-12 16:20:00.000" />
<DateModified val="2024-03-13 17:15:53.695" />
<DoNotUseForParsing val="False" />
<HomographNumber val="1" />
<LexemeForm>
<objsur guid="64dcc662-315f-49ef-a339-64a0ef7c8c42" t="o" />
</LexemeForm>
<MorphoSyntaxAnalyses />
</rt>
<rt class="MoAffixAllomorph" guid="64dcc662-315f-49ef-a339-64a0ef7c8c42" ownerguid="7dcf9363-29e6-47fb-aa1e-eb8dd1f7afe5">
<Form>
<AUni ws="enh">riɔ</AUni>
</Form>
<IsAbstract val="False" />
<MorphType>
<objsur guid="d7f713dd-e8cf-11d3-9764-00c04f186933" t="r" />
</MorphType>
</rt>
<rt class="CmPossibilityList" guid="d7f713d8-e8cf-11d3-9764-00c04f186933" ownerguid="af26d792-ea5e-11de-8f7e-0013722f8dec">
<Abbreviation>
<AUni ws="en">MjeTyp</AUni>
</Abbreviation>
<DateCreated val="2018-08-02 16:32:06.267" />
<DateModified val="2018-08-02 16:32:06.267" />
<Depth val="1" />
<DisplayOption val="0" />
<IsClosed val="True" />
<IsSorted val="True" />
<IsVernacular val="False" />
<ItemClsid val="5042" />
<ListVersion val="00000000-0000-0000-0000-000000000000" />
<Name>
<AUni ws="en">Morpheme Types</AUni>
</Name>
<Possibilities>
<objsur guid="d7f713dd-e8cf-11d3-9764-00c04f186933" t="o" />
</Possibilities>
<PreventChoiceAboveLevel val="0" />
<PreventDuplicates val="False" />
<PreventNodeChoices val="False" />
<UseExtendedFields val="False" />
<WsSelector val="-3" />
</rt>
<rt class="LexDb" guid="af26d792-ea5e-11de-8f7e-0013722f8dec" ownerguid="5c937611-0e49-491b-ad5e-b61dd01a45d4">
<DateCreated val="2021-03-16 21:26:29.875" />
<DateModified val="2021-03-16 21:26:29.875" />
<IsBodyInSeparateSubentry val="False" />
<IsHeadwordCitationForm val="False" />
<MorphTypes>
<objsur guid="d7f713d8-e8cf-11d3-9764-00c04f186933" t="o" />
</MorphTypes>
<Name>
<AUni ws="en">Dictionary</AUni>
</Name>
</rt>
</languageproject>

0 comments on commit 134e555

Please sign in to comment.