diff --git a/LCM.sln.DotSettings b/LCM.sln.DotSettings index 95be78e0..59cfed81 100644 --- a/LCM.sln.DotSettings +++ b/LCM.sln.DotSettings @@ -8,6 +8,7 @@ (?<=\W|^)(?<TAG>REVIEW)(\W|$)(.*) Normal True + True True True True diff --git a/src/SIL.LCModel.Core/Text/TsStringUtils.cs b/src/SIL.LCModel.Core/Text/TsStringUtils.cs index b64ab04e..267d6a5e 100644 --- a/src/SIL.LCModel.Core/Text/TsStringUtils.cs +++ b/src/SIL.LCModel.Core/Text/TsStringUtils.cs @@ -1,4 +1,4 @@ -// Copyright (c) 2004-2020 SIL International +// Copyright (c) 2004-2022 SIL International // This software is licensed under the LGPL, version 2.1 or later // (http://www.gnu.org/licenses/lgpl-2.1.html) @@ -1856,6 +1856,14 @@ public static bool IsNullOrEmpty(ITsString testMe) { return testMe == null || testMe.Length <= 0; } + + /// + /// True if the string is null, empty, or a placeholder (e.g. ***) + /// + public static bool IsNullOrPlaceholder(ITsString testMe, string placeholder) + { + return IsNullOrEmpty(testMe) || testMe.Text.Equals(placeholder); + } } #endregion diff --git a/src/SIL.LCModel/DomainImpl/ScrTxtPara.cs b/src/SIL.LCModel/DomainImpl/ScrTxtPara.cs index ac109252..f01ea2d6 100644 --- a/src/SIL.LCModel/DomainImpl/ScrTxtPara.cs +++ b/src/SIL.LCModel/DomainImpl/ScrTxtPara.cs @@ -1,4 +1,4 @@ -// Copyright (c) 2003-2018 SIL International +// Copyright (c) 2003-2022 SIL International // This software is licensed under the LGPL, version 2.1 or later // (http://www.gnu.org/licenses/lgpl-2.1.html) @@ -6,6 +6,9 @@ using System.Collections; using System.Collections.Generic; using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Text.RegularExpressions; using Icu; using SIL.LCModel.Core.KernelInterfaces; using SIL.LCModel.Core.Scripture; @@ -2117,7 +2120,7 @@ public override ITsString Reference(ISegment seg, int ich) { var stText = Owner as IStText; if (stText == null) - return Cache.MakeUserTss("unknown"); // should never happen, I think? 
+ return Cache.MakeUserTss(Strings.ksStars); // should never happen, I think? if (stText.OwningFlid == ScrSectionTags.kflidContent) { // Body of Scripture. Figure a book/chapter/verse @@ -2125,17 +2128,70 @@ public override ITsString Reference(ISegment seg, int ich) string mainRef = ScriptureServices.FullScrRef(this, ich, book.BestUIAbbrev).Trim(); return Cache.MakeUserTss(mainRef + ScriptureServices.VerseSegLabel(this, SegmentsOS.IndexOf(seg))); } - if (stText.OwningFlid == ScrSectionTags.kflidHeading) + //if (stText.OwningFlid == ScrSectionTags.kflidHeading) + //{ + // // use the section title without qualifiers. + // return stText.Title.BestVernacularAnalysisAlternative; + //} + //if (stText.OwningFlid == ScrBookTags.kflidTitle) + //{ + // return stText.Title.BestVernacularAnalysisAlternative; + //} + return Cache.MakeUserTss(Strings.ksStars); // should never happen, I think? + } + + /// + public override ITsString ReferenceForSorting(ISegment seg, int ich) + { + if (!(Owner is IStText stText)) { - // use the section title without qualifiers. - return stText.Title.BestVernacularAnalysisAlternative; + return Scripture.Name.NotFoundTss; + } + + // Use a prefix to make scripture references sort together when mixed with other references + // (Scripture is sorted canonically, but a comparer for mixed references would sort alphabetically) + var bldr = new StringBuilder(RefForSortingPrefix); + switch (stText.OwningFlid) + { + case ScrSectionTags.kflidContent: + var book = (IScrBook)stText.Owner.Owner; + // Append the book number to sort in canonical order. + bldr.Append(book.CanonicalNum); + // Append the book name. It makes no difference for sorting, but could make debugging easier. 
+ bldr.Append("_").Append(book.BestUIAbbrev); + + var refSansBookBldr = new StringBuilder(ScriptureServices.FullScrRef(this, ich, string.Empty).Trim()); + var numbersInRef = new Regex(@"\d+").Matches(refSansBookBldr.ToString()); + foreach (var number in numbersInRef.Cast<Match>().Reverse()) + { + ZeroPadForStringComparison(refSansBookBldr, number.Index, number.Length); + } + bldr.Append(" ").Append(refSansBookBldr).Append(ScriptureServices.VerseSegLabel(this, SegmentsOS.IndexOf(seg))); + + // add ich + bldr.Append(" ").Append(ZeroPadForStringComparison(ich)); + return Cache.MakeUserTss(bldr.ToString()); } - if (stText.OwningFlid == ScrBookTags.kflidTitle) + return Cache.MakeUserTss(Strings.ksStars); + } + + protected internal const string RefForSortingPrefix = "0 Scr "; + + protected internal static void ZeroPadForStringComparison(StringBuilder bldr, int index, int cExistingDigits) + { + for (var remaining = 5 - cExistingDigits; remaining > 0; remaining--) { - return stText.Title.BestVernacularAnalysisAlternative; + bldr.Insert(index, "0"); } - return Cache.MakeUserTss("unknown"); // should never happen, I think? } + + protected internal static string ZeroPadForStringComparison(string intInRef) + { + var bldr = new StringBuilder(intInRef); + ZeroPadForStringComparison(bldr, 0, intInRef.Length); + return bldr.ToString(); + } + /// ------------------------------------------------------------------------------------ /// /// Gets the footnote sequence. diff --git a/src/SIL.LCModel/DomainImpl/StTxtPara.cs b/src/SIL.LCModel/DomainImpl/StTxtPara.cs index 63e944ea..acf78320 100644 --- a/src/SIL.LCModel/DomainImpl/StTxtPara.cs +++ b/src/SIL.LCModel/DomainImpl/StTxtPara.cs @@ -50,7 +50,7 @@ public IStTxtPara PreviousParagraph /// /// Return a Reference (e.g., Scripture reference, or text abbreviation/para #/sentence#) for the specified character /// position (in the whole paragraph), which is assumed to belong to the specified segment. 
- /// (For now, ich is not actually used, but it may become important if we decide not to split segements for + /// (For now, ich is not actually used, but it may become important if we decide not to split segments for /// verse numbers.) /// Overridden in ScrTxtPara to handle special cases for Scripture refs. /// @@ -100,10 +100,6 @@ public virtual ITsString Reference(ISegment seg, int ich) if (bldr.Length > 0) bldr.Replace(bldr.Length, bldr.Length, " ", props); - // if Scripture.IsResponsibleFor(stText) we should try to get the verse number of the annotation. - //if (stText.OwningFlid == (int)Text.TextTags.kflidContents) - //{ - // Insert paragraph number. int ipara = stText.ParagraphsOS.IndexOf(this) + 1; bldr.Replace(bldr.Length, bldr.Length, ipara.ToString(), props); @@ -117,6 +113,58 @@ public virtual ITsString Reference(ISegment seg, int ich) return bldr.GetString(); } + /// + public virtual ITsString ReferenceForSorting(ISegment seg, int ich) + { + if (!(Owner is IStText stText)) + { + return TsStringUtils.EmptyString(Cache.DefaultUserWs); + } + + ITsString tssName = null; + var fUsingAbbr = false; + if (stText.Owner is IText text) + { + tssName = text.Abbreviation.BestVernacularAnalysisAlternative; + if (!TsStringUtils.IsNullOrPlaceholder(tssName, stText.Title.NotFoundTss.Text)) + { + fUsingAbbr = true; + } + } + if (!fUsingAbbr) + { + tssName = stText.Title.BestVernacularAnalysisAlternative; + } + + // Make a TsTextProps specifying only the writing system. + var propBldr = TsStringUtils.MakePropsBldr(); + var wsActual = tssName.get_Properties(0).GetIntPropValues((int)FwTextPropType.ktptWs, out _); + propBldr.SetIntPropValues((int)FwTextPropType.ktptWs, (int)FwTextPropVar.ktpvDefault, wsActual); + var props = propBldr.GetTextProps(); + + var bldr = TsStringUtils.IsNullOrPlaceholder(tssName, stText.Title.NotFoundTss.Text) ? new TsStrBldr() : tssName.GetBldr(); + + // Start with a space even if we don't have a title, so untitled texts sort to the top. 
+ bldr.Append(" ", props); + + // Insert paragraph and segment numbers. + var iPara = stText.ParagraphsOS.IndexOf(this) + 1; + var iSeg = SegmentsOS.IndexOf(seg) + 1; + bldr.Append(ZeroPadForStringComparison(iPara), props).Append(".", props).Append(ZeroPadForStringComparison(iSeg), props); + + // Insert the offset so that two references in the same segment are sorted properly (LT-8457) + bldr.Append(" ", props).Append(ZeroPadForStringComparison(ich), props); + + return bldr.GetString(); + } + + /// Pads the given int with zeroes to the max length of an int + protected internal static string ZeroPadForStringComparison(int i) + { + // because int.MaxValue.ToString().Length is 10 + return i.ToString("D10"); + } + /// ------------------------------------------------------------------------------------ /// /// Finds the ORC of the specified picture and deletes it from the paragraph and any diff --git a/src/SIL.LCModel/InterfaceAdditions.cs b/src/SIL.LCModel/InterfaceAdditions.cs index cdedf74e..49bed223 100644 --- a/src/SIL.LCModel/InterfaceAdditions.cs +++ b/src/SIL.LCModel/InterfaceAdditions.cs @@ -3136,15 +3136,25 @@ IStTxtPara PreviousParagraph List GetChartCellRefs(); /// ------------------------------------------------------------------------------------ - /// - /// Return a Reference (e.g., Scripture reference, or text abbreviation/para #/sentence#) for the specified character - /// position (in the whole paragraph), which is assumed to belong to the specified segment. - /// (For now, ich is not actually used, but it may become important if we decide not to split segements for - /// verse numbers.) - /// + /// + /// Return a Reference (e.g., Scripture reference, or text abbreviation+para #+sentence #) for the specified character + /// position (in the whole paragraph), which is assumed to belong to the specified segment. + /// (For now, ich is not actually used, but it may become important if we decide not to split segments for + /// verse numbers.) 
+ /// /// ------------------------------------------------------------------------------------ ITsString Reference(ISegment seg, int ich); + /// ------------------------------------------------------------------------------------ + /// + /// Return a Reference (e.g., Scripture reference, or text abbreviation+para #+sentence #) for the specified character + /// position (in the whole paragraph), which is assumed to belong to the specified segment. + /// To allow greater accuracy and precision in sorting, numbers are zero-padded to a fixed width and ich + /// is included at the end. + /// + /// ------------------------------------------------------------------------------------ + ITsString ReferenceForSorting(ISegment seg, int ich); + /// ------------------------------------------------------------------------------------ /// /// Splits the paragraph at the specified character index. diff --git a/tests/SIL.LCModel.Core.Tests/Text/TsStringUtilsTests.cs b/tests/SIL.LCModel.Core.Tests/Text/TsStringUtilsTests.cs index af186b48..f59ba8d5 100644 --- a/tests/SIL.LCModel.Core.Tests/Text/TsStringUtilsTests.cs +++ b/tests/SIL.LCModel.Core.Tests/Text/TsStringUtilsTests.cs @@ -1,4 +1,4 @@ -// Copyright (c) 2004-2021 SIL International +// Copyright (c) 2004-2022 SIL International // This software is licensed under the LGPL, version 2.1 or later // (http://www.gnu.org/licenses/lgpl-2.1.html) @@ -2110,6 +2110,28 @@ public void RemoveIllegalXmlChars() Assert.That(TsStringUtils.RemoveIllegalXmlChars(outOfOrderSurrogates).Text, Is.EqualTo("\xd800\xdc00z")); } + [Test] + public void IsNull_OrMissing_Null() + { + Assert.That(TsStringUtils.IsNullOrEmpty(null), Is.True, "null is null or empty"); + Assert.That(TsStringUtils.IsNullOrPlaceholder(null, "***"), Is.True, "null is null or placeholder"); + } + + [TestCase("", ExpectedResult = true)] + [TestCase("***", ExpectedResult = false)] + [TestCase("t", ExpectedResult = false)] + public bool IsNullOrEmpty(string actual) + { + return 
TsStringUtils.IsNullOrEmpty(TsStringUtils.MakeString(actual, m_wsf.UserWs)); + } + + [TestCase("", ExpectedResult = true)] + [TestCase("***", ExpectedResult = true)] + [TestCase("t", ExpectedResult = false)] + public bool IsNullOrPlaceholder(string actual) + { + return TsStringUtils.IsNullOrPlaceholder(TsStringUtils.MakeString(actual, m_wsf.UserWs), "***"); + } #endregion } } \ No newline at end of file diff --git a/tests/SIL.LCModel.Tests/DomainImpl/ScrTxtParaTests.cs b/tests/SIL.LCModel.Tests/DomainImpl/ScrTxtParaTests.cs index d75393fc..be724f38 100644 --- a/tests/SIL.LCModel.Tests/DomainImpl/ScrTxtParaTests.cs +++ b/tests/SIL.LCModel.Tests/DomainImpl/ScrTxtParaTests.cs @@ -549,23 +549,46 @@ public void Reference() AddDataToMatthew(); var para1 = (IStTxtPara) m_book.SectionsOS[1].ContentOA.ParagraphsOS[0]; // Actually ScrTxtPara var seg = para1.SegmentsOS[1]; // first content ref, after the chapter and verse number stuff. - Assert.That(para1.Reference(seg, seg.BeginOffset + 1).Text, Is.EqualTo("MAT 1:1")); + var v1Seg1Idx = seg.BeginOffset + 1; + Assert.That(para1.Reference(seg, v1Seg1Idx).Text, Is.EqualTo("MAT 1:1")); + Assert.That(para1.ReferenceForSorting(seg, v1Seg1Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00001 {v1Seg1Idx:D10}")); AddRunToMockedPara(para1, "Verse two second sentence.", null); - var v2seg1 = para1.SegmentsOS[3]; // first segment of two-sentence verse - Assert.That(para1.Reference(v2seg1, v2seg1.BeginOffset + 1).Text, Is.EqualTo("MAT 1:2a")); - var v2seg2 = para1.SegmentsOS[4]; // first segment of two-sentence verse - Assert.That(para1.Reference(v2seg2, v2seg2.BeginOffset + 1).Text, Is.EqualTo("MAT 1:2b")); + var v2Seg1 = para1.SegmentsOS[3]; // first segment of two-sentence verse + var v2Seg1Idx = v2Seg1.BeginOffset + 1; + Assert.That(para1.Reference(v2Seg1, v2Seg1Idx).Text, Is.EqualTo("MAT 1:2a")); + Assert.That(para1.ReferenceForSorting(v2Seg1, v2Seg1Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00002a {v2Seg1Idx:D10}")); + var v2Seg2 = 
para1.SegmentsOS[4]; // second segment of two-sentence verse + var v2Seg2Idx = v2Seg2.BeginOffset + 1; + Assert.That(para1.Reference(v2Seg2, v2Seg2Idx).Text, Is.EqualTo("MAT 1:2b")); + Assert.That(para1.ReferenceForSorting(v2Seg2, v2Seg2Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00002b {v2Seg2Idx:D10}")); IStTxtPara para2 = AddParaToMockedSectionContent((IScrSection)para1.Owner.Owner, ScrStyleNames.NormalParagraph); AddRunToMockedPara(para2, "Verse 2 seg 3", null); - var v2seg3 = para2.SegmentsOS[0]; // third segment of three-sentence verse split over two paragraphs. + var v2Seg3 = para2.SegmentsOS[0]; // third segment of three-sentence verse split over two paragraphs. + var v2Seg3Idx = v2Seg3.BeginOffset + 1; + Assert.That(para2.Reference(v2Seg3, v2Seg3Idx).Text, Is.EqualTo("MAT 1:2c")); + Assert.That(para2.ReferenceForSorting(v2Seg3, v2Seg3Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00002c {v2Seg3Idx:D10}")); var newSection = AddSectionToMockedBook(m_book); IStTxtPara para3 = AddParaToMockedSectionContent(newSection, ScrStyleNames.NormalParagraph); AddRunToMockedPara(para3, "Verse 2 seg 4", null); - var v2seg4 = para3.SegmentsOS[0]; // fourth segment of four-sentence verse split over two sections(!). + var v2Seg4 = para3.SegmentsOS[0]; // fourth segment of four-sentence verse split over two sections(!). + var v2Seg4Idx = v2Seg4.BeginOffset + 1; // JohnT: arguably this should give MAT 1:2d. The current implementation does not detect the // segments in the previous section. 
- Assert.That(para3.Reference(v2seg4, v2seg4.BeginOffset + 1).Text, Is.EqualTo("MAT 1:2")); + Assert.That(para3.Reference(v2Seg4, v2Seg4Idx).Text, Is.EqualTo("MAT 1:2")); + Assert.That(para3.ReferenceForSorting(v2Seg4, v2Seg4Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00002 {v2Seg4Idx:D10}")); + + var scrBook1Samuel = CreateBookData(9, "1 Samuel"); + var scrBookSusanna = CreateBookData(75/*?*/, "Susanna"); + // TODO (Hasso) 2022.03: Enoch or some other >100 book + } + + [TestCase("", ExpectedResult = "00000")] + [TestCase("9", ExpectedResult = "00009")] + [TestCase("176", ExpectedResult = "00176")] + [TestCase("31103", ExpectedResult = "31103")] + public string ZeroPadForStringComparison(string intInRef) + { + return ScrTxtPara.ZeroPadForStringComparison(intInRef); } #endregion diff --git a/tests/SIL.LCModel.Tests/DomainImpl/StTxtParaTests.cs b/tests/SIL.LCModel.Tests/DomainImpl/StTxtParaTests.cs index ca62f44e..4f337239 100644 --- a/tests/SIL.LCModel.Tests/DomainImpl/StTxtParaTests.cs +++ b/tests/SIL.LCModel.Tests/DomainImpl/StTxtParaTests.cs @@ -35,6 +35,84 @@ protected override void CreateTestData() } #endregion + #region Reference[ForSorting] method tests + [Test] + public void Reference_ForSorting() + { + var para1 = AddParaToMockedText(m_stText, null); + AddRunToMockedPara(para1, "This text is indexed. It is also segmented.", null); + + var para2 = AddParaToMockedText(m_stText, null); + AddRunToMockedPara(para2, "This is the second paragraph. It is runny. 
It has three sentences.", null); + + // SUT + var reference = para1.Reference(para1.SegmentsOS[0], 10); + var refForSort = para1.ReferenceForSorting(para1.SegmentsOS[0], 10); + Assert.That(reference.Text, Is.EqualTo("My Inter 1.1")); + Assert.That(refForSort.Text, Is.EqualTo("My Interlinear Text 0000000001.0000000001 0000000010")); + reference = para1.Reference(para1.SegmentsOS[1], 25); + refForSort = para1.ReferenceForSorting(para1.SegmentsOS[1], 25); + Assert.That(reference.Text, Is.EqualTo("My Inter 1.2")); + Assert.That(refForSort.Text, Is.EqualTo("My Interlinear Text 0000000001.0000000002 0000000025")); + reference = para2.Reference(para2.SegmentsOS[0], 5); + refForSort = para2.ReferenceForSorting(para2.SegmentsOS[0], 5); + Assert.That(reference.Text, Is.EqualTo("My Inter 2.1")); + Assert.That(refForSort.Text, Is.EqualTo("My Interlinear Text 0000000002.0000000001 0000000005")); + } + + [Test] + public void Reference_ForSorting_TextHasAbbr() + { + ((IText)m_stText.Owner).Abbreviation.set_String(Cache.DefaultVernWs, "MIT"); + + var para1 = AddParaToMockedText(m_stText, null); + AddRunToMockedPara(para1, "This text is indexed. 
It is also segmented.", null); + + var para2 = AddParaToMockedText(m_stText, null); + AddRunToMockedPara(para2, "This is the second paragraph that is in this text", null); + + // SUT + var reference = para1.Reference(para1.SegmentsOS[0], 10); + var refForSort = para1.ReferenceForSorting(para1.SegmentsOS[0], 10); + Assert.That(reference.Text, Is.EqualTo("MIT 1.1")); + Assert.That(refForSort.Text, Is.EqualTo("MIT 0000000001.0000000001 0000000010")); + reference = para1.Reference(para1.SegmentsOS[1], 25); + refForSort = para1.ReferenceForSorting(para1.SegmentsOS[1], 25); + Assert.That(reference.Text, Is.EqualTo("MIT 1.2")); + Assert.That(refForSort.Text, Is.EqualTo("MIT 0000000001.0000000002 0000000025")); + reference = para2.Reference(para2.SegmentsOS[0], 5); + refForSort = para2.ReferenceForSorting(para2.SegmentsOS[0], 5); + Assert.That(reference.Text, Is.EqualTo("MIT 2.1")); + Assert.That(refForSort.Text, Is.EqualTo("MIT 0000000002.0000000001 0000000005")); + reference = para2.Reference(para2.SegmentsOS[0], 34); + refForSort = para2.ReferenceForSorting(para2.SegmentsOS[0], 34); + Assert.That(reference.Text, Is.EqualTo("MIT 2.1")); + Assert.That(refForSort.Text, Is.EqualTo("MIT 0000000002.0000000001 0000000034")); + } + + [Test] + public void Reference_ForSorting_TextHasNoTitleOrAbbr() + { + var untitledText = AddInterlinearTextToLangProj(Strings.ksStars).ContentsOA; + + var para1 = AddParaToMockedText(untitledText, null); + AddRunToMockedPara(para1, "This is text.", null); + + // SUT + var reference = para1.Reference(para1.SegmentsOS[0], 5); + var refForSort = para1.ReferenceForSorting(para1.SegmentsOS[0], 5); + Assert.That(reference.Text, Is.EqualTo("1.1")); + Assert.That(refForSort.Text, Is.EqualTo(" 0000000001.0000000001 0000000005")); + } + + [TestCase(0, ExpectedResult = "0000000000")] + [TestCase(1, ExpectedResult = "0000000001")] + [TestCase(12, ExpectedResult = "0000000012")] + [TestCase(512, ExpectedResult = "0000000512")] + [TestCase(int.MaxValue, 
ExpectedResult = "2147483647")] + public string ZeroPadForStringComparison(int i) => StTxtPara.ZeroPadForStringComparison(i); + #endregion Reference[ForSorting] method tests + #region ReplaceTextRange method tests ///-------------------------------------------------------------------------------------- ///