Skip to content

Commit

Permalink
[API] LT-8457: Sort References Properly
Browse files Browse the repository at this point in the history
* Add IStTxtPara.ReferenceForSorting(ISegment, int) to
 compute a computer-sortable reference string
* Add TsStringUtils.IsNullOrPlaceholder(ITsString toTest, string placeholder)

Part of https://jira.sil.org/browse/LT-8457
+semver: minor
  • Loading branch information
papeh committed Mar 29, 2022
1 parent 656e804 commit 89848af
Show file tree
Hide file tree
Showing 8 changed files with 276 additions and 30 deletions.
1 change: 1 addition & 0 deletions LCM.sln.DotSettings
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
<s:String x:Key="/Default/PatternsAndTemplates/Todo/TodoPatterns/=C6562928DAAA5C419C0A4E5109498163/Pattern/@EntryValue">(?&lt;=\W|^)(?&lt;TAG&gt;REVIEW)(\W|$)(.*)</s:String>
<s:String x:Key="/Default/PatternsAndTemplates/Todo/TodoPatterns/=C6562928DAAA5C419C0A4E5109498163/TodoIconStyle/@EntryValue">Normal</s:String>
<s:Boolean x:Key="/Default/UserDictionary/Words/=analyses/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=bldr/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Charis/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Duolos/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=flid/@EntryIndexedValue">True</s:Boolean>
Expand Down
10 changes: 9 additions & 1 deletion src/SIL.LCModel.Core/Text/TsStringUtils.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2004-2020 SIL International
// Copyright (c) 2004-2022 SIL International
// This software is licensed under the LGPL, version 2.1 or later
// (http://www.gnu.org/licenses/lgpl-2.1.html)

Expand Down Expand Up @@ -1856,6 +1856,14 @@ public static bool IsNullOrEmpty(ITsString testMe)
{
return testMe == null || testMe.Length <= 0;
}

/// <summary>
/// Determines whether a TsString carries no real content: it is null, it is empty,
/// or its text equals the supplied placeholder.
/// </summary>
/// <param name="testMe">The string to examine; may be null.</param>
/// <param name="placeholder">The placeholder text that counts as "no content" (e.g. ***).</param>
/// <returns>
/// True if the string is null, empty, or a placeholder (e.g. ***)
/// </returns>
public static bool IsNullOrPlaceholder(ITsString testMe, string placeholder)
{
	// Null and empty strings never reach the text comparison below.
	if (IsNullOrEmpty(testMe))
	{
		return true;
	}

	return testMe.Text.Equals(placeholder);
}
}
#endregion

Expand Down
72 changes: 64 additions & 8 deletions src/SIL.LCModel/DomainImpl/ScrTxtPara.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
// Copyright (c) 2003-2018 SIL International
// Copyright (c) 2003-2022 SIL International
// This software is licensed under the LGPL, version 2.1 or later
// (http://www.gnu.org/licenses/lgpl-2.1.html)

using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using Icu;
using SIL.LCModel.Core.KernelInterfaces;
using SIL.LCModel.Core.Scripture;
Expand Down Expand Up @@ -2117,25 +2120,78 @@ public override ITsString Reference(ISegment seg, int ich)
{
var stText = Owner as IStText;
if (stText == null)
return Cache.MakeUserTss("unknown"); // should never happen, I think?
return Cache.MakeUserTss(Strings.ksStars); // should never happen, I think?
if (stText.OwningFlid == ScrSectionTags.kflidContent)
{
// Body of Scripture. Figure a book/chapter/verse
IScrBook book = (IScrBook) stText.Owner.Owner;
string mainRef = ScriptureServices.FullScrRef(this, ich, book.BestUIAbbrev).Trim();
return Cache.MakeUserTss(mainRef + ScriptureServices.VerseSegLabel(this, SegmentsOS.IndexOf(seg)));
}
if (stText.OwningFlid == ScrSectionTags.kflidHeading)
//if (stText.OwningFlid == ScrSectionTags.kflidHeading)
//{
// // use the section title without qualifiers.
// return stText.Title.BestVernacularAnalysisAlternative;
//}
//if (stText.OwningFlid == ScrBookTags.kflidTitle)
//{
// return stText.Title.BestVernacularAnalysisAlternative;
//}
return Cache.MakeUserTss(Strings.ksStars); // should never happen, I think?
}

/// <inheritdoc/>
public override ITsString ReferenceForSorting(ISegment seg, int ich)
{
if (!(Owner is IStText stText))
{
// use the section title without qualifiers.
return stText.Title.BestVernacularAnalysisAlternative;
return Scripture.Name.NotFoundTss;
}

// Use a prefix to make scripture references sort together when mixed with other references
// (Scripture is sorted canonically, but a comparer for mixed references would sort alphabetically)
var bldr = new StringBuilder(RefForSortingPrefix);
switch (stText.OwningFlid)
{
case ScrSectionTags.kflidContent:
var book = (IScrBook)stText.Owner.Owner;
// Append the book number to sort in canonical order.
bldr.Append(book.CanonicalNum);
// Append the book name. It makes no difference for sorting, but could make debugging easier.
bldr.Append("_").Append(book.BestUIAbbrev);

var refSansBookBldr = new StringBuilder(ScriptureServices.FullScrRef(this, ich, string.Empty).Trim());
var numbersInRef = new Regex(@"\d+").Matches(refSansBookBldr.ToString());
foreach (var number in numbersInRef.Cast<Match>().Reverse())
{
ZeroPadForStringComparison(refSansBookBldr, number.Index, number.Length);
}
bldr.Append(" ").Append(refSansBookBldr).Append(ScriptureServices.VerseSegLabel(this, SegmentsOS.IndexOf(seg)));

// add ich
bldr.Append(" ").Append(ZeroPadForStringComparison(ich));
return Cache.MakeUserTss(bldr.ToString());
}
if (stText.OwningFlid == ScrBookTags.kflidTitle)
return Cache.MakeUserTss(Strings.ksStars);
}

protected internal const string RefForSortingPrefix = "0 Scr ";

protected internal static void ZeroPadForStringComparison(StringBuilder bldr, int index, int cExistingDigits)
{
for (var remaining = 5 - cExistingDigits; remaining > 0; remaining--)
{
return stText.Title.BestVernacularAnalysisAlternative;
bldr.Insert(index, "0");
}
return Cache.MakeUserTss("unknown"); // should never happen, I think?
}

/// <summary>
/// Left-pads the digits of a number taken from a reference with zeroes to a width of five
/// characters, so that the numbers in references used for sorting have a fixed width.
/// Input that is already five or more characters long is returned unchanged.
/// </summary>
/// <param name="intInRef">The digits of the number, e.g. "9" or "176".</param>
/// <returns>The zero-padded digits, e.g. "00009" or "00176".</returns>
protected internal static string ZeroPadForStringComparison(string intInRef)
{
	// The width of 5 matches the StringBuilder overload of this method.
	return intInRef.PadLeft(5, '0');
}

/// ------------------------------------------------------------------------------------
/// <summary>
/// Gets the footnote sequence.
Expand Down
58 changes: 53 additions & 5 deletions src/SIL.LCModel/DomainImpl/StTxtPara.cs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public IStTxtPara PreviousParagraph
/// <summary>
/// Return a Reference (e.g., Scripture reference, or text abbreviation/para #/sentence#) for the specified character
/// position (in the whole paragraph), which is assumed to belong to the specified segment.
/// (For now, ich is not actually used, but it may become important if we decide not to split segements for
/// (For now, ich is not actually used, but it may become important if we decide not to split segments for
/// verse numbers.)
/// Overridden in ScrTxtPara to handle special cases for Scripture refs.
/// </summary>
Expand Down Expand Up @@ -100,10 +100,6 @@ public virtual ITsString Reference(ISegment seg, int ich)
if (bldr.Length > 0)
bldr.Replace(bldr.Length, bldr.Length, " ", props);

// if Scripture.IsResponsibleFor(stText) we should try to get the verse number of the annotation.
//if (stText.OwningFlid == (int)Text.TextTags.kflidContents)
//{

// Insert paragraph number.
int ipara = stText.ParagraphsOS.IndexOf(this) + 1;
bldr.Replace(bldr.Length, bldr.Length, ipara.ToString(), props);
Expand All @@ -117,6 +113,58 @@ public virtual ITsString Reference(ISegment seg, int ich)
return bldr.GetString();
}

/// <inheritdoc/>
/// <remarks>
/// The result is the text's name (its abbreviation if available, otherwise its title; omitted
/// when missing or a placeholder), a space, the zero-padded 1-based paragraph number, a period,
/// the zero-padded 1-based segment number, a space, and the zero-padded ich.
/// </remarks>
public virtual ITsString ReferenceForSorting(ISegment seg, int ich)
{
	var stText = Owner as IStText;
	if (stText == null)
	{
		return TsStringUtils.EmptyString(Cache.DefaultUserWs);
	}

	// Prefer the abbreviation of the owning text; fall back to the title when there is no
	// owning IText or its abbreviation is missing or only a placeholder.
	ITsString tssName = null;
	var hasAbbreviation = false;
	if (stText.Owner is IText owningText)
	{
		tssName = owningText.Abbreviation.BestVernacularAnalysisAlternative;
		hasAbbreviation = !TsStringUtils.IsNullOrPlaceholder(tssName, stText.Title.NotFoundTss.Text);
	}
	if (!hasAbbreviation)
	{
		tssName = stText.Title.BestVernacularAnalysisAlternative;
	}

	// Build text properties that specify nothing but the writing system of the name.
	var wsPropsBldr = TsStringUtils.MakePropsBldr();
	var wsOfName = tssName.get_Properties(0).GetIntPropValues((int)FwTextPropType.ktptWs, out _);
	wsPropsBldr.SetIntPropValues((int)FwTextPropType.ktptWs, (int)FwTextPropVar.ktpvDefault, wsOfName);
	var wsProps = wsPropsBldr.GetTextProps();

	// Begin with the name, unless it is missing or only a placeholder.
	var refBldr = TsStringUtils.IsNullOrPlaceholder(tssName, stText.Title.NotFoundTss.Text)
		? new TsStrBldr()
		: tssName.GetBldr();

	// Start with a space even if we don't have a title, so untitled texts sort to the top.
	refBldr.Append(" ", wsProps);

	// Paragraph and segment numbers are 1-based and zero-padded.
	var paraNumber = stText.ParagraphsOS.IndexOf(this) + 1;
	var segNumber = SegmentsOS.IndexOf(seg) + 1;
	refBldr.Append(ZeroPadForStringComparison(paraNumber), wsProps)
		.Append(".", wsProps)
		.Append(ZeroPadForStringComparison(segNumber), wsProps);

	// The offset goes last so that two references in the same segment are sorted properly (LT-8457)
	refBldr.Append(" ", wsProps).Append(ZeroPadForStringComparison(ich), wsProps);

	return refBldr.GetString();
}

/// <summary>
/// Formats the given int as a decimal string, zero-padded to ten digits
/// (the number of digits in <see cref="int.MaxValue"/>).
/// </summary>
/// <param name="i">The number to format.</param>
/// <returns>The zero-padded number, e.g. "0000000007" for 7.</returns>
protected internal static string ZeroPadForStringComparison(int i)
{
	// Ten digits, because int.MaxValue.ToString().Length is 10
	return $"{i:D10}";
}

/// ------------------------------------------------------------------------------------
/// <summary>
/// Finds the ORC of the specified picture and deletes it from the paragraph and any
Expand Down
22 changes: 16 additions & 6 deletions src/SIL.LCModel/InterfaceAdditions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3136,15 +3136,25 @@ IStTxtPara PreviousParagraph
List<IConstChartWordGroup> GetChartCellRefs();

/// ------------------------------------------------------------------------------------
/// <summary>
/// Return a Reference (e.g., Scripture reference, or text abbreviation/para #/sentence#) for the specified character
/// position (in the whole paragraph), which is assumed to belong to the specified segment.
/// (For now, ich is not actually used, but it may become important if we decide not to split segements for
/// verse numbers.)
/// </summary>
/// <summary>
/// Return a Reference (e.g., Scripture reference, or text abbreviation+para #+sentence #) for the specified character
/// position (in the whole paragraph), which is assumed to belong to the specified segment.
/// (For now, ich is not actually used, but it may become important if we decide not to split segments for
/// verse numbers.)
/// </summary>
/// ------------------------------------------------------------------------------------
ITsString Reference(ISegment seg, int ich);

/// ------------------------------------------------------------------------------------
/// <summary>
/// Return a computer-sortable Reference (e.g., Scripture reference, or text abbreviation+para #+sentence #) for the
/// specified character position (in the whole paragraph), which is assumed to belong to the specified segment.
/// To allow greater accuracy and precision in sorting, numbers are zero-padded to a fixed width and ich
/// is included at the end. Paragraph numbers, segment numbers, and ich are padded to ten digits (the length of
/// <see cref="int.MaxValue"/>); the Scripture implementation pads chapter and verse numbers to five digits.
/// </summary>
/// <param name="seg">The segment that the character position belongs to.</param>
/// <param name="ich">The character position in the whole paragraph; appended, zero-padded, to the result.</param>
/// <returns>The reference as a string intended for sorting rather than for display.</returns>
/// ------------------------------------------------------------------------------------
ITsString ReferenceForSorting(ISegment seg, int ich);

/// ------------------------------------------------------------------------------------
/// <summary>
/// Splits the paragraph at the specified character index.
Expand Down
24 changes: 23 additions & 1 deletion tests/SIL.LCModel.Core.Tests/Text/TsStringUtilsTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2004-2021 SIL International
// Copyright (c) 2004-2022 SIL International
// This software is licensed under the LGPL, version 2.1 or later
// (http://www.gnu.org/licenses/lgpl-2.1.html)

Expand Down Expand Up @@ -2110,6 +2110,28 @@ public void RemoveIllegalXmlChars()
Assert.That(TsStringUtils.RemoveIllegalXmlChars(outOfOrderSurrogates).Text, Is.EqualTo("\xd800\xdc00z"));
}

/// <summary>
/// A null TsString counts as both "null or empty" and "null or placeholder".
/// </summary>
[Test]
public void IsNull_OrMissing_Null()
{
	var isNullOrEmpty = TsStringUtils.IsNullOrEmpty(null);
	Assert.That(isNullOrEmpty, Is.True, "null is null or empty");

	var isNullOrPlaceholder = TsStringUtils.IsNullOrPlaceholder(null, "***");
	Assert.That(isNullOrPlaceholder, Is.True, "null is null or placeholder");
}

/// <summary>
/// Only the empty string is reported as null-or-empty; a placeholder and ordinary text are not.
/// </summary>
[TestCase("", ExpectedResult = true)]
[TestCase("***", ExpectedResult = false)]
[TestCase("t", ExpectedResult = false)]
public bool IsNullOrEmpty(string actual)
{
	var tss = TsStringUtils.MakeString(actual, m_wsf.UserWs);
	return TsStringUtils.IsNullOrEmpty(tss);
}

/// <summary>
/// The empty string and the placeholder itself are reported as null-or-placeholder; ordinary text is not.
/// </summary>
[TestCase("", ExpectedResult = true)]
[TestCase("***", ExpectedResult = true)]
[TestCase("t", ExpectedResult = false)]
public bool IsNullOrPlaceholder(string actual)
{
	var tss = TsStringUtils.MakeString(actual, m_wsf.UserWs);
	return TsStringUtils.IsNullOrPlaceholder(tss, "***");
}
#endregion
}
}
41 changes: 32 additions & 9 deletions tests/SIL.LCModel.Tests/DomainImpl/ScrTxtParaTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -549,23 +549,46 @@ public void Reference()
AddDataToMatthew();
var para1 = (IStTxtPara) m_book.SectionsOS[1].ContentOA.ParagraphsOS[0]; // Actually ScrTxtPara
var seg = para1.SegmentsOS[1]; // first content ref, after the chapter and verse number stuff.
Assert.That(para1.Reference(seg, seg.BeginOffset + 1).Text, Is.EqualTo("MAT 1:1"));
var v1Seg1Idx = seg.BeginOffset + 1;
Assert.That(para1.Reference(seg, v1Seg1Idx).Text, Is.EqualTo("MAT 1:1"));
Assert.That(para1.ReferenceForSorting(seg, v1Seg1Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00001 {v1Seg1Idx:D10}"));
AddRunToMockedPara(para1, "Verse two second sentence.", null);
var v2seg1 = para1.SegmentsOS[3]; // first segment of two-sentence verse
Assert.That(para1.Reference(v2seg1, v2seg1.BeginOffset + 1).Text, Is.EqualTo("MAT 1:2a"));
var v2seg2 = para1.SegmentsOS[4]; // first segment of two-sentence verse
Assert.That(para1.Reference(v2seg2, v2seg2.BeginOffset + 1).Text, Is.EqualTo("MAT 1:2b"));
var v2Seg1 = para1.SegmentsOS[3]; // first segment of two-sentence verse
var v2Seg1Idx = v2Seg1.BeginOffset + 1;
Assert.That(para1.Reference(v2Seg1, v2Seg1Idx).Text, Is.EqualTo("MAT 1:2a"));
Assert.That(para1.ReferenceForSorting(v2Seg1, v2Seg1Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00002a {v2Seg1Idx:D10}"));
var v2Seg2 = para1.SegmentsOS[4]; // second segment of two-sentence verse
var v2Seg2Idx = v2Seg2.BeginOffset + 1;
Assert.That(para1.Reference(v2Seg2, v2Seg2Idx).Text, Is.EqualTo("MAT 1:2b"));
Assert.That(para1.ReferenceForSorting(v2Seg2, v2Seg2Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00002b {v2Seg2Idx:D10}"));
IStTxtPara para2 = AddParaToMockedSectionContent((IScrSection)para1.Owner.Owner, ScrStyleNames.NormalParagraph);
AddRunToMockedPara(para2, "Verse 2 seg 3", null);
var v2seg3 = para2.SegmentsOS[0]; // third segment of three-sentence verse split over two paragraphs.
Assert.That(para2.Reference(v2seg3, v2seg3.BeginOffset + 1).Text, Is.EqualTo("MAT 1:2c"));
var v2Seg3 = para2.SegmentsOS[0]; // third segment of three-sentence verse split over two paragraphs.
var v2Seg3Idx = v2Seg3.BeginOffset + 1;
Assert.That(para2.Reference(v2Seg3, v2Seg3Idx).Text, Is.EqualTo("MAT 1:2c"));
Assert.That(para2.ReferenceForSorting(v2Seg3, v2Seg3Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00002c {v2Seg3Idx:D10}"));
var newSection = AddSectionToMockedBook(m_book);
IStTxtPara para3 = AddParaToMockedSectionContent(newSection, ScrStyleNames.NormalParagraph);
AddRunToMockedPara(para3, "Verse 2 seg 4", null);
var v2seg4 = para3.SegmentsOS[0]; // fourth segment of four-sentence verse split over two sections(!).
var v2Seg4 = para3.SegmentsOS[0]; // fourth segment of four-sentence verse split over two sections(!).
var v2Seg4Idx = v2Seg4.BeginOffset + 1;
// JohnT: arguably this should give MAT 1:2d. The current implementation does not detect the
// segments in the previous section.
Assert.That(para3.Reference(v2seg4, v2seg4.BeginOffset + 1).Text, Is.EqualTo("MAT 1:2"));
Assert.That(para3.Reference(v2Seg4, v2Seg4Idx).Text, Is.EqualTo("MAT 1:2"));
Assert.That(para3.ReferenceForSorting(v2Seg4, v2Seg4Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00002 {v2Seg4Idx:D10}"));

var scrBook1Samuel = CreateBookData(9, "1 Samuel");
var scrBookSusanna = CreateBookData(75/*?*/, "Susanna");
// TODO (Hasso) 2022.03: Enoch or some other >100 book
}

/// <summary>
/// Digits from a reference are left-padded with zeroes to five characters; five digits are left unchanged.
/// </summary>
[TestCase("", ExpectedResult = "00000")]
[TestCase("9", ExpectedResult = "00009")]
[TestCase("176", ExpectedResult = "00176")]
[TestCase("31103", ExpectedResult = "31103")]
public string ZeroPadForStringComparison(string intInRef) =>
	ScrTxtPara.ZeroPadForStringComparison(intInRef);
#endregion

Expand Down
Loading

0 comments on commit 89848af

Please sign in to comment.