Skip to content

Commit

Permalink
Wrote necessary plus some additional tests to
Browse files Browse the repository at this point in the history
investigate possible alignment and versification
issues with deuterocanonical books
  • Loading branch information
mudiagaobrikisil committed Feb 5, 2025
1 parent 3a9b17e commit e193846
Show file tree
Hide file tree
Showing 30 changed files with 17,006 additions and 1 deletion.
32 changes: 32 additions & 0 deletions src/SIL.Machine/Scripture/ScVersExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
using System.Collections.Generic;

namespace SIL.Scripture.Extensions
{
public static class ScrVersExtensions
{
    /// <summary>
    /// Collects every verse reference of a book that the versification does not exclude.
    /// </summary>
    /// <param name="scrVers">The versification queried for chapter counts, verse counts and exclusions.</param>
    /// <param name="bookNum">The number of the book whose verses are listed.</param>
    /// <returns>
    /// The non-excluded verse references, ordered by chapter and then by verse. The list is fully
    /// built before the method returns.
    /// </returns>
    public static IEnumerable<VerseRef> GetReferencesForBook(this ScrVers scrVers, int bookNum)
    {
        var result = new List<VerseRef>();
        int chapterCount = scrVers.GetLastChapter(bookNum);

        for (int chapter = 1; chapter <= chapterCount; chapter++)
        {
            int verseCount = scrVers.GetLastVerse(bookNum, chapter);

            for (int verse = 1; verse <= verseCount; verse++)
            {
                // Verses flagged as excluded by the versification are left out of the result.
                if (scrVers.IsExcluded(VerseRef.GetBBBCCCVVV(bookNum, chapter, verse)))
                {
                    continue;
                }

                result.Add(new VerseRef(bookNum, chapter, verse, scrVers));
            }
        }

        return result;
    }
}
}
132 changes: 132 additions & 0 deletions tests/SIL.Machine.Tests/Corpora/CorporaTestHelpers.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
using System.Globalization;
using System.IO.Compression;
using System.Text.RegularExpressions;
using NUnit.Framework.Constraints;
using SIL.Scripture;

namespace SIL.Machine.Corpora;

Expand All @@ -18,6 +20,16 @@ internal static class CorporaTestHelpers
public static readonly string UsfmSourceProjectPath = Path.Combine(TestDataPath, "usfm", "source");
public static readonly string UsxTestProjectPath = Path.Combine(TestDataPath, "usx", "Tes");
public static readonly string TextTestProjectPath = Path.Combine(TestDataPath, "txt");
/// <summary>
/// Path built by combining TestDataPath with "deuterocanonicals" and "source"; this is the
/// location <see cref="GetDeuterocanonicalSourceCorpus"/> opens.
/// </summary>
public static readonly string DeuterocanonicalsSourcePath = Path.Combine(
TestDataPath,
"deuterocanonicals",
"source"
);
/// <summary>
/// Path built by combining TestDataPath with "deuterocanonicals" and "target"; this is the
/// location <see cref="GetDeuterocanonicalTargetCorpus"/> opens.
/// </summary>
public static readonly string DeuterocanonicalsTargetPath = Path.Combine(
TestDataPath,
"deuterocanonicals",
"target"
);

public static string CreateTestDblBundle()
{
Expand All @@ -43,4 +55,124 @@ public static EqualConstraint IgnoreLineEndings(this EqualConstraint constraint)
(actual, expected) => actual.ReplaceLineEndings() == expected.ReplaceLineEndings()
);
}

/// <summary>
/// Creates a <see cref="ParatextTextCorpus"/> over the deuterocanonical source path,
/// constructed with <c>includeAllText</c> set to <c>true</c>.
/// </summary>
/// <returns>A new corpus instance; a fresh one is created on every call.</returns>
public static ParatextTextCorpus GetDeuterocanonicalSourceCorpus() =>
    new ParatextTextCorpus(CorporaTestHelpers.DeuterocanonicalsSourcePath, includeAllText: true);

/// <summary>
/// Creates a <see cref="ParatextTextCorpus"/> over the deuterocanonical target path,
/// constructed with <c>includeAllText</c> set to <c>true</c>.
/// </summary>
/// <returns>A new corpus instance; a fresh one is created on every call.</returns>
public static ParatextTextCorpus GetDeuterocanonicalTargetCorpus() =>
    new ParatextTextCorpus(CorporaTestHelpers.DeuterocanonicalsTargetPath, includeAllText: true);

/// <summary>
/// Turns a reference with a hyphenated verse range (e.g., "S3Y 1:1-29") into one
/// <see cref="ScriptureRef"/> per verse. A reference without a hyphen in its verse part is
/// parsed as-is and yielded as a single item.
/// </summary>
/// <param name="verseRange">Reference text; everything before the ':' is treated as book and chapter.</param>
/// <param name="versification">The versification passed to <see cref="ScriptureRef.Parse"/>.</param>
/// <returns>
/// A lazily evaluated sequence of references; splitting and parsing happen only when the
/// sequence is enumerated.
/// </returns>
public static IEnumerable<ScriptureRef> ExpandVerseRange(string verseRange, ScrVers versification)
{
    string[] pieces = verseRange.Split(':');
    string chapterPrefix = pieces[0].Trim();
    string versePart = pieces[1];

    if (!versePart.Contains('-'))
    {
        // No range: hand the untouched input to the parser.
        yield return ScriptureRef.Parse(verseRange, versification);
        yield break;
    }

    int[] bounds = versePart.Split('-').Select(int.Parse).ToArray();
    int current = bounds[0];
    int last = bounds[1];

    while (current <= last)
    {
        yield return ScriptureRef.Parse($"{chapterPrefix}:{current}", versification);
        current++;
    }
}

/// <summary>
/// Expands range-to-range verse mappings into verse-to-verse mappings.
/// </summary>
/// <param name="mappings">
/// Pairs of references in the format accepted by <see cref="ParseRange"/>, e.g.
/// "S3Y 1:1-29" mapped to "DAG 3:24-52".
/// </param>
/// <returns>
/// A new dictionary. A pair whose two sides are both single verses is copied unchanged; any other
/// pair contributes one "BOOK CHAPTER:VERSE" entry per verse, pairing source and target verses by
/// position within their ranges.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The source and target ranges of a pair cover a different number of verses.
/// </exception>
public static Dictionary<string, string> ExpandVerseMappings(Dictionary<string, string> mappings)
{
    var result = new Dictionary<string, string>();

    foreach (KeyValuePair<string, string> pair in mappings)
    {
        var (srcBook, srcChapter, srcFirst, srcLast, srcIsSingle) = ParseRange(pair.Key);
        var (tgtBook, tgtChapter, tgtFirst, tgtLast, tgtIsSingle) = ParseRange(pair.Value);

        // Only when BOTH sides are single verses is the pair copied through with its original text.
        if (srcIsSingle && tgtIsSingle)
        {
            result[pair.Key] = pair.Value;
        }
        else
        {
            int srcSpan = srcLast - srcFirst + 1;
            int tgtSpan = tgtLast - tgtFirst + 1;

            if (srcSpan != tgtSpan)
            {
                throw new InvalidOperationException(
                    "Source and target verse ranges must have the same number of verses."
                );
            }

            for (int offset = 0; offset < srcSpan; offset++)
            {
                result[$"{srcBook} {srcChapter}:{srcFirst + offset}"] =
                    $"{tgtBook} {tgtChapter}:{tgtFirst + offset}";
            }
        }
    }

    return result;
}

/// <summary>
/// Splits a reference such as "S3Y 1:1-29" or "S3Y 1:5" into its book, chapter and verse bounds.
/// </summary>
/// <param name="range">
/// Reference text in the form "BOOK CHAPTER:VERSE" or "BOOK CHAPTER:START-END". Leading, trailing
/// and repeated spaces between the book and the chapter are tolerated.
/// </param>
/// <returns>
/// The book token, the chapter number, the first and last verse (equal when no "-END" part is
/// present), and a flag that is <c>true</c> when the first and last verse are the same.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="range"/> is <c>null</c>.</exception>
/// <exception cref="FormatException">
/// The text lacks the "BOOK CHAPTER:VERSE" structure or contains a non-numeric chapter or verse.
/// </exception>
public static (string Book, int Chapter, int StartVerse, int EndVerse, bool IsSingleVerse) ParseRange(string range)
{
    ArgumentNullException.ThrowIfNull(range);

    // Dropping empty entries keeps padded input ("  S3Y   1:1 ") from shifting the chapter token
    // out of position.
    string[] parts = range.Split(' ', StringSplitOptions.RemoveEmptyEntries);
    if (parts.Length < 2)
    {
        throw new FormatException($"Expected \"BOOK CHAPTER:VERSE\" but got \"{range}\".");
    }

    string book = parts[0];

    string[] chapterAndVerses = parts[1].Split(':');
    if (chapterAndVerses.Length < 2)
    {
        throw new FormatException($"Expected \"CHAPTER:VERSE\" after the book in \"{range}\".");
    }

    // References are machine-readable data, so parse independently of the current culture.
    int chapter = int.Parse(chapterAndVerses[0], CultureInfo.InvariantCulture);

    string[] verseBounds = chapterAndVerses[1].Split('-');
    int startVerse = int.Parse(verseBounds[0], CultureInfo.InvariantCulture);
    int endVerse = verseBounds.Length > 1 ? int.Parse(verseBounds[1], CultureInfo.InvariantCulture) : startVerse;

    return (book, chapter, startVerse, endVerse, startVerse == endVerse);
}

/// <summary>
/// Removes every occurrence of each unwanted substring from a corpus string.
/// </summary>
/// <param name="input">The text to clean.</param>
/// <param name="unwanted">
/// Substrings to remove, applied in array order. Null or empty entries are ignored.
/// </param>
/// <returns>
/// The cleaned text. Leading and trailing whitespace is trimmed after each removal, so when no
/// entry is applied the input is returned untrimmed.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="input"/> or <paramref name="unwanted"/> is <c>null</c>.
/// </exception>
public static string CleanString(string input, string[] unwanted)
{
    ArgumentNullException.ThrowIfNull(input);
    ArgumentNullException.ThrowIfNull(unwanted);

    foreach (string item in unwanted)
    {
        // string.Replace throws on an empty search string, and a null or empty entry has nothing
        // to remove, so skip it instead of failing the whole clean-up.
        if (string.IsNullOrEmpty(item))
        {
            continue;
        }

        // Trim after every removal (not once at the end) so later entries see the trimmed text.
        input = input.Replace(item, "").Trim();
    }

    return input;
}

/// <summary>
/// Replaces every run of one or more whitespace characters (spaces, tabs, line breaks) with a
/// single space.
/// </summary>
/// <param name="input">The text to normalize.</param>
/// <returns>The text with each whitespace run collapsed to one space; the ends are not trimmed.</returns>
public static string NormalizeSpaces(string input) => Regex.Replace(input, @"\s+", " ");
}
Loading

0 comments on commit e193846

Please sign in to comment.