Skip to content

Commit

Permalink
Change terminology to Embedded and Style
Browse files Browse the repository at this point in the history
  • Loading branch information
johnml1135 committed Jan 23, 2025
1 parent ab480f1 commit 21864af
Show file tree
Hide file tree
Showing 13 changed files with 107 additions and 121 deletions.
21 changes: 8 additions & 13 deletions src/SIL.Machine/Corpora/IUsfmParserHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -69,29 +69,24 @@ IReadOnlyList<UsfmAttribute> attributes
void EndChar(UsfmParserState state, string marker, IReadOnlyList<UsfmAttribute> attributes, bool closed);

/// <summary>
/// Start of a sub component - a note, figure or cross reference
/// Start of an embedded element - a note, figure or cross reference
/// </summary>
void StartSubComponent(UsfmParserState state, string marker, string caller, string category);
void StartEmbedded(UsfmParserState state, string marker, string caller, string category);

/// <summary>
/// End of a sub component
/// End of an embedded element
/// </summary>
void EndSubComponent(
UsfmParserState state,
string marker,
IReadOnlyList<UsfmAttribute> attributes,
bool closed
);
void EndEmbedded(UsfmParserState state, string marker, IReadOnlyList<UsfmAttribute> attributes, bool closed);

/// <summary>
/// Start of a sub component text
/// Start of embedded text
/// </summary>
void StartSubComponentText(UsfmParserState state);
void StartEmbeddedText(UsfmParserState state);

/// <summary>
/// End of a sub component text
/// End of embedded text
/// </summary>
void EndSubComponentText(UsfmParserState state);
void EndEmbeddedText(UsfmParserState state);

/// <summary>
/// Start of a table
Expand Down
8 changes: 4 additions & 4 deletions src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ public string UpdateUsfm(
IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)> rows,
string fullName = null,
UpdateUsfmTextBehavior textBehavior = UpdateUsfmTextBehavior.PreferExisting,
UpdateUsfmIntraVerseMarkerBehavior subComponentBehavior = UpdateUsfmIntraVerseMarkerBehavior.Preserve,
UpdateUsfmIntraVerseMarkerBehavior formattingBehavior = UpdateUsfmIntraVerseMarkerBehavior.Strip
UpdateUsfmIntraVerseMarkerBehavior embeddedBehavior = UpdateUsfmIntraVerseMarkerBehavior.Preserve,
UpdateUsfmIntraVerseMarkerBehavior styleBehavior = UpdateUsfmIntraVerseMarkerBehavior.Strip
)
{
string fileName = _settings.GetBookFileName(bookId);
Expand All @@ -42,8 +42,8 @@ public string UpdateUsfm(
rows,
fullName is null ? null : $"- {fullName}",
textBehavior,
subComponentBehavior,
formattingBehavior
embeddedBehavior,
styleBehavior
);
try
{
Expand Down
20 changes: 10 additions & 10 deletions src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ public enum ScriptureTextType
None,
NonVerse,
Verse,
SubComponent,
SubComponentText
Embedded,
EmbeddedText
}

public abstract class ScriptureRefUsfmParserHandlerBase : UsfmParserHandlerBase
Expand Down Expand Up @@ -152,7 +152,7 @@ public override void EndSidebar(UsfmParserState state, string marker, bool close
EndParentElement();
}

public override void StartSubComponent(UsfmParserState state, string marker, string caller, string category)
public override void StartEmbedded(UsfmParserState state, string marker, string caller, string category)
{
if (_curVerseRef.IsDefault)
UpdateVerseRef(state.VerseRef, marker);
Expand All @@ -165,15 +165,15 @@ public override void StartSubComponent(UsfmParserState state, string marker, str
}
}

public override void StartSubComponentText(UsfmParserState state)
public override void StartEmbeddedText(UsfmParserState state)
{
_curTextType.Push(ScriptureTextType.SubComponentText);
StartSubComponentText(state, CreateNonVerseRef());
_curTextType.Push(ScriptureTextType.EmbeddedText);
StartEmbeddedText(state, CreateNonVerseRef());
}

public override void EndSubComponentText(UsfmParserState state)
public override void EndEmbeddedText(UsfmParserState state)
{
EndSubComponentText(state, CreateNonVerseRef());
EndEmbeddedText(state, CreateNonVerseRef());
_curTextType.Pop();
}

Expand Down Expand Up @@ -209,9 +209,9 @@ protected virtual void StartNonVerseText(UsfmParserState state, ScriptureRef scr

protected virtual void EndNonVerseText(UsfmParserState state, ScriptureRef scriptureRef) { }

protected virtual void StartSubComponentText(UsfmParserState state, ScriptureRef scriptureRef) { }
protected virtual void StartEmbeddedText(UsfmParserState state, ScriptureRef scriptureRef) { }

protected virtual void EndSubComponentText(UsfmParserState state, ScriptureRef scriptureRef) { }
protected virtual void EndEmbeddedText(UsfmParserState state, ScriptureRef scriptureRef) { }

private void StartVerseText(UsfmParserState state)
{
Expand Down
49 changes: 24 additions & 25 deletions src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase
private readonly List<UsfmToken> _newTokens;
private readonly string _idText;
private readonly UpdateUsfmTextBehavior _textBehavior;
private readonly UpdateUsfmIntraVerseMarkerBehavior _subComponentBehavior;
private readonly UpdateUsfmIntraVerseMarkerBehavior _formattingBehavior;
private readonly UpdateUsfmIntraVerseMarkerBehavior _embeddedBehavior;
private readonly UpdateUsfmIntraVerseMarkerBehavior _styleBehavior;
private readonly Stack<bool> _replace;
private int _rowIndex;
private int _tokenIndex;
Expand All @@ -38,8 +38,8 @@ public UpdateUsfmParserHandler(
IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)> rows = null,
string idText = null,
UpdateUsfmTextBehavior textBehavior = UpdateUsfmTextBehavior.PreferExisting,
UpdateUsfmIntraVerseMarkerBehavior subComponentBehavior = UpdateUsfmIntraVerseMarkerBehavior.Preserve,
UpdateUsfmIntraVerseMarkerBehavior formattingBehavior = UpdateUsfmIntraVerseMarkerBehavior.Strip
UpdateUsfmIntraVerseMarkerBehavior embeddedBehavior = UpdateUsfmIntraVerseMarkerBehavior.Preserve,
UpdateUsfmIntraVerseMarkerBehavior styleBehavior = UpdateUsfmIntraVerseMarkerBehavior.Strip
)
{
_rows = rows ?? Array.Empty<(IReadOnlyList<ScriptureRef>, string)>();
Expand All @@ -48,8 +48,8 @@ public UpdateUsfmParserHandler(
_idText = idText;
_replace = new Stack<bool>();
_textBehavior = textBehavior;
_subComponentBehavior = subComponentBehavior;
_formattingBehavior = formattingBehavior;
_embeddedBehavior = embeddedBehavior;
_styleBehavior = styleBehavior;
}

public IReadOnlyList<UsfmToken> Tokens => _tokens;
Expand Down Expand Up @@ -196,31 +196,31 @@ bool closed
base.EndChar(state, marker, attributes, closed);
}

public override void StartSubComponent(UsfmParserState state, string marker, string caller, string category)
public override void StartEmbedded(UsfmParserState state, string marker, string caller, string category)
{
// strip out notes in verses that are being replaced
if (ReplaceWithNewTokens(state))
SkipTokens(state);
else
CollectTokens(state);

base.StartSubComponent(state, marker, caller, category);
base.StartEmbedded(state, marker, caller, category);
}

public override void EndSubComponent(
public override void EndEmbedded(
UsfmParserState state,
string marker,
IReadOnlyList<UsfmAttribute> attributes,
bool closed
)
{
// strip out notes in verses that are being replaced
if (ReplaceWithNewTokens(state, closed: closed, endSubComponent: true))
if (ReplaceWithNewTokens(state, closed: closed, endEmbedded: true))
SkipTokens(state);
else
CollectTokens(state);

base.EndSubComponent(state, marker, attributes, closed);
base.EndEmbedded(state, marker, attributes, closed);
}

public override void Ref(UsfmParserState state, string marker, string display, string target)
Expand Down Expand Up @@ -289,13 +289,13 @@ protected override void EndNonVerseText(UsfmParserState state, ScriptureRef scri
PopNewTokens();
}

protected override void StartSubComponentText(UsfmParserState state, ScriptureRef scriptureRef)
protected override void StartEmbeddedText(UsfmParserState state, ScriptureRef scriptureRef)
{
IReadOnlyList<string> rowTexts = AdvanceRows(new[] { scriptureRef });
PushNewTokens(rowTexts.Select(t => new UsfmToken(t + " ")));
}

protected override void EndSubComponentText(UsfmParserState state, ScriptureRef scriptureRef)
protected override void EndEmbeddedText(UsfmParserState state, ScriptureRef scriptureRef)
{
PopNewTokens();
}
Expand Down Expand Up @@ -365,7 +365,7 @@ private void SkipTokens(UsfmParserState state)
_tokenIndex = state.Index + 1 + state.SpecialTokenCount;
}

private bool ReplaceWithNewTokens(UsfmParserState state, bool closed = true, bool endSubComponent = false)
private bool ReplaceWithNewTokens(UsfmParserState state, bool closed = true, bool endEmbedded = false)
{
bool untranslatableParagraph =
state.ParaTag?.Marker != null && UsfmStylesheet.IsUntranslatedParagraph(state.ParaTag.Marker);
Expand All @@ -379,11 +379,10 @@ private bool ReplaceWithNewTokens(UsfmParserState state, bool closed = true, boo
}

bool newText = _replace.Count > 0 && _replace.Peek();
bool inSubComponent = state.SubComponentTag != null || endSubComponent;
bool inSubComponentText =
CurrentTextType == ScriptureTextType.SubComponentText
&& !UsfmStylesheet.IsSubComponentText(state.Token.Marker);
bool isFormattingTag = state.Token.Marker != null && !UsfmStylesheet.IsSubComponentPart(state.Token.Marker);
bool inEmbedded = state.EmbeddedTag != null || endEmbedded;
bool inEmbeddedText =
CurrentTextType == ScriptureTextType.EmbeddedText && !UsfmStylesheet.IsEmbeddedText(state.Token.Marker);
bool isStyleTag = state.Token.Marker != null && !UsfmStylesheet.IsEmbeddedPart(state.Token.Marker);

bool existingText = state
.Tokens.Skip(_tokenIndex)
Expand All @@ -394,7 +393,7 @@ private bool ReplaceWithNewTokens(UsfmParserState state, bool closed = true, boo
!untranslatableParagraph
&& newText
&& (!existingText || _textBehavior == UpdateUsfmTextBehavior.PreferNew)
&& (!inSubComponent || inSubComponentText);
&& (!inEmbedded || inEmbeddedText);

if (useNewTokens)
AddNewTokens();
Expand All @@ -404,20 +403,20 @@ private bool ReplaceWithNewTokens(UsfmParserState state, bool closed = true, boo

// figure out when to skip the existing text
bool withinNewText = _replace.Any(r => r);
if (withinNewText && inSubComponent)
if (withinNewText && inEmbedded)
{
if (_subComponentBehavior == UpdateUsfmIntraVerseMarkerBehavior.Strip)
if (_embeddedBehavior == UpdateUsfmIntraVerseMarkerBehavior.Strip)
return true;

if (!inSubComponentText)
if (!inEmbeddedText)
return false;
}

bool skipTokens = useNewTokens && closed;

if (newText && isFormattingTag)
if (newText && isStyleTag)
{
skipTokens = _formattingBehavior == UpdateUsfmIntraVerseMarkerBehavior.Strip;
skipTokens = _styleBehavior == UpdateUsfmIntraVerseMarkerBehavior.Strip;
}
return skipTokens;
}
Expand Down
50 changes: 24 additions & 26 deletions src/SIL.Machine/Corpora/UsfmParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -245,18 +245,16 @@ public bool ProcessToken()
if (paraTag != null && paraTag.TextType != UsfmTextType.VerseText && paraTag.TextType != 0)
CloseAll();
else
CloseSubComponent();
CloseEmbedded();
break;
case UsfmTokenType.SubComponent:
CloseSubComponent();
case UsfmTokenType.Embedded:
CloseEmbedded();
break;
case UsfmTokenType.End:
// If end marker for an active note
if (
State.Stack.Any(e => e.Type == UsfmElementType.SubComponent && (e.Marker + "*" == token.Marker))
)
if (State.Stack.Any(e => e.Type == UsfmElementType.Embedded && (e.Marker + "*" == token.Marker)))
{
CloseSubComponent(closed: true);
CloseEmbedded(closed: true);
break;
}

Expand Down Expand Up @@ -489,7 +487,7 @@ public bool ProcessToken()
break;
}

if (UsfmStylesheet.IsSubComponentPart(token.Marker))
if (UsfmStylesheet.IsEmbeddedPart(token.Marker))
CloseNoteText();

string actualMarker;
Expand All @@ -514,14 +512,14 @@ public bool ProcessToken()
token.Attributes
);

if (IsSubComponentText(token))
if (IsEmbeddedText(token))
{
// Note text should be handled as a full segment
State.Push(new UsfmParserElement(UsfmElementType.SubComponentText, token.Marker));
Handler?.StartSubComponentText(State);
State.Push(new UsfmParserElement(UsfmElementType.EmbeddedText, token.Marker));
Handler?.StartEmbeddedText(State);
}
break;
case UsfmTokenType.SubComponent:
case UsfmTokenType.Embedded:
// Look for category
string noteCategory = null;
if (
Expand All @@ -536,9 +534,9 @@ public bool ProcessToken()
State.SpecialTokenCount += 3;
}

State.Push(new UsfmParserElement(UsfmElementType.SubComponent, token.Marker, token.Attributes));
State.Push(new UsfmParserElement(UsfmElementType.Embedded, token.Marker, token.Attributes));

Handler?.StartSubComponent(State, token.Marker, token.Data, noteCategory);
Handler?.StartEmbedded(State, token.Marker, token.Data, noteCategory);
break;
case UsfmTokenType.Text:
string text = token.Text;
Expand Down Expand Up @@ -608,15 +606,15 @@ public void CloseAll()
private UsfmTokenType DetermineUnknownTokenType()
{
// Unknown tokens inside an embedded element (e.g. a note) are treated as character tokens; otherwise as paragraph tokens
if (State.Stack.Any(e => e.Type == UsfmElementType.SubComponent))
if (State.Stack.Any(e => e.Type == UsfmElementType.Embedded))
return UsfmTokenType.Character;

return UsfmTokenType.Paragraph;
}

private void CloseSubComponent(bool closed = false)
private void CloseEmbedded(bool closed = false)
{
if (State.Stack.Any(elem => elem.Type == UsfmElementType.SubComponent))
if (State.Stack.Any(elem => elem.Type == UsfmElementType.Embedded))
{
UsfmParserElement elem;
do
Expand All @@ -625,14 +623,14 @@ private void CloseSubComponent(bool closed = false)
break;

elem = State.Peek();
CloseElement(closed && elem.Type == UsfmElementType.SubComponent);
} while (elem.Type != UsfmElementType.SubComponent);
CloseElement(closed && elem.Type == UsfmElementType.Embedded);
} while (elem.Type != UsfmElementType.Embedded);
}
}

private void CloseNoteText()
{
while (State.Stack.Count > 0 && State.Peek().Type == UsfmElementType.SubComponentText)
while (State.Stack.Count > 0 && State.Peek().Type == UsfmElementType.EmbeddedText)
CloseElement();
}

Expand All @@ -656,11 +654,11 @@ private void CloseElement(bool closed = false)
case UsfmElementType.Char:
Handler?.EndChar(State, element.Marker, element.Attributes, closed);
break;
case UsfmElementType.SubComponent:
Handler?.EndSubComponent(State, element.Marker, element.Attributes, closed);
case UsfmElementType.Embedded:
Handler?.EndEmbedded(State, element.Marker, element.Attributes, closed);
break;
case UsfmElementType.SubComponentText:
Handler?.EndSubComponentText(State);
case UsfmElementType.EmbeddedText:
Handler?.EndEmbeddedText(State);
break;
case UsfmElementType.Table:
Handler?.EndTable(State);
Expand Down Expand Up @@ -694,9 +692,9 @@ private bool IsRef(UsfmToken token)
&& (token.Marker == "ref");
}

private bool IsSubComponentText(UsfmToken token)
private bool IsEmbeddedText(UsfmToken token)
{
return UsfmStylesheet.IsSubComponentText(token.Marker) && State.SubComponentTag != null;
return UsfmStylesheet.IsEmbeddedText(token.Marker) && State.EmbeddedTag != null;
}
}
}
Loading

0 comments on commit 21864af

Please sign in to comment.