Skip to content

Commit

Permalink
Calculate offsets from each morpheme's own begin/end positions
Browse files Browse the repository at this point in the history
  • Loading branch information
mh-northlander committed Nov 6, 2024
1 parent 85a66af commit a598575
Showing 1 changed file with 21 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,11 @@ private void setAUnitAttribute() {
posIncAtt.setPositionIncrement(1);
}

int startOffset = subunits.offset();
Morpheme morpheme = subunits.next();
int endOffset = subunits.offset();
termAtt.setEmpty().append(morpheme.surface());
morphemeAtt.setMorpheme(morpheme);
morphemeAtt.setOffsets(offsetMap.subList(startOffset, endOffset + 1));
offsetAtt.setOffset(correctOffset(startOffset), correctOffset(endOffset));
MorphemeSubunits.Subunit su = subunits.next();
termAtt.setEmpty().append(su.morpheme.surface());
morphemeAtt.setMorpheme(su.morpheme);
morphemeAtt.setOffsets(offsetMap.subList(su.begin, su.end + 1));
offsetAtt.setOffset(correctOffset(su.begin), correctOffset(su.end));
}

private void setOOVAttribute() {
Expand Down Expand Up @@ -194,29 +192,40 @@ public int offset() {
}

static class MorphemeSubunits {
/**
 * Holder that pairs a morpheme with a begin/end offset pair; all fields are final.
 *
 * <p>Instances are produced by {@code MorphemeSubunits.next()}, which passes the
 * morpheme's {@code begin()} and {@code end()} minus the {@code begin()} of the
 * first morpheme of the current list. The offsets are therefore relative to
 * that first morpheme rather than to the whole input.
 */
static class Subunit {
/** The morpheme this subunit wraps. */
final Morpheme morpheme;
/** Begin offset of the morpheme, exactly as given to the constructor. */
final int begin;
/** End offset of the morpheme, exactly as given to the constructor. */
final int end;

/**
 * Stores the given values as-is; no validation is performed.
 *
 * @param morpheme the morpheme to wrap
 * @param begin begin offset to associate with the morpheme
 * @param end end offset to associate with the morpheme
 */
public Subunit(Morpheme morpheme, int begin, int end) {
this.morpheme = morpheme;
this.begin = begin;
this.end = end;
}
}

private List<Morpheme> morphemes;
private int size;
private int index;
private int offset;
private int baseOffset;

public void setUnits(List<Morpheme> morphemes) {
this.morphemes = morphemes;
size = morphemes.size();
index = 0;
offset = 0;
baseOffset = morphemes.get(0).begin();
}

/**
 * Reports whether any morphemes are left to be returned by {@code next()}.
 *
 * @return {@code true} while the current index is below the size recorded by
 *         {@code setUnits}
 */
public boolean hasNext() {
return index < size;
}

public Morpheme next() {
public Subunit next() {
if (!hasNext()) {
throw new IllegalStateException();
}
Morpheme m = morphemes.get(index++);
offset += m.end() - m.begin();
return m;
return new Subunit(m, m.begin() - baseOffset, m.end() - baseOffset);
}

public int size() {
Expand All @@ -226,9 +235,5 @@ public int size() {
/**
 * Returns the current iteration index.
 *
 * <p>The index is reset to 0 by {@code setUnits} and incremented once per
 * successful {@code next()} call, so it equals the number of morphemes
 * already consumed from the current list.
 *
 * @return the current index
 */
public int index() {
return index;
}

public int offset() {
return offset;
}
}
}

0 comments on commit a598575

Please sign in to comment.