Skip to content

Commit e261edb

Browse files
committed
Merge remote-tracking branch 'origin/master'
2 parents b41f385 + 11119ef commit e261edb

File tree

1 file changed

+21 -0 lines changed

1 file changed

+21
-0
lines changed

sign_language_datasets/datasets/dgs_corpus/dgs_utils.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33

44
def get_elan_sentences(elan_path: str):
5+
56
eaf = pympi.Elan.Eaf(elan_path) # TODO add "suppress_version_warning=True" when pympi 1.7 is released
67

78
timeslots = eaf.timeslots
@@ -34,6 +35,18 @@ def get_elan_sentences(elan_path: str):
3435

3536
all_glosses += list(gloss.values())
3637

38+
all_mouthings = []
39+
40+
tier_name = "Mundbild_Mundgestik_" + participant
41+
items = eaf.tiers[tier_name][0]
42+
43+
# structure of entries:
44+
# {'a2768296': ('ts42', 'ts43', 'tochter', None), ... }
45+
46+
for s, e, val, _ in items.values():
47+
mouthing_entry = {"start": timeslots[s], "end": timeslots[e], "mouthing": val}
48+
all_mouthings.append(mouthing_entry)
49+
3750
for (s, e, val, _) in german_text:
3851
sentence = {"participant": participant, "start": timeslots[s], "end": timeslots[e], "german": val}
3952

@@ -49,4 +62,12 @@ def get_elan_sentences(elan_path: str):
4962
)
5063
)
5164

65+
# add mouthings
66+
sentence["mouthings"] = list(
67+
sorted(
68+
[item for item in all_mouthings if item["start"] >= sentence["start"] and item["end"] <= sentence["end"]],
69+
key=lambda d: d["start"],
70+
)
71+
)
72+
5273
yield sentence

0 commit comments

Comments (0)