Skip to content

Commit 363403d

Browse files
authored
Merge pull request #24 from bricksdont/dgs_signer_id
feature(dgs_corpus): util to extract signer ids
2 parents 38e317e + 5a64e39 commit 363403d

File tree

2 files changed

+89
-1
lines changed

2 files changed

+89
-1
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
setup(
1212
name="sign-language-datasets",
1313
packages=packages,
14-
version="0.1.4",
14+
version="0.1.5",
1515
description="TFDS Datasets for sign language",
1616
author="Amit Moryossef",
1717
author_email="amitmoryossef@gmail.com",

sign_language_datasets/datasets/dgs_corpus/dgs_utils.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import pympi
22

3+
from lxml import etree
4+
from typing import Dict, List
5+
36

47
def get_elan_sentences(elan_path: str):
58

@@ -71,3 +74,88 @@ def get_elan_sentences(elan_path: str):
7174
)
7275

7376
yield sentence
77+
78+
79+
def get_child_elements(root: etree.ElementTree,
                       element_name: str,
                       attributes_to_extract: List[str]) -> Dict[str, Dict[str, str]]:
    """
    Collect selected attributes of every ``/ilex-data/<element_name>`` element,
    grouped by the element's ``id`` attribute.

    :param root: parsed document; must support ``xpath`` queries
    :param element_name: tag name appended to ``/ilex-data/`` to build the query
    :param attributes_to_extract: names of the attributes to read from each matched element
    :return: dict mapping each element's ``id`` value to a dict of
             ``attribute name -> value`` (values are whatever ``element.get`` returns);
             if two elements share an id, the later one replaces the earlier one
    """
    query = "/ilex-data/" + element_name
    matches = root.xpath(query)  # type: List[etree.Element]

    # outer key: element id, inner dict: one entry per requested attribute
    return {
        match.get("id"): {name: match.get(name) for name in attributes_to_extract}
        for match in matches
    }
102+
103+
104+
def get_signer_ids_from_ilex(ilex_path: str) -> Dict[str, List[str]]:
    """
    Read an iLex XML file and return, for each camera perspective, the names
    of the informants visible from that perspective.

    File structure:

    <ilex-data source="meinedgs.de" version="1.1" database_version="51">
      <camera_perspective id="1" code="A1" english="Frontal view on informant A"
                          localised="Frontalansicht Informant A" visible_persons="{1}"/>
      <camera_perspective id="2" code="B1" english="Frontal view on informant B"
                          localised="Frontalansicht Informant B" visible_persons="{2}"/>
      <camera_perspective id="3" code="C" english="Total on all three persons"
                          localised="Totale auf alle drei Personen" visible_persons="{2,3,1}"/>
      <movie_track id="3" movie="1" camera_perspective="3" path="./1177918_1c.mp4"
                   track_length="00:09:25:04"/>
      <movie_track id="1" movie="1" camera_perspective="1" path="./1177918_1a1.mp4"
                   track_length="00:09:25:04"/>
      <movie_track id="2" movie="1" camera_perspective="2" path="./1177918_1b1.mp4"
                   track_length="00:09:25:04"/>
      <informant id="1" sex="1" name="SH-12" short_name="SH-12"/>
      <informant id="2" sex="1" name="SH-13" short_name="SH-13"/>
      <informant id="3" sex="2" name="sh-mod-1" short_name="sh-mod-1"/>
      <participation id="1" movie="1" role="1" informant="1"/>
      <participation id="2" movie="1" role="1" informant="2"/>
      <participation id="3" movie="1" role="2" informant="3"/>
      <!--...-->
    </ilex-data>

    :param ilex_path: path of the iLex XML file to parse
    :return: dict mapping the lower-cased first character of each camera
             perspective ``code`` (e.g. "a" for "A1") to the list of informant
             names visible from it, in the order given in ``visible_persons``.
             A perspective whose ``visible_persons`` is empty ("{}") maps to
             an empty list. If several perspectives share the same first
             character, the one processed last replaces the earlier ones.
    :raises KeyError: if ``visible_persons`` refers to an id for which no
                      ``informant`` element exists
    """

    root = etree.parse(ilex_path)

    informant_dict = get_child_elements(root=root,
                                        element_name="informant",
                                        attributes_to_extract=["name"])

    camera_perspective_dict = get_child_elements(root=root,
                                                 element_name="camera_perspective",
                                                 attributes_to_extract=["visible_persons", "code"])

    signer_identities_by_perspective = {}  # type: Dict[str, List[str]]

    for camera_perspective in camera_perspective_dict.values():

        # extract A, B or C without trailing numbers
        clean_code = camera_perspective["code"][0].lower()

        # remove enclosing "{" and "}" for list of informant ids
        raw_ids = camera_perspective["visible_persons"][1:-1].split(",")

        # "{}" splits into [""] and "{2, 3}" leaves a leading space on "3";
        # normalise the ids so neither case ends in a KeyError on lookup
        stripped_ids = (raw_id.strip() for raw_id in raw_ids)
        ids_of_visible_persons = [id_ for id_ in stripped_ids if id_]

        names_of_visible_persons = [informant_dict[id_]["name"] for id_ in ids_of_visible_persons]

        signer_identities_by_perspective[clean_code] = names_of_visible_persons

    return signer_identities_by_perspective

0 commit comments

Comments
 (0)