Skip to content

Commit a5a1789

Browse files
authored
Merge pull request #19 from sign-language-processing/dgs_sentences
feat(dgs_corpus): add sentence level loading
2 parents 9daa7d9 + 66661a9 commit a5a1789

File tree

7 files changed

+322
-87
lines changed

7 files changed

+322
-87
lines changed

examples/load.ipynb

Lines changed: 130 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@
2020
"cell_type": "markdown",
2121
"metadata": {
2222
"id": "view-in-github",
23-
"colab_type": "text"
23+
"colab_type": "text",
24+
"pycharm": {
25+
"name": "#%% md\n"
26+
}
2427
},
2528
"source": [
2629
"<a href=\"https://colab.research.google.com/github/sign-language-processing/datasets/blob/master/examples/load.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
@@ -29,7 +32,10 @@
2932
{
3033
"cell_type": "code",
3134
"metadata": {
32-
"id": "ov6fuFwGjlsy"
35+
"id": "ov6fuFwGjlsy",
36+
"pycharm": {
37+
"name": "#%%\n"
38+
}
3339
},
3440
"source": [
3541
"%%capture\n",
@@ -41,7 +47,10 @@
4147
{
4248
"cell_type": "code",
4349
"metadata": {
44-
"id": "C4PZsi6pPp9j"
50+
"id": "C4PZsi6pPp9j",
51+
"pycharm": {
52+
"name": "#%%\n"
53+
}
4554
},
4655
"source": [
4756
"import tensorflow_datasets as tfds\n",
@@ -56,7 +65,10 @@
5665
{
5766
"cell_type": "markdown",
5867
"metadata": {
59-
"id": "PKGZ4JXCZmSE"
68+
"id": "PKGZ4JXCZmSE",
69+
"pycharm": {
70+
"name": "#%% md\n"
71+
}
6072
},
6173
"source": [
6274
"# RWTH Phoenix 2014 T"
@@ -65,7 +77,10 @@
6577
{
6678
"cell_type": "code",
6779
"metadata": {
68-
"id": "8wU1Q4URqRBE"
80+
"id": "8wU1Q4URqRBE",
81+
"pycharm": {
82+
"name": "#%%\n"
83+
}
6984
},
7085
"source": [
7186
"config = SignDatasetConfig(name=\"only-annotations\", version=\"3.0.0\", include_video=False)\n",
@@ -82,7 +97,10 @@
8297
{
8398
"cell_type": "markdown",
8499
"metadata": {
85-
"id": "v6iBwM9lTzS6"
100+
"id": "v6iBwM9lTzS6",
101+
"pycharm": {
102+
"name": "#%% md\n"
103+
}
86104
},
87105
"source": [
88106
"# Dicta Sign"
@@ -91,7 +109,10 @@
91109
{
92110
"cell_type": "code",
93111
"metadata": {
94-
"id": "EQWUAgpVT0bK"
112+
"id": "EQWUAgpVT0bK",
113+
"pycharm": {
114+
"name": "#%%\n"
115+
}
95116
},
96117
"source": [
97118
"config = SignDatasetConfig(name=\"only-annotations\", version=\"1.0.0\", include_video=False, include_pose=None)\n",
@@ -106,7 +127,10 @@
106127
{
107128
"cell_type": "markdown",
108129
"metadata": {
109-
"id": "OcIs13W6TfWz"
130+
"id": "OcIs13W6TfWz",
131+
"pycharm": {
132+
"name": "#%% md\n"
133+
}
110134
},
111135
"source": [
112136
"# ChicagoFSWild+"
@@ -115,7 +139,10 @@
115139
{
116140
"cell_type": "code",
117141
"metadata": {
118-
"id": "o1X1kIgoTfec"
142+
"id": "o1X1kIgoTfec",
143+
"pycharm": {
144+
"name": "#%%\n"
145+
}
119146
},
120147
"source": [
121148
"# Version 2.0.0 is ChicagoFSWild+, 1.0.0 is ChicagoFSWild\n",
@@ -131,7 +158,10 @@
131158
{
132159
"cell_type": "markdown",
133160
"metadata": {
134-
"id": "XK7jyOOtYv_P"
161+
"id": "XK7jyOOtYv_P",
162+
"pycharm": {
163+
"name": "#%% md\n"
164+
}
135165
},
136166
"source": [
137167
"# AUTSL"
@@ -140,7 +170,10 @@
140170
{
141171
"cell_type": "code",
142172
"metadata": {
143-
"id": "dfZnI9K8YxfJ"
173+
"id": "dfZnI9K8YxfJ",
174+
"pycharm": {
175+
"name": "#%%\n"
176+
}
144177
},
145178
"source": [
146179
"config = SignDatasetConfig(name=\"only-annotations\", version=\"1.0.0\", include_video=False)\n",
@@ -155,7 +188,10 @@
155188
{
156189
"cell_type": "markdown",
157190
"metadata": {
158-
"id": "rykmI68x3E07"
191+
"id": "rykmI68x3E07",
192+
"pycharm": {
193+
"name": "#%% md\n"
194+
}
159195
},
160196
"source": [
161197
"# SignBank"
@@ -164,7 +200,10 @@
164200
{
165201
"cell_type": "code",
166202
"metadata": {
167-
"id": "12XcWfeg21kE"
203+
"id": "12XcWfeg21kE",
204+
"pycharm": {
205+
"name": "#%%\n"
206+
}
168207
},
169208
"source": [
170209
"signbank = tfds.load(name='sign_bank')\n",
@@ -178,7 +217,10 @@
178217
{
179218
"cell_type": "markdown",
180219
"metadata": {
181-
"id": "biXjC80j17n1"
220+
"id": "biXjC80j17n1",
221+
"pycharm": {
222+
"name": "#%% md\n"
223+
}
182224
},
183225
"source": [
184226
"# SignTyp (https://signtyp.uconn.edu/signpuddle/index.php?ui=1&sgn=9032)\n"
@@ -187,7 +229,10 @@
187229
{
188230
"cell_type": "code",
189231
"metadata": {
190-
"id": "dVgbyUIg165c"
232+
"id": "dVgbyUIg165c",
233+
"pycharm": {
234+
"name": "#%%\n"
235+
}
191236
},
192237
"source": [
193238
"config = SignDatasetConfig(name=\"only-annotations\", version=\"1.0.0\", include_video=False, extra={\"PHPSESSID\": \"hj9co07ct7f5noq529no9u09l4\"})\n",
@@ -202,7 +247,10 @@
202247
{
203248
"cell_type": "markdown",
204249
"metadata": {
205-
"id": "yOLfw9-z2qK7"
250+
"id": "yOLfw9-z2qK7",
251+
"pycharm": {
252+
"name": "#%% md\n"
253+
}
206254
},
207255
"source": [
208256
"# Sign2Mint"
@@ -211,7 +259,10 @@
211259
{
212260
"cell_type": "code",
213261
"metadata": {
214-
"id": "X96ogmu_22zv"
262+
"id": "X96ogmu_22zv",
263+
"pycharm": {
264+
"name": "#%%\n"
265+
}
215266
},
216267
"source": [
217268
"config = SignDatasetConfig(name=\"only-annotations\", version=\"1.0.0\", include_video=False)\n",
@@ -226,7 +277,10 @@
226277
{
227278
"cell_type": "markdown",
228279
"metadata": {
229-
"id": "jnf4AaX936w4"
280+
"id": "jnf4AaX936w4",
281+
"pycharm": {
282+
"name": "#%% md\n"
283+
}
230284
},
231285
"source": [
232286
"# SWOJS Glossário"
@@ -235,7 +289,10 @@
235289
{
236290
"cell_type": "code",
237291
"metadata": {
238-
"id": "shQxQtQP359y"
292+
"id": "shQxQtQP359y",
293+
"pycharm": {
294+
"name": "#%%\n"
295+
}
239296
},
240297
"source": [
241298
"config = SignDatasetConfig(name=\"only-annotations\", version=\"1.0.0\", include_video=False)\n",
@@ -253,7 +310,10 @@
253310
{
254311
"cell_type": "markdown",
255312
"metadata": {
256-
"id": "pNJdG7ExZugh"
313+
"id": "pNJdG7ExZugh",
314+
"pycharm": {
315+
"name": "#%% md\n"
316+
}
257317
},
258318
"source": [
259319
"# DGS Corpus"
@@ -262,7 +322,10 @@
262322
{
263323
"cell_type": "code",
264324
"metadata": {
265-
"id": "TVjrhsbtbWbX"
325+
"id": "TVjrhsbtbWbX",
326+
"pycharm": {
327+
"name": "#%%\n"
328+
}
266329
},
267330
"source": [
268331
"%%capture\n",
@@ -271,6 +334,18 @@
271334
"execution_count": null,
272335
"outputs": []
273336
},
337+
{
338+
"cell_type": "markdown",
339+
"source": [
340+
"## Document-level example (long videos)"
341+
],
342+
"metadata": {
343+
"collapsed": false,
344+
"pycharm": {
345+
"name": "#%% md\n"
346+
}
347+
}
348+
},
274349
{
275350
"cell_type": "code",
276351
"metadata": {
@@ -300,6 +375,40 @@
300375
"execution_count": null,
301376
"outputs": []
302377
},
378+
{
379+
"cell_type": "markdown",
380+
"source": [
381+
"## Sentence-level example (videos are split into sentences)"
382+
],
383+
"metadata": {
384+
"collapsed": false,
385+
"pycharm": {
386+
"name": "#%% md\n"
387+
}
388+
}
389+
},
390+
{
391+
"cell_type": "code",
392+
"execution_count": null,
393+
"outputs": [],
394+
"source": [
395+
"from sign_language_datasets.datasets.dgs_corpus import DgsCorpusConfig\n",
396+
"\n",
397+
"config = DgsCorpusConfig(name=\"only-annotations-sentence-level\", version=\"1.0.0\", include_video=False, include_pose=None, data_type=\"sentence\")\n",
398+
"dgs_corpus = tfds.load('dgs_corpus', builder_kwargs=dict(config=config))\n",
399+
"\n",
400+
"for datum in itertools.islice(dgs_corpus[\"train\"], 0, 5):\n",
401+
" sentence = datum[\"sentence\"][\"german\"].numpy().decode('utf-8')\n",
402+
" print(sentence)\n",
403+
" print(datum)"
404+
],
405+
"metadata": {
406+
"collapsed": false,
407+
"pycharm": {
408+
"name": "#%%\n"
409+
}
410+
}
411+
},
303412
{
304413
"cell_type": "markdown",
305414
"source": [

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
typing-extensions
12
python-dotenv
23
tqdm
34
pose-format>=0.0.3

setup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@
1111
setup(
1212
name="sign-language-datasets",
1313
packages=packages,
14-
version="0.1.1",
14+
version="0.1.2",
1515
description="TFDS Datasets for sign language",
1616
author="Amit Moryossef",
1717
author_email="amitmoryossef@gmail.com",
1818
url="https://github.com/sign-language-processing/datasets",
1919
keywords=[],
20-
install_requires=["python-dotenv", "tqdm", "pose-format", "tfds-nightly", "tensorflow", "numpy", "pympi-ling",
21-
"Pillow", "opencv-python==4.5.5.64"],
20+
install_requires=["typing-extensions", "python-dotenv", "tqdm", "pose-format", "tfds-nightly", "tensorflow",
21+
"numpy", "pympi-ling", "Pillow", "opencv-python==4.5.5.64"],
2222
tests_require=['pytest', 'pytest-cov'],
2323
long_description=long_description,
2424
long_description_content_type="text/markdown",

sign_language_datasets/datasets/config.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ def __init__(
1414
include_pose: Optional[str] = None,
1515
fps: Optional[float] = None,
1616
resolution: Optional[Tuple[int, int]] = None,
17-
split: Optional[str] = None,
1817
extra: dict = {},
1918
**kwargs,
2019
):
@@ -35,7 +34,6 @@ def __init__(
3534

3635
self.fps = fps
3736
self.resolution = resolution
38-
self.split = split
3937
self.extra = extra
4038

4139
def ffmpeg_args(self):
@@ -69,4 +67,12 @@ def encode_example(self, video_or_path_or_fobj):
6967
_, h, w, _ = self.shape
7068
video_or_path_or_fobj = [cv2.resize(cv2.imread(f), (w, h)) for f in video_or_path_or_fobj]
7169

70+
# Dict input: a video path under "video" plus extra ffmpeg arguments under "ffmpeg_args", applied to this call only
71+
if isinstance(video_or_path_or_fobj, dict) and "video" in video_or_path_or_fobj and isinstance(video_or_path_or_fobj["video"], str):
72+
old_args = list(self._extra_ffmpeg_args)
73+
self._extra_ffmpeg_args += video_or_path_or_fobj["ffmpeg_args"]
74+
result = super(VideoFeature, self).encode_example(video_or_path_or_fobj["video"])
75+
self._extra_ffmpeg_args = old_args
76+
return result
77+
7278
return super(VideoFeature, self).encode_example(video_or_path_or_fobj)
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""dgs_corpus dataset."""
22

3-
from .dgs_corpus import DgsCorpus
3+
from .dgs_corpus import DgsCorpus, DgsCorpusConfig

0 commit comments

Comments
 (0)