-
Notifications
You must be signed in to change notification settings - Fork 0
/
eaf_extract.py
294 lines (257 loc) · 11 KB
/
eaf_extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
import pandas as pd
import math
import sys, os
import matplotlib.pyplot as mp
import matplotlib as mpl
import matplotlib.dates as dates
import numpy as np
from matplotlib.colors import Normalize
from matplotlib import cm
import argparse
import pympi
import re
# Use PYVENV in ~/Development
# ============================================================================
# Combines the sensor data and annotations into a data file.
#
# The arguments `D`, `V`, `A` and `Z` choose distances, velocities,
# accelerations and directions respectively. These can be modified by
# adding `N` or `S` for normalised (scale -1..1) or standardised
# (subtract the mean, divide by the standard deviation) respectively.
# Note that the direction and azimuth values are not modified. Also note
# that all the values are calculated by the Rust code.
#
# Tiers (which become targets) from the EAF file can be chosen with
# the `-t` argument. The different annotations will be turned into
# (numeric) classes and become the target in the last column of the
# output (named after the tier).
#
# The data file needs to contain the Timestamp field to be able to match
# the EAF data with the MoCap data. Use the --timestamp option if you
# are using the Rust code to process the MoCap data.
#
# (PYVENV) pberck@ip21-178 mocap %
# python eaf_extract.py -e gestures_ML_05.eaf -F LHandIn_in -t LHand
# or
# python eaf_extract.py -e gestures_ML_05.eaf -F ".HandIn_*" -t LHand
# or
# python eaf_extract.py -e gestures_ML_05.eaf -F ".HandIn_.$" -t LHand -t RHand
#
# Usable data can be generated as follows.
# cargo run -- -f gestures_ML_05.tsv --timestamp -s2 > gestures_ML_05_data.tsv
# ============================================================================
'''
Workflow:
generate distances/velocities/etc
cargo run --release -- -f gestures_ML_05.tsv --timestamp -s2 >gestures_ML_05_data.tsv
create datafile for NN training
python eaf_extract.py -d gestures_ML_05_data.tsv -e gestures_ML_05.eaf -F "LHandIn"
-t LHand -o gestures_ML_05_data_targets_LHLH_ND.tsv -N -D
train
python torch_mocap_12.py -t gestures_ML_05_data_targets_LHLH_ND.tsv
'''
# We need an output sensor list as well
# ----------------------------------------------------------------------------
# Command line interface: input data/EAF files, output path, sensor-name
# regexp filters, and switches selecting which derived value columns
# (distances/velocities/accelerations/directions, raw/_N/_S) are emitted.
# ----------------------------------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--datafilename",
                    default="gestures_ML_05_data.tsv",
                    help="Data file to use.")
parser.add_argument("-e", "--eaffilename",
                    default=None,
                    help="EAF file to use.")
parser.add_argument("-t", "--tiernames",
                    action="append", default=[],
                    help="Tiernames to include in processing.")
parser.add_argument("-o", "--output",
                    default="eaf_targets.tsv",
                    help="Output filename.")
parser.add_argument("-F", "--filter",
                    action="append", default=[],
                    help="Regexp to filter output sensor names.")
# Boolean switches. -c emits annotation names instead of class indices;
# -N/-S select the normalised/standardised variants of the value columns.
for flags, help_text in (
        (("-c", "--classnames"),     "Use class names"),
        (("-S", "--standardised"),   "Output _S values"),
        (("-N", "--normalised"),     "Output _N values"),
        (("-D", "--distances"),      "Output distance values"),
        (("-V", "--velocities"),     "Output velocity values"),
        (("-A", "--accelerations"),  "Output acceleration values"),
        (("-Z", "--directions"),     "Output direction values"),
):
    parser.add_argument(*flags, help=help_text, action="store_true")
args = parser.parse_args()
# ============================================================================
# Check files.
# ============================================================================
# The output file is deliberately overwritten; the hard exit stays disabled.
if os.path.exists( args.output ):
    print( f"Output file {args.output} already exists, overwriting." )
    #sys.exit(3)
# ============================================================================
# Read the EAF file.
# ============================================================================
# eaf stays None when no EAF file is given (or it is missing); the tier loop
# below then produces all-zero/"NONE" target columns.
eaf = None
if args.eaffilename:
    if os.path.exists( args.eaffilename ):
        eaf = pympi.Elan.Eaf(file_path=args.eaffilename, author='eaf_extract.py')
    else:
        # Fix: this used to fail silently, yielding targets that are all zero
        # with no hint as to why.
        print( f"EAF file {args.eaffilename} does not exist, continuing without annotations." )
# ============================================================================
# Get data.
# ============================================================================
if not os.path.exists( args.datafilename ):
    print( f"Data file {args.datafilename} does not exist, quitting." )
    sys.exit(1)
# Tab-separated sensor data produced by the Rust code (mocap/main.rs).
df_data = pd.read_csv(
    args.datafilename,
    sep="\t"
)
# ============================================================================
# Keep the ones in the filter.
# Assume we have Frame and Timestamp (these are needed, use the --timestamp
# option when generating data with mocap/main.rs).
# ============================================================================
filtered_columns = []
args.filter.append( "Timestamp" ) # These are automatically filtered.
args.filter.append( "Frame" )
for sensor in df_data.columns: # This is an "or" filter.
    for filter_re in args.filter:
        if re.search( filter_re, sensor ):
            filtered_columns.append( sensor )
            # Fix: without this break a column matching several regexps was
            # appended repeatedly, duplicating columns in the output and
            # breaking the "only two matched" heuristic below.
            break
if len(filtered_columns) == 2: # If none (only TS and F), take all!
    filtered_columns = df_data.columns
# Check for -S, -N etc
# Columns are selected by the two-character suffix the Rust generator
# appends: "_d"/"dN"/"dS" distances, "_v"/"vN"/"vS" velocities,
# "_a"/"aN"/"aS" accelerations, "az"/"in" directions.
new_cols = []
new_cols.append( "Frame" )
new_cols.append( "Timestamp" )
for col in filtered_columns:
    suffix = col[-2:]
    if args.directions:
        # Direction/azimuth values are never normalised or standardised.
        if suffix == "az":
            new_cols.append( col )
        elif suffix == "in":
            new_cols.append( col )
    # If we specify -D and not -N, we get the raw distance,
    # If we specify -D and -N, we only get the normalised distance.
    # The -S adds the standardised distance to one of the above.
    if args.distances:
        if not args.normalised and suffix == "_d":
            new_cols.append( col )
        if suffix == "dN" and args.normalised:
            new_cols.append( col )
        elif suffix == "dS" and args.standardised:
            new_cols.append( col )
    if args.velocities:
        if not args.normalised and suffix == "_v":
            new_cols.append( col )
        if suffix == "vN" and args.normalised:
            new_cols.append( col )
        elif suffix == "vS" and args.standardised:
            new_cols.append( col )
    if args.accelerations:
        if not args.normalised and suffix == "_a":
            new_cols.append( col )
        if suffix == "aN" and args.normalised:
            new_cols.append( col )
        elif suffix == "aS" and args.standardised:
            new_cols.append( col )
df_data = df_data[new_cols]
# ============================================================================
# Print.
# ============================================================================
# Preview the filtered data frame.
for preview in (df_data.head(), df_data.tail()):
    print( preview )
# ============================================================================
# Get EAF info/tiers/annotations.
# ============================================================================
# Insert the EAF columns.
# A Timestamp column is required to align annotations with samples.
if "Timestamp" not in df_data.columns:
    print( "Data does not have a timestamp." ) # We could add it...
    sys.exit(4)
# Sample period from the first two rows; its inverse is the sample rate.
ts_column = df_data['Timestamp']
time_delta = ts_column.iloc[1] - ts_column.iloc[0]
print( time_delta, 1.0/time_delta )
# We could add the full text EAF annotation names as well...
#df_data.insert( len(df_data.columns), "EAF", 0 )
#print( df_data.head() )
# get_full_time_interval()
# get_tier_names()
tier_names = []
if eaf:
    tier_names = eaf.get_tier_names()
    print( "EAF Tiers", tier_names )
# Initialising classes here gives a unique class index to
# every annotation across all tiers.
# classes[0] is "NONE": the index for unannotated time spans.
classes = ["NONE"]
# If we did not specify any tiers, we take them all.
if not args.tiernames:
    args.tiernames = tier_names # These can still be null if no EAF file.
#df_targets = df_data.iloc[:, [0, 1]].copy()
# For every chosen tier: append one target column to df_data, collect the
# tier's annotation labels as classes, then stamp each annotated time span
# with its class (index or name, depending on -c).
for tier in args.tiernames:
    print( "TIER", tier )
    ####classes = ["NONE"] # Initialising classes here repeats class indices for each tier.
    if args.classnames:
        df_data.insert( len(df_data.columns), tier, "NONE" ) # tier as "EAF"
    else:
        df_data.insert( len(df_data.columns), tier, 0 ) # tier as "EAF"
    ##df_targets.insert( len(df_targets.columns), tier, 0 ) # tier as "EAF"
    annotation_data = []
    if eaf:
        annotation_data = eaf.get_annotation_data_for_tier( tier )
    #print( annotation_data )
    # First pass: register each distinct "tier-annotation" label as a class.
    for a,b,x in annotation_data:
        cl_name = tier + "-" + x
        if cl_name not in classes:
            classes.append( cl_name )
            print( cl_name )
    t_annotations = 0 # time with annotations
    # Second pass: write the class into the tier column for all rows whose
    # Timestamp falls inside the annotation's [t0, t1) interval.
    # EAF times are in milliseconds; Timestamps are in seconds.
    for t0, t1, cl in annotation_data:
        t0m = t0 / 1000
        t1m = t1 / 1000
        cl_name = tier + "-" + cl
        cli = classes.index( cl_name )
        t_delta = t1 - t0
        t_annotations += t_delta
        # percentage of data with annotations
        # (cumulative annotated time relative to the current annotation's end)
        print( t0, t0m, t1, t1m, cl_name, cli, "{:.1f}".format(t_annotations*100.0 / t1) )
        '''
        time ............... class class index
        35450 35.45 37210 37.21 g1 1
        38410 38.41 39530 39.53 g2 2
        '''
        # Instead of EAF, use tier name?
        #df_data.loc[ (df_data['Timestamp']>=t0m) & (df_data['Timestamp']<t1m), 'EAF' ] = cli
        if args.classnames:
            df_data.loc[ (df_data['Timestamp']>=t0m) & (df_data['Timestamp']<t1m), tier ] = cl_name
        else:
            df_data.loc[ (df_data['Timestamp']>=t0m) & (df_data['Timestamp']<t1m), tier ] = cli
        ##df_targets.loc[ (df_data['Timestamp']>=t0m) & (df_data['Timestamp']<t1m), tier ] = cli
print( classes )
print( df_data.head() )
print( df_data.tail() )
pd.set_option('display.max_rows', 500)
# Spot check: rows in a small time window (presumably a known annotated
# span in the test recording -- TODO confirm).
print( df_data.loc[ (df_data['Timestamp']>=24.600) & (df_data['Timestamp']<24.620)] )
print( "Saving output in", args.output )
df_data.to_csv(
    args.output,
    index=False,
    sep="\t"
)
# Normal end of the script; everything below this exit is dead code.
sys.exit(0)
# ----------------------------------------------------------------------------
# NOTE(review): UNREACHABLE. The script calls sys.exit(0) above, so nothing
# below ever runs. It also references df_targets, which is only created in
# commented-out lines earlier -- reaching this code would raise a NameError.
# Kept as-is; consider deleting it.
# ----------------------------------------------------------------------------
print( "-" )
print( df_targets )
print( "-" )
# Expand doesn't really work...
# Old row-by-row writer: one output line per (row, tier) pair.
with open(args.output, "w") as f:
    for i in range(0, len(df_data)):
        data_row = df_data.iloc[i, 2:] # 2: to skip frame and TS
        target_row = df_targets.iloc[i]
        for t in range(0, len(args.tiernames)):
            print( data_row.values )
            for v in data_row.values:
                f.write("{}\t".format( v ))
            f.write("{}\n".format(target_row.iloc[t+2]))
print( args )
sys.exit(1)
# This below needs to be in the loop maybe? or have different colums? for each tier
# create new data frame?
# Fill in the time bits... The data between the t0 and t1 timestamps
# get the class index as an EAF target.
# Older single-column ('EAF') variant of the annotation-stamping loop.
for t0, t1, cl in annotation_data:
    t0m = t0 / 1000
    t1m = t1 / 1000
    cli = classes.index( cl )
    print( t0, t0m, t1, t1m, cl, cli )
    '''
    time ............... class class index
    35450 35.45 37210 37.21 g1 1
    38410 38.41 39530 39.53 g2 2
    '''
    df_data.loc[ (df_data['Timestamp']>=t0m) & (df_data['Timestamp']<t1m), 'EAF' ] = cli