@@ -40,52 +40,53 @@ def cut_sequence_by_time_distance(
40
40
return sequences
41
41
42
42
43
def duplication_check(
    sequence: PointSequence,
    duplicate_distance: float,
    duplicate_angle: float,
) -> T.Tuple[PointSequence, T.List[types.ErrorMetadata]]:
    """Split a sequence into kept points and duplicate error records.

    Each point is compared against the most recently *kept* point (not
    simply its predecessor), so a run of near-identical points collapses
    onto the first point of the run.

    A point is a duplicate only when both conditions hold:
    its GPS distance to the last kept point is <= ``duplicate_distance``,
    and both points have an angle whose bearing difference is
    <= ``duplicate_angle``. If either angle is ``None`` the point is
    never treated as a duplicate.

    Args:
        sequence: points to check, in order. May be empty.
        duplicate_distance: distance threshold, in whatever unit
            ``geo.gps_distance`` returns (presumably meters; confirm).
        duplicate_angle: bearing-difference threshold, in whatever unit
            ``geo.diff_bearing`` returns (presumably degrees; confirm).

    Returns:
        A ``(dedups, dups)`` tuple. ``dedups`` holds the kept points in
        their original order. ``dups`` holds one error metadata entry per
        dropped point, so ``len(dedups) + len(dups)`` equals the number
        of input points.
    """
    dedups: PointSequence = []
    dups: T.List[types.ErrorMetadata] = []

    sequence_iter = iter(sequence)
    # Pass a default: a bare next() raises StopIteration on an empty
    # sequence, which would make the None guard below unreachable.
    prev = next(sequence_iter, None)
    if prev is None:
        return dedups, dups
    # The first point has nothing to be compared with, so it is always kept.
    dedups.append(prev)

    for cur in sequence_iter:
        # invariant: prev is processed
        distance = geo.gps_distance(
            (prev.lat, prev.lon),
            (cur.lat, cur.lon),
        )

        if prev.angle is not None and cur.angle is not None:
            angle_diff = geo.diff_bearing(prev.angle, cur.angle)
        else:
            # Without both angles the bearing test cannot be satisfied.
            angle_diff = None

        if distance <= duplicate_distance and (
            angle_diff is not None and angle_diff <= duplicate_angle
        ):
            dups.append(
                types.describe_error_metadata(
                    MapillaryDuplicationError(
                        f"Duplicate of its previous image in terms of distance <= {duplicate_distance} and angle <= {duplicate_angle}",
                        types.as_desc(cur),
                        distance=distance,
                        angle_diff=angle_diff,
                    ),
                    cur.filename,
                    filetype=types.FileType.IMAGE,
                ),
            )
            # prev does not change: later points are still compared
            # against the last kept point.
        else:
            dedups.append(cur)
            prev = cur
        # invariant: cur is processed

    return dedups, dups
90
91
91
92
@@ -207,14 +208,7 @@ def process_sequence_properties(
207
208
duplicate_angle = duplicate_angle ,
208
209
)
209
210
assert len (sequence ) == len (dedups ) + len (dups )
210
- for dup in dups :
211
- error_metadatas .append (
212
- types .describe_error_metadata (
213
- MapillaryDuplicationError ("duplicated" , types .as_desc (dup )),
214
- dup .filename ,
215
- filetype = types .FileType .IMAGE ,
216
- ),
217
- )
211
+ error_metadatas .extend (dups )
218
212
219
213
# interpolate angles
220
214
if interpolate_directions :
0 commit comments