#!/usr/bin/env python
u"""
Yara Mohajerani (05/2020)
Update History
06/2020 Use bounding box to get line pairs and add plotting
05/2020 Written
"""
import os
import sys
import fiona
import getopt
import numpy as np
from scipy import stats
from shapely.geometry import LineString,Polygon
#-- directory setup
ddir = os.path.expanduser('~/GL_learning_data/geocoded_v1')
gdrive = os.path.join(os.path.expanduser('~'),'Google Drive File Stream',
    'Shared drives','GROUNDING_LINE_TEAM_DRIVE','ML_Yara')
lbl_dir = os.path.join(gdrive,'SOURCE_SHP')
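#-- NOTE: the directories above are specific to the author's environment (a local
#-- GL_learning_data directory and a mounted Google Drive File Stream share)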
#-- main function
def main():
    #-- Read the system arguments listed after the program
    long_options=['DIR=','FILTER=','NAMES=','PLOT']
    optlist,arglist = getopt.getopt(sys.argv[1:],'D:F:N:P',long_options)
    #-- Set default settings
    subdir = 'atrous_32init_drop0.2_customLossR727.dir'
    FILTER = 6000.
    plot_dists = False
    NAMES = None
    for opt, arg in optlist:
        if opt in ("-D","--DIR"):
            subdir = arg
        elif opt in ("-F","--FILTER"):
            #-- getopt arguments are strings, so compare against '0' (not the integer 0)
            if arg not in ['NONE','none','None','N','n','0']:
                FILTER = float(arg)
        elif opt in ("-P","--PLOT"):
            plot_dists = True
        elif opt in ("-N","--NAMES"):
            NAMES = arg
    flt_str = '_%.1fkm'%(FILTER/1000)
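    #-- e.g. with the default FILTER of 6000 m, flt_str is '_6.0km', so the
    #-- postprocessed shapefiles selected below are those ending in '_6.0km.shp'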
    #-- import necessary packages if making plots
    if plot_dists:
        import matplotlib.pyplot as plt
        from descartes import PolygonPatch
    #-- Get list of postprocessed files
    pred_dir = os.path.join(ddir,'stitched.dir',subdir,'shapefiles.dir')
    if NAMES is None:
        fileList = os.listdir(pred_dir)
        pred_list = [f for f in fileList if (f.endswith('%s.shp'%flt_str) and (f.startswith('gl_')))]
    else:
        pred_list = NAMES.split(',')
    #-- out file for saving average error for each file
    outtxt = open(os.path.join(pred_dir,'error_summary%s.txt'%(flt_str)),'w')
    outtxt.write('Average\tError(m)\tMIN(m)\tMAX(m) \t File\n')
    #-- initialize arrays for distances, minimums, and maximums
    distances = np.zeros(len(pred_list))
    minims = np.zeros(len(pred_list))
    maxims = np.zeros(len(pred_list))
    haus_max = np.zeros(len(pred_list))
    #-- go through files and get pairwise distances
    for count,f in enumerate(pred_list):
        #-- read ML line
        fid1 = fiona.open(os.path.join(pred_dir,f),'r')
        #-- loop over ML lines and save all test lines
        ml_lines = []
        for j in range(len(fid1)):
            g = next(fid1)
            gid = g['properties']['ID']
            if (('err' not in gid) and (g['properties']['Type']=='Test')):
                if (g['properties']['Class'] == 'Grounding Line'):
                    ml_lines.append(LineString(g['geometry']['coordinates']))
        fid1.close()
        #-- read label file
        fid2 = fiona.open(os.path.join(lbl_dir,f.replace('%s.shp'%flt_str,'.shp')),'r')
        #-- loop over the hand-written lines and save all coordinates
        hd_lines = []
        for j in range(len(fid2)):
            g2 = next(fid2)
            if g2['geometry'] is not None:
                hd_lines.append(LineString(g2['geometry']['coordinates']))
        fid2.close()
        #-- plot distances if specified
        if plot_dists:
            fig = plt.figure(1, figsize=(8,8))
            ax = fig.add_subplot(111)
        #-- initialize list of all pairwise distances
        dist = []
        #-- loop over ML line segments and form bounding boxes
        for ml_original in ml_lines:
            #-- break the line into segments of n_seg coordinates each
            lcoords = list(ml_original.coords)
            if len(lcoords) < 50:
                ml_broken = [ml_original]
            else:
                #-- start with segments of 50 coordinates, and increase the segment
                #-- length until the leftover (last) segment has at least 10 coordinates
                n_seg = 50
                while len(lcoords)%n_seg < 10:
                    n_seg += 1
                ml_broken = []
                bc = 0
                while bc < len(lcoords):
                    ml_broken.append(LineString(lcoords[bc:bc+n_seg]))
                    bc += n_seg
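                #-- illustrative example (not from the data): a line with 105 coordinates
                #-- gives 105 % 50 = 5, 105 % 51 = 3, 105 % 52 = 1, 105 % 53 = 52, so
                #-- n_seg = 53 and the line is broken into segments of 53 and 52 points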
"""
#-- if the line is more than 50 km long, break it into smaller lines
if ml_original.length >= 50e3:
#-- this is a rough breakdown. We can just break it into 100 indices at a time
lcoords = list(ml_original.coords)
ml_broken = []
bc = 0
while bc+100 < len(lcoords):
ml_broken.append(LineString(lcoords[bc:bc+100]))
bc += 100
if bc < len(lcoords)-2:
ml_broken.append(LineString(lcoords[bc:]))
else:
ml_broken = [ml_original]
"""
for ml in ml_broken:
box = ml.minimum_rotated_rectangle
#-- get hd line segment that intersects the box
for hd in hd_lines:
overlap = hd.intersection(box)
#-- if more than 20% of length is within the box, consider the line
if overlap.length > hd.length/5:
if plot_dists:
if box.geom_type == 'Polygon':
ppatch = PolygonPatch(box,alpha=0.2,facecolor='skyblue')
ax.add_patch(ppatch)
#-- we have found the line pairning. Get mean distance
#-- lines intersect. Now Find the shorter line to use as reference
if ml.length <= hd.length:
#-- use ML line as reference
x1,y1 = ml.coords.xy
x2,y2 = hd.coords.xy
if plot_dists:
ax.plot(x1,y1,color='red')
ax.plot(x2,y2,color='blue')
else:
#-- use manual line as reference (set as x1,y1)
x1,y1 = hd.coords.xy
x2,y2 = ml.coords.xy
if plot_dists:
ax.plot(x1,y1,color='blue')
ax.plot(x2,y2,color='red')
#-- go along x1,y1 and find closest points on x2,y2
d = np.empty((len(x1),len(x2)),dtype=float)
ind_list = np.empty(len(x1),dtype=int)
for i in range(len(x1)):
#-- get list of distances
d[i,:] = np.sqrt((np.array(x2)-x1[i])**2 + (np.array(y2)-y1[i])**2)
#-- get index of shortest distanace
ind_list[i] = np.argmin(d[i,:])
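                        #-- d[i,j] is the Euclidean distance from point i of the reference
                        #-- line to point j of the other line; ind_list[i] is the column
                        #-- index of the closest point for each reference point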
                        #-- Now check if multiple points of the reference line point to the
                        #-- same (x2,y2) point
                        #-- first get list of unique indices
                        unique_list = list(set(ind_list))
                        #-- sort in ascending order
                        unique_list.sort()
                        #-- get how many times each unique index is repeated
                        u_count = np.zeros(len(unique_list),dtype=int)
                        #-- loop through unique indices and count the corresponding indices
                        for k,u in enumerate(unique_list):
                            u_count[k] = np.count_nonzero(ind_list == u)
                        #-- for repeated indices that are side-by-side (for example many 4s and many 5s),
                        #-- the line is out of bounds of the other line, and the far-away points are
                        #-- alternating between a few points on the reference line. Keep only one of the
                        #-- two adjacent indices (the one whose assigned points are closer)
                        remove_list = []
                        for k in range(len(unique_list)):
                            if u_count[k] > 1:
                                #-- compare with the element after
                                if (unique_list[k]+1 in unique_list):
                                    ii, = np.nonzero(ind_list == unique_list[k])
                                    jj, = np.nonzero(ind_list == unique_list[k]+1)
                                    if np.min(d[ii,unique_list[k]]) < np.min(d[jj,unique_list[k]]):
                                        remove_list.append(unique_list[k]+1)
                                    else:
                                        remove_list.append(unique_list[k])
                        #-- remove duplicate elements
                        remove_list = list(set(remove_list))
                        for r in remove_list:
                            unique_list.remove(r)
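                        #-- illustrative example (not from the data): if ind_list were
                        #-- [3,4,4,4,5,5], indices 4 and 5 are adjacent and both repeated,
                        #-- so the comparison above drops either 4 or 5 from unique_list
                        #-- before the matching step below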
                        #-- loop through unique indices and record the closest-point distance for each
                        #-- NOTE: we make an array of all reference indices, which would also allow us
                        #-- to delete used indices (if not deleting, this is redundant and ':' could be used)
                        xlist = np.arange(len(x1))
                        for u in unique_list:
                            w = np.argmin(d[xlist,u])
                            dist.append(d[xlist[w],u])
                            if plot_dists:
                                ax.plot([x1[xlist[w]],x2[u]],[y1[xlist[w]],y2[u]],color='gray')
                            #-- since we used this index, take it out
                            # xlist = np.delete(xlist,w)
        if len(dist) != 0:
            distances[count] = np.mean(dist)
            minims[count] = np.min(dist)
            maxims[count] = np.max(dist)
        else:
            distances[count] = np.nan
            minims[count] = np.nan
            maxims[count] = np.nan
            haus_max[count] = np.nan
        outtxt.write('%.1f \t %.1f \t %.1f \t\t %s\n'%(distances[count],minims[count],maxims[count],f))
        if plot_dists:
            plt.savefig(os.path.join(pred_dir,f.replace('.shp','_dist.pdf')),format='PDF')
            plt.close(fig)
    #-- also save the overall summary statistics
    outtxt.write('\nMEAN\t\t\t\t%.1f m\n'%(np.nanmean(distances)))
    outtxt.write('MIN\t\t\t\t\t%.1f m\n'%(np.nanmin(minims)))
    outtxt.write('MAX\t\t\t\t\t%.1f m\n'%(np.nanmax(maxims)))
    outtxt.write('Interquartile Range\t%.1f m\n'%(stats.iqr(distances,nan_policy='omit')))
    outtxt.write('MAD\t\t\t\t\t%.1f m\n'%(stats.median_absolute_deviation(distances,nan_policy='omit')))
    outtxt.write('STD\t\t\t\t\t%.1f m\n'%(np.nanstd(distances)))
    outtxt.close()
#-- run main program
if __name__ == '__main__':
    main()