-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmulti_linear.py
260 lines (188 loc) · 9.94 KB
/
multi_linear.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 30 11:45:27 2020
@author: morenodu
"""
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import cartopy.io.shapereader as shpreader
import seaborn as sns
from scipy import signal
from mask_shape_border import mask_shape_border
from lag_linregress_3D import lag_linregress_3D
import matplotlib.ticker as plticker
# Crop space to either US or soy states
usa = gpd.read_file('gadm36_USA_1.shp', crs="epsg:4326")
us1_shapes = list(shpreader.Reader('gadm36_USA_1.shp').geometries())
state_names = ['Iowa','Illinois','Minnesota','Indiana','Nebraska','Ohio', 'South Dakota','North Dakota', 'Missouri','Arkansas']
soy_us_states = usa[usa['NAME_1'].isin( state_names)]
# Crop space to either BR or soy states
bra = gpd.read_file('gadm36_BRA_1.shp', crs="epsg:4326")
br1_shapes = list(shpreader.Reader('gadm36_BRA_1.shp').geometries())
state_br_names = ['Mato Grosso','Rio Grande do Sul','Paraná']
soy_br_states = bra[bra['NAME_1'].isin(state_br_names)]
DS_y=xr.open_dataset("yield_soy_1979-2012.nc",decode_times=False).sel(time=slice(1,31))
DS_y['time'] = pd.to_datetime(list(range(1980, 2011)), format='%Y').year
DS_y_us = mask_shape_border(DS_y,soy_us_states ) #clipping for us
df_wofost=DS_y_us.to_dataframe().groupby(['time']).mean() # pandas because not spatially variable anymore
DS_t_mean=xr.open_dataset("cru/cru_tmp.nc",decode_times=True).sel(time=slice('1980-01-01','2016-12-31'))
DS_t_max=xr.open_dataset("cru/cru_tmx.nc",decode_times=True).sel(time=slice('1980-01-01','2016-12-31'))
DS_t_min=xr.open_dataset("cru/cru_tmn.nc",decode_times=True).sel(time=slice('1980-01-01','2016-12-31'))
DS_prec=xr.open_dataset("cru/cru_pre.nc",decode_times=True).sel(time=slice('1980-01-01','2016-12-31'))
DS_evap=xr.open_dataset("cru/cru_vap.nc",decode_times=True).sel(time=slice('1980-01-01','2016-12-31'))
DS_wet=xr.open_dataset("cru/cru_wet.nc",decode_times=True).sel(time=slice('1980-01-01','2016-12-31'))
DS_cli = xr.merge([DS_t_mean.tmp,DS_prec.pre,DS_t_max.tmx,DS_evap.vap,DS_wet]).sel(time=slice('1980-01-01','2010-12-31'))
DS_cli_us = mask_shape_border(DS_cli, soy_us_states) #US-shape
#%% detrend climate CRU
#temp
da_cli_us_det_temp = DS_cli_us.tmp.where(DS_cli_us.tmp > -300, -300 )
mean_cli = da_cli_us_det_temp.mean(axis=0)
da_cli_us_det_temp1 = xr.DataArray(signal.detrend(da_cli_us_det_temp, axis=0), dims=da_cli_us_det_temp.dims, coords=da_cli_us_det_temp.coords, attrs=da_cli_us_det_temp.attrs) + mean_cli
da_cli_us_det_temp2 = da_cli_us_det_temp1.where(da_cli_us_det_temp1 > -100, np.nan ).sel(time = DS_cli_us.indexes['time'].month.isin([6, 7, 8,9,10]))
da_cli_us_det_temp_mean = da_cli_us_det_temp2.groupby('time').mean(...)
df_temp=da_cli_us_det_temp_mean.to_series()
#temp_max
da_cli_us_det_tmax = DS_cli_us.tmx.where(DS_cli_us.tmx > -300, -300 )
mean_cli = da_cli_us_det_tmax.mean(axis=0)
da_cli_us_det_tmax1 = xr.DataArray(signal.detrend(da_cli_us_det_tmax, axis=0), dims=da_cli_us_det_tmax.dims, coords=da_cli_us_det_tmax.coords, attrs=da_cli_us_det_tmax.attrs) + mean_cli
da_cli_us_det_tmax2 = da_cli_us_det_tmax1.where(da_cli_us_det_tmax1 > -100, np.nan ).sel(time = DS_cli_us.indexes['time'].month.isin([6, 7, 8,9,10]))
da_cli_us_det_tmax_mean = da_cli_us_det_tmax2.groupby('time').mean(...)
df_tmax=da_cli_us_det_tmax_mean.to_series()
#prec
da_cli_us_det_prec = DS_cli_us.pre.where(DS_cli_us.tmp > -300, -1 )
mean_cli = da_cli_us_det_prec.mean(axis=0)
da_cli_us_det_prec1 = xr.DataArray(signal.detrend(da_cli_us_det_prec, axis=0), dims=da_cli_us_det_prec.dims, coords=da_cli_us_det_prec.coords, attrs=da_cli_us_det_prec.attrs) + mean_cli
da_cli_us_det_prec2 = da_cli_us_det_prec1.where(da_cli_us_det_prec1 > -1, np.nan ).sel(time = DS_cli_us.indexes['time'].month.isin([6, 7, 8,9,10]))
da_cli_us_det_prec_mean = da_cli_us_det_prec2.groupby('time').mean(...)
df_prec=da_cli_us_det_prec_mean.to_series()
#vap
da_cli_us_det_e = DS_cli_us.vap.where(DS_cli_us.tmp > -300, -300)
mean_cli = da_cli_us_det_e.mean(axis=0)
da_cli_us_det_e1 = xr.DataArray(signal.detrend(da_cli_us_det_e, axis=0), dims=da_cli_us_det_e.dims, coords=da_cli_us_det_e.coords, attrs=da_cli_us_det_e.attrs) + mean_cli
da_cli_us_det_e2 = da_cli_us_det_e1.where(DS_cli_us.tmp > -300, np.nan ).sel(time = DS_cli_us.indexes['time'].month.isin([6, 7, 8,9,10]))
da_cli_us_det_e_mean = da_cli_us_det_e2.groupby('time').mean(...)
df_e=da_cli_us_det_e_mean.to_series()
da_wet = DS_cli_us.wet
da_wet = DS_cli_us['wet'].dt.days
#wet
da_cli_us_det_wet = DS_cli_us['wet'].dt.days.where(DS_cli_us.tmp > -300, -300)
mean_cli = da_cli_us_det_wet.mean(axis=0)
da_cli_us_det_wet1 = xr.DataArray(signal.detrend(da_cli_us_det_wet, axis=0), dims=da_cli_us_det_wet.dims, coords=da_cli_us_det_wet.coords, attrs=da_cli_us_det_wet.attrs) + mean_cli
da_cli_us_det_wet2 = da_cli_us_det_wet1.where(DS_cli_us.tmp > -300, np.nan ).sel(time = DS_cli_us.indexes['time'].month.isin([6, 7, 8,9,10]))
da_cli_us_det_wet_mean = da_cli_us_det_wet2.groupby('time').mean(...)
df_wet=da_cli_us_det_wet_mean.to_series()
#%% US states
da_cli_tmax8 = da_cli_us_det_tmax2.sel(time = da_cli_us_det_tmax2.indexes['time'].month.isin([8]))
da_cli_tmax8['time'] = DS_y_us['time']
da_cli_wet7 = da_cli_us_det_wet2.sel(time = da_cli_us_det_wet2.indexes['time'].month.isin([7]))
da_cli_wet7['time'] = DS_y_us['time']
cov,cor,slope, intercept,pval,stderr = lag_linregress_3D(x=da_cli_tmax8,y=DS_y_us['yield'])
plt.figure(figsize=(20,10))
ax=plt.axes(projection=ccrs.Mercator())
adm1_shapes = list(shpreader.Reader('gadm36_BRA_1.shp').geometries())
r2_score3D.plot(x='lon', y='lat',transform=ccrs.PlateCarree(), robust=True,cbar_kwargs={'label': 'R2 tmax8 and yield'}, levels=15)
loc = plticker.MultipleLocator(base=2.0) # this locator puts ticks at regular intervals
ax.set_xticks(ax.get_xticks()[::1]); ax.set_yticks(ax.get_yticks()[::1])
ax.add_geometries(us1_shapes, ccrs.PlateCarree(),edgecolor='black', facecolor=(0,1,0,0.0))
ax.set_extent([-125,-67,24,50], ccrs.PlateCarree())
plt.show()
r2_score3D = cor**2
predict = intercept + slope*da_cli_tmax8
rss = ((DS_y_us['yield'] - predict)**2).mean('time')
#%% second trial skleran-xarray
import numpy as np
import xarray as xr
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline, make_union
from scipy import stats
slope, intercept, r_value, p_value, std_err = stats.linregress(X_train.tmax8, y_train.values.ravel())
import statsmodels.api as sm
X_const = pd.concat([X_train.tmax8,X_train.wet7], axis=1, sort=False)
X_const = sm.add_constant(X_const)
model = sm.OLS(y_train,X_const)
results = model.fit()
results.params
print(results.summary())
coeff, r, rank, s = np.linalg.lstsq(X_const, y_train, rcond=0)
print(coeff, r, rank, s)
x1, x2, y = X_train.tmax8, X_train.wet7, y_train.values.ravel()
n = y.notnull().sum(dim='time')
x1mean=np.nanmean(x1,axis=0)
x2mean=np.nanmean(x2,axis=0)
ymean=np.nanmean(y,axis=0)
x1std=np.nanstd(x1,axis=0)
x2std=np.nanstd(x2,axis=0)
ystd=np.nanstd(y,axis=0)
#4. Compute covariance along time axis
cov = np.sum((x1 - x1mean)*(x2 - x2mean)*(y - ymean), axis=0) / (n)
#5. Compute correlation along time axis
cor = cov/(x1std*x2std*ystd)
#6. Compute regression slope and intercept:
slope = [cov/(x1std**2),cov/(x2std**2)]
intercept = ymean - x1mean*slope[0] - x2mean*slope[1]
#%%
def k_cor(x,y, pthres = 0.05, direction = True):
"""
Uses the scipy stats module to calculate a Kendall correlation test
:x vector: Input pixel vector to run tests on
:y vector: The date input vector
:pthres: Significance of the underlying test
:direction: output only direction as output (-1 & 1)
"""
# Check NA values
# Run the kendalltau test
tau, p_value = stats.kendalltau(x, y)
# Criterium to return results in case of Significance
if p_value < pthres:
# Check direction
if direction:
if tau < 0:
return -1
elif tau > 0:
return 1
else:
return tau
else:
return 0
# The function we are going to use for applying our kendal test per pixel
def kendall_correlation(x,y,dim='year'):
x,y = xr.align(x,y)
# x = Pixel value, y = a vector containing the date, dim == dimension
return xr.apply_ufunc(
k_cor, x , y,
input_core_dims=[[dim], [dim]],
vectorize=True, # !Important!
output_dtypes=[int]
)
r = kendall_correlation(da_cli_tmax8, DS_y_us['yield'],'time')
plt.figure(figsize=(20,10))
ax=plt.axes(projection=ccrs.Mercator())
adm1_shapes = list(shpreader.Reader('gadm36_BRA_1.shp').geometries())
stats.sel(linreg_pam=2).plot(x='lon', y='lat',transform=ccrs.PlateCarree(), robust=True,cbar_kwargs={'label': 'Correlation temperature and evaporation'}, cmap='RdBu', levels=15)
loc = plticker.MultipleLocator(base=2.0) # this locator puts ticks at regular intervals
ax.set_xticks(ax.get_xticks()[::1]); ax.set_yticks(ax.get_yticks()[::1])
ax.add_geometries(us1_shapes, ccrs.PlateCarree(),edgecolor='black', facecolor=(0,1,0,0.0))
ax.set_extent([-125,-67,24,50], ccrs.PlateCarree())
plt.show()
import statsmodels.api as sm
def new_linregress(x, y):
regr = LinearRegression()
regr.fit(x, y)
score = regr.score(x, y)
return score
# slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
#return np.array([slope, intercept, r_value, p_value, std_err])
da_y_us = DS_y_us['yield']
da_cli_tmax8 = da_cli_tmax8.reindex(lat=da_cli_tmax8.lat[::-1])
x,y = xr.align(da_cli_tmax8, da_y_us, join="exact") #check with they match
stats = xr.apply_ufunc(new_linregress, [da_cli_tmax8,da_cli_tmax8], da_y_us,
input_core_dims=[['time'], ['time']],
output_core_dims=[["linreg_pam"]],
vectorize=True,
dask="parallelized",
output_dtypes=['float64'],
output_sizes={"linreg_pam": 1},
)