-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclimate_data_helper.py
167 lines (155 loc) · 5.8 KB
/
climate_data_helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 6 16:21:21 2018
@author: willi
"""
import os
from osgeo import gdal, ogr
import numpy as np
import datetime
data_directory = "/Users/lucasbraun/Google Drive/Grad School/Semester 2/Data into Knowledge/TDTK Final Project/Data/netCDF/"
# data_directory is the location of the netCDF files that you want to add
# The method only handles files in the following format right now:
# - tmin.2018.nc
# - tmax.2018.nc
# - precip.2018.nc
def add_climate_data(shp_filename, data_directory, year=2018, variables=["tmin", "tmax", "percip"]):
for variable in variables:
netcdf_filename = f"{data_directory}{variable}.{year}.nc"
masknetcdf(netcdf_filename, shp_filename, variable)
def masknetcdf(netcdf_filename, shp_filename, cl_field=None):
if cl_field == None:
# If no field is provided, assume first part of file name is the field to use
head, tail = os.path.split(netcdf_filename)
cl_field = tail.split(".")[0]
# Open dataset
data_source = gdal.Open(netcdf_filename)
# Number of columns of each layer
cols =data_source.RasterXSize
#Number of rows of each layer
rows = data_source.RasterYSize
# Number of bands of each layer
nbands = data_source.RasterCount
# Year of the netcdf file
year = int(netcdf_filename.split('.')[-2])
#========================================
# Importing data by bands as a dictionary
#========================================
datach = {}
# First day of the year as reference
yearc = str(year) + "-01-01"
# Converting year in a datatime object
fi_date = datetime.datetime.strptime(yearc,"%Y-%m-%d").date()
# For loop of the layers that contains the netcdf file
for i in range(1,nbands+1):
index_date = fi_date + datetime.timedelta(days=i-1)
in_band = data_source.GetRasterBand(i)
# Converting default nan in np.na
in_band_wnan = in_band.ReadAsArray()
in_band_wnan[in_band_wnan==-9.9692100e+36] = np.nan
datach[index_date.strftime("%Y-%m-%d")] = in_band_wnan
#====================================
# Parameters of transformation
#====================================
x = 0.5*cols-rows
delta_Lon = 360/cols
delta_Lat = 180/(rows+x)
if x==0:
LonUpper_Left = 0
LatUpper_Left = 90
else:
LonUpper_Left = -delta_Lon/2
LatUpper_Left = delta_Lat*(rows/2)
gt = [LonUpper_Left,delta_Lon, 0,LatUpper_Left,0,-delta_Lat ]
# Inverse affine transformation
inv_gt = gdal.InvGeoTransform(gt)
inv_gt
#=====================================================================
# Importing shapefile and transforming coordinates to rows and columns
#=====================================================================
# Calling driver
driver = ogr.GetDriverByName('ESRI Shapefile')
# Open dataset
datasource1 = driver.Open(shp_filename,1)
# Get layer
layer = datasource1.GetLayer()
# Get number of features
fc = layer.GetFeatureCount()
# Creating lists for rows, columns and time index
xt_p = []
yt_p = []
dayc = []
for fea in layer:
# Time
a = fea.GetField("timestamp")[0:10]
dayc.append(a)
# Coord
pt = fea.geometry()
# Transforming coord
xt, yt = gdal.ApplyGeoTransform(inv_gt,pt.GetX(),pt.GetY())
xt_p.append(xt)
yt_p.append(yt)
#=================================
# Extracting values by time
#=================================
# Converting list of days to array
array_indexday = np.array(dayc)
# Creating a vector of nan values
value = np.zeros(fc)
value[value==0] = np.nan
# For loop to extract values by time
for j in np.unique(dayc):
# Condition about the year of interest
year_d = datetime.datetime.strptime(j,"%Y-%m-%d").year
if year_d == year:
# Match day of the features with the day of the band array
index = np.where(array_indexday == j)[0].tolist()
in_band = datach[j]
# Extracting values of the layer that correspond to certain time
for k in range(0,len(index)):
data = in_band[int(yt_p[index[k]]),int(xt_p[index[k]])]
value[index[k]] = data
else:
continue
#============================
# Loading values to shapefile
#============================
# Calling the shapefile again but in edition version (1)
driver = ogr.GetDriverByName('ESRI Shapefile')
datasource2 = driver.Open(shp_filename,1)
layer2 = datasource2.GetLayer()
# Reset index
datasource2.SyncToDisk()
# List of the fields of the shapefile
schema = []
ldefn = layer2.GetLayerDefn()
for n in range(ldefn.GetFieldCount()):
fdefn = ldefn.GetFieldDefn(n)
schema.append(fdefn.name)
# So, if the field exists this function updates the field, otherwise, it will create
# A new field with the specified name
if np.any(np.array(schema) == cl_field):
i=0
# For loop quering the existency of the value for certain period
for fe in layer2:
year_2 = datetime.datetime.strptime(dayc[i],"%Y-%m-%d").year
if year_2 == year:
fe.SetField(cl_field,value[i])
layer2.SetFeature(fe)
i = i + 1
else:
# Creating the the new field
new_field = ogr.FieldDefn(cl_field,ogr.OFTReal)
layer2.CreateField(new_field)
i=0
for fe in layer2:
fe.SetField(cl_field,value[i])
layer2.SetFeature(fe)
i = i + 1
# Destroy data source shapefile
datasource1.Destroy()
datasource2.Destroy()
# Close source netcdf
data_source= None
del value
print(f"Done! Year: {year}; Field {cl_field}")