
Commit 38a34ea

Author: Vishnu Sai Karthik Gindi (committed)
Added a Python script to generate 100 m grid cells for the Sierras region
1 parent 4ff8d67 commit 38a34ea

File tree: 3 files changed, +499 −0 lines changed

Lines changed: 119 additions & 0 deletions
@@ -0,0 +1,119 @@
from dask.distributed import Client, LocalCluster
from concurrent.futures import ProcessPoolExecutor
from dask import delayed
import dask.dataframe as dd
import pandas as pd
import math
from pyproj import Transformer
import rioxarray as rxr
import numpy as np
from osgeo import gdal, gdalconst
from math import floor, ceil, sqrt, atan2
from numpy import rad2deg, arctan2, gradient
import dask_jobqueue
import pystac_client
import planetary_computer
from tqdm import tqdm
from dask.diagnostics import ProgressBar
from retrying import retry

def process_single_location(args):
    """Return (elevation, slope, aspect) for a single lat/lon sampled from a DEM tile."""
    lat, lon, elevation = args

    slope = elevation.copy()
    aspect = elevation.copy()

    transformer = Transformer.from_crs("EPSG:4326", elevation.rio.crs, always_xy=True)
    xx, yy = transformer.transform(lon, lat)

    tilearray = np.around(elevation.values[0]).astype(int)
    geo = (math.floor(float(lon)), 90, 0.0, math.ceil(float(lat)), 0.0, -90)

    # Round-trip the tile through an in-memory GDAL dataset
    driver = gdal.GetDriverByName('MEM')
    temp_ds = driver.Create('', tilearray.shape[1], tilearray.shape[0], 1, gdalconst.GDT_Float32)
    temp_ds.SetGeoTransform(geo)
    temp_ds.GetRasterBand(1).WriteArray(tilearray)

    tilearray_np = temp_ds.GetRasterBand(1).ReadAsArray()
    grad_y, grad_x = gradient(tilearray_np)

    # Calculate slope and aspect from the elevation gradients
    slope_arr = np.sqrt(grad_x**2 + grad_y**2)
    aspect_arr = rad2deg(arctan2(-grad_y, grad_x)) % 360

    slope.values[0] = slope_arr
    aspect.values[0] = aspect_arr

    # Sample the three rasters at the point of interest
    elev = round(elevation.sel(x=xx, y=yy, method="nearest").values[0])
    slop = round(slope.sel(x=xx, y=yy, method="nearest").values[0])
    asp = round(aspect.sel(x=xx, y=yy, method="nearest").values[0])

    return elev, slop, asp

def process_data_in_chunks(tile_group, tiles):
    """Fetch the DEM tile for this group of points and process every point in it."""
    results = []

    index_id = int(tile_group['index_id'].iloc[0])
    tile = tiles[index_id]

    @retry(stop_max_attempt_number=3, wait_fixed=2000, retry_on_exception=lambda exception: isinstance(exception, IOError))
    def fetch_and_process_elevation():
        signed_asset = planetary_computer.sign(tile.assets["data"])
        elevation = rxr.open_rasterio(signed_asset.href)

        for lat, lon in zip(tile_group['lat'], tile_group['lon']):
            try:
                result = process_single_location((lat, lon, elevation))
                results.append(result)
            except Exception as e:
                print(f"Error processing location (lat: {lat}, lon: {lon}) due to {e}. Skipping...")

    fetch_and_process_elevation()
    return pd.DataFrame(results, columns=['Elevation_m', 'Slope_Deg', 'Aspect_L'])

def process_data_in_chunks_dask(tile_group, tiles):
    """Dask-delayed variant: build delayed tasks for each point instead of computing immediately."""
    index_id = int(tile_group['index_id'].iloc[0])
    tile = tiles[index_id]

    signed_asset = planetary_computer.sign(tile.assets["data"])
    elevation = rxr.open_rasterio(signed_asset.href)

    results = [delayed(process_single_location)((lat, lon, elevation)) for lat, lon in zip(tile_group['lat'], tile_group['lon'])]
    return pd.DataFrame(results, columns=['Elevation_m', 'Slope_Deg', 'Aspect_L'])

def extract_terrain_data(df):
    """Attach elevation, slope and aspect to each grid cell in df (expects 'lat', 'lon', 'index_id' columns)."""
    #cluster = dask_jobqueue.SLURMCluster(local_directory=r'/home/vgindi/slurmcluster')
    #cluster.scale(num_workers)
    #dask_client = Client(cluster)

    client = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1", ignore_conformance=True)

    area_of_interest = {'type': 'Polygon',
                        'coordinates': [[[-120.37693519839556, 36.29213061937931],
                                         [-120.37690215328962, 38.8421802805432],
                                         [-118.29165268221286, 38.84214595220293],
                                         [-118.2917116398743, 36.29209713778364],
                                         [-120.37693519839556, 36.29213061937931]]]}

    search = client.search(collections=["cop-dem-glo-90"], intersects=area_of_interest)

    tiles = list(search.items())
    df = df[['lat', 'lon', 'index_id']]

    # Convert the DataFrame to a Dask DataFrame for distributed computing
    dask_df = dd.from_pandas(df, npartitions=df['index_id'].nunique())

    # Process each partition (grouped by 'index_id') in parallel
    results = dask_df.groupby('index_id').apply(lambda group: process_data_in_chunks(group, tiles),
                                                meta=pd.DataFrame(columns=['Elevation_m', 'Slope_Deg', 'Aspect_L']))
    with ProgressBar():
        result_df = results.compute()

    #dask_client.close()
    #cluster.close()

    # Join the terrain attributes back onto the input grid cells
    final_df = pd.concat([df.reset_index(drop=True), result_df.reset_index(drop=True)], axis=1)
    return final_df
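
The module above only defines functions; a minimal driver sketch (not part of this commit) could look like the following, assuming the grid metadata CSV written by the grid-generation script later in this commit:

# Hypothetical usage sketch, not part of the committed file.
# Assumes the CSV written by generate_grids() (see the script below) and that
# the functions above are importable in the current session.
import pandas as pd

if __name__ == "__main__":
    grid_df = pd.read_csv(r'/home/vgindi/Provided_Data/grid_cells_meta.csv')
    # extract_terrain_data() expects 'lat', 'lon' and 'index_id' columns
    terrain_df = extract_terrain_data(grid_df)
    print(terrain_df.head())
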
Lines changed: 190 additions & 0 deletions
@@ -0,0 +1,190 @@
import math
import numpy as np
import pandas as pd
import geopandas as gpd
from pyproj import CRS, Transformer
import planetary_computer
from pystac_client import Client
from osgeo import gdal, gdalconst
from shapely.geometry import Point, Polygon, box
from concurrent.futures import ThreadPoolExecutor, as_completed

def generate_grids(bounding_box):
16+
"""
17+
Generates 100m resolution grid cells for low sierras regions uisng bounding box coordinates. Do not run this code if you have the file downloaded
18+
this might take longer to run this.
19+
"""
20+
#gdf_shapefile = gpd.read_file(shapefile_path)
21+
## Get bounding box coordinates
22+
#minx, miny, maxx, maxy = gdf_shapefile.total_bounds
23+
24+
minx, miny, maxx, maxy = *bounding_box[0], *bounding_box[1]
25+
bbox_polygon = box(minx, miny, maxx, maxy)
26+
27+
#buffer the bounds
28+
minx = minx-1
29+
maxx = maxx+1
30+
miny = miny-1
31+
maxy = maxy+1
32+
33+
# Define the source and target coordinate reference systems
34+
src_crs = CRS('EPSG:4326') # WGS84
35+
36+
if -126 < minx < -120:
37+
# transformer = Transformer.from_crs(src_crs, 'epsg:32610', always_xy=True)
38+
target_crs = CRS('EPSG:32610') #UTM zone 10
39+
elif -120 < minx < -114:
40+
# transformer = Transformer.from_crs(src_crs, 'epsg:32611', always_xy=True)
41+
target_crs = CRS('EPSG:32611') #UTM zone 11
42+
elif -114 < minx < -108:
43+
# transformer = Transformer.from_crs(src_crs, 'epsg:32612', always_xy=True)
44+
target_crs = CRS('EPSG:32612') #UTM zone 12
45+
elif -108 < minx < -102:
46+
# transformer = Transformer.from_crs(src_crs, 'epsg:32613', always_xy=True)
47+
target_crs = CRS('EPSG:32613') #UTM zone 13
48+
else:
49+
# transformer = Transformer.from_crs(src_crs, target_crs, always_xy=True)
50+
target_crs = CRS('EPSG:3857') #Web Mercator
51+
52+
transformer = Transformer.from_crs(src_crs, target_crs, always_xy=True)

    # Project the buffered bounding box to the target CRS (metres)
    minx, miny = transformer.transform(minx, miny)
    maxx, maxy = transformer.transform(maxx, maxy)

    # Set the grid cell size in metres
    cell_size = 100

    # Calculate the number of cells in the x and y directions
    num_cells_x = int((maxx - minx) / cell_size)
    num_cells_y = int((maxy - miny) / cell_size)

    # Calculate the total grid width and height
    grid_width = num_cells_x * cell_size
    grid_height = num_cells_y * cell_size

    # Calculate the offset to center the grid
    offset_x = ((maxx - minx) - grid_width) / 2
    offset_y = ((maxy - miny) - grid_height) / 2

    # Generate easting and northing ranges
    lon_range = np.linspace(minx + offset_x, maxx - offset_x, num=num_cells_x)
    lat_range = np.linspace(miny + offset_y, maxy - offset_y, num=num_cells_y)

    # Create a grid of points
    points = []
    for lon in lon_range:
        for lat in lat_range:
            points.append((lon, lat))

    # Convert the coordinate pairs back to WGS84
    back_transformer = Transformer.from_crs(target_crs, src_crs, always_xy=True)
    target_coordinates = [back_transformer.transform(lon, lat) for lon, lat in points]

    # Create a list of Shapely Point geometries
    coords = [Point(lon, lat) for lon, lat in target_coordinates]

    # Create a GeoDataFrame from the points and set its CRS to WGS84
    gdf_points = gpd.GeoDataFrame(geometry=coords)
    gdf_points = gdf_points.set_crs('epsg:4326')

    # Clip the points to the original (unbuffered) bounding-box polygon
    gdf_clipped_points = gpd.clip(gdf_points, bbox_polygon)

    # Export the clipped points to a shapefile
    output_shapefile = r'/home/vgindi/Provided_Data/low_sierras_points.shp'
    gdf_clipped_points.to_file(output_shapefile)
    print("Regional Grid Created")

    # Create submission-format .csv for SWE predictions
    gdf_clipped_points.index.names = ['cell_id']
    Geospatial_df = pd.DataFrame()
    Geospatial_df['lon'] = gdf_clipped_points['geometry'].x
    Geospatial_df['lat'] = gdf_clipped_points['geometry'].y

    ### Begin process to import geospatial features into DF
    min_lon = min(Geospatial_df['lon'])
    min_lat = min(Geospatial_df['lat'])
    max_lon = max(Geospatial_df['lon'])
    max_lat = max(Geospatial_df['lat'])

    # Define the source and target coordinate reference systems
    src_crs = CRS('EPSG:4326')  # WGS84
    if -126 < min_lon < -120:
        target_crs = CRS('EPSG:32610')  # UTM zone 10
    elif -120 < min_lon < -114:
        target_crs = CRS('EPSG:32611')  # UTM zone 11
    elif -114 < min_lon < -108:
        target_crs = CRS('EPSG:32612')  # UTM zone 12
    elif -108 < min_lon < -102:
        target_crs = CRS('EPSG:32613')  # UTM zone 13
    else:
        target_crs = CRS('EPSG:3857')  # Web Mercator

    transformer = Transformer.from_crs(src_crs, target_crs, always_xy=True)

    # Project the cell centres to the target CRS (metres)
    Geospatial_df['lon_m'], Geospatial_df['lat_m'] = transformer.transform(Geospatial_df['lon'].to_numpy(), Geospatial_df['lat'].to_numpy())
    geocols = ['BR_Coord_Long', 'BR_Coord_Lat', 'UR_Coord_Long', 'UR_Coord_Lat',
               'UL_Coord_Long', 'UL_Coord_Lat', 'BL_Coord_Long', 'BL_Coord_Lat']

    Geospatial_df = Geospatial_df.reindex(columns=[*Geospatial_df.columns.tolist(), *geocols], fill_value=0)
    Geospatial_df.reset_index(drop=True, inplace=True)

    # Offset each centre by +/- 50 m to obtain the four corners of the 100 m cell
    Geospatial_df = Geospatial_df.assign(BR_Coord_Long=lambda x: x.lon_m + 50,
                                         BR_Coord_Lat=lambda x: x.lat_m - 50,
                                         UR_Coord_Long=lambda x: x.lon_m + 50,
                                         UR_Coord_Lat=lambda x: x.lat_m + 50,
                                         UL_Coord_Long=lambda x: x.lon_m - 50,
                                         UL_Coord_Lat=lambda x: x.lat_m + 50,
                                         BL_Coord_Long=lambda x: x.lon_m - 50,
                                         BL_Coord_Lat=lambda x: x.lat_m - 50)

    # Convert the corner coordinates back to WGS84
    transformer = Transformer.from_crs(target_crs, src_crs, always_xy=True)
    Geospatial_df['BR_Coord_Long'], Geospatial_df['BR_Coord_Lat'] = transformer.transform(Geospatial_df['BR_Coord_Long'].to_numpy(), Geospatial_df['BR_Coord_Lat'].to_numpy())
    Geospatial_df['UR_Coord_Long'], Geospatial_df['UR_Coord_Lat'] = transformer.transform(Geospatial_df['UR_Coord_Long'].to_numpy(), Geospatial_df['UR_Coord_Lat'].to_numpy())
    Geospatial_df['UL_Coord_Long'], Geospatial_df['UL_Coord_Lat'] = transformer.transform(Geospatial_df['UL_Coord_Long'].to_numpy(), Geospatial_df['UL_Coord_Lat'].to_numpy())
    Geospatial_df['BL_Coord_Long'], Geospatial_df['BL_Coord_Lat'] = transformer.transform(Geospatial_df['BL_Coord_Long'].to_numpy(), Geospatial_df['BL_Coord_Lat'].to_numpy())

    Geospatial_df['cell_id'] = Geospatial_df.apply(lambda x: f"11N_cell_{x['lon']}_{x['lat']}", axis=1)

    # Search the Planetary Computer STAC catalogue for COP-DEM GLO-90 tiles covering the grid
    client = Client.open(
        "https://planetarycomputer.microsoft.com/api/stac/v1",
        ignore_conformance=True,
    )

    search = client.search(
        collections=["cop-dem-glo-90"],
        intersects={
            "type": "Polygon",
            "coordinates": [[
                [min_lon, min_lat],
                [max_lon, min_lat],
                [max_lon, max_lat],
                [min_lon, max_lat],
                [min_lon, min_lat]
            ]]})

    tiles = list(search.items())
    regions = pd.DataFrame([(i, tile.id) for i, tile in enumerate(tiles)], columns=['sliceID', 'tileID']).set_index('tileID')

    # Map each cell to the DEM tile that contains it via the Copernicus tile naming convention
    Geospatial_df['tile_id'] = Geospatial_df.apply(lambda x: 'Copernicus_DSM_COG_30_N' + str(math.floor(x['lat'])) + '_00_W' + str(math.ceil(abs(x['lon']))) + '_00_DEM', axis=1)

    regions_dict = regions['sliceID'].to_dict()
    Geospatial_df['index_id'] = Geospatial_df['tile_id'].map(regions_dict)
    Geospatial_df = Geospatial_df.drop(columns=['tile_id'])

    return Geospatial_df[['cell_id', 'lon', 'lat', 'BR_Coord_Long', 'BR_Coord_Lat', 'UR_Coord_Long', 'UR_Coord_Lat',
                          'UL_Coord_Long', 'UL_Coord_Lat', 'BL_Coord_Long', 'BL_Coord_Lat', 'index_id']]

if __name__ == "__main__":
    bounding_box = ((-120.3763448720203, 36.29256774541929), (-118.292253412863, 38.994985247736324))
    grid_cells_meta = generate_grids(bounding_box)
    grid_cells_meta.to_csv(r'/home/vgindi/Provided_Data/grid_cells_meta.csv')
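
A quick sanity check of the generated grid is sketched below (illustrative only, not part of the commit): it re-projects the upper-right and upper-left cell corners to UTM zone 10, which the code above selects for this bounding box, and confirms the roughly 100 m cell width.

# Illustrative sanity check (not part of the committed file): verify the ~100 m cell width.
import pandas as pd
from pyproj import Transformer

check_df = pd.read_csv(r'/home/vgindi/Provided_Data/grid_cells_meta.csv')
to_utm = Transformer.from_crs('EPSG:4326', 'EPSG:32610', always_xy=True)  # UTM zone 10, as chosen above
ur_x, _ = to_utm.transform(check_df['UR_Coord_Long'].to_numpy(), check_df['UR_Coord_Lat'].to_numpy())
ul_x, _ = to_utm.transform(check_df['UL_Coord_Long'].to_numpy(), check_df['UL_Coord_Lat'].to_numpy())
print((ur_x - ul_x).mean())  # expected to be close to 100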
