Skip to content

Commit

Permalink
Adapted sampling strategy #80
Browse files Browse the repository at this point in the history
  • Loading branch information
yellowcap committed Dec 7, 2023
1 parent 12cc466 commit 3139c41
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 23 deletions.
42 changes: 22 additions & 20 deletions scripts/landcover.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,18 @@
required=True,
type=str,
)
def process(wd, worldcover, mgrs):
@click.option(
"--skip-stats",
is_flag=True,
required=False,
default=False,
)
def process(wd, worldcover, mgrs, skip_stats):
"""
Run statistics and sampling.
"""
compute_stats(wd, worldcover, mgrs)
if not skip_stats:
compute_stats(wd, worldcover, mgrs)
sample(wd)


Expand Down Expand Up @@ -162,23 +169,18 @@ def sample(wd):
data_norm = percentages(data.loc[:, data.columns != "count"])
data[data_norm.columns] = data_norm

diversity = split_highest(data, "count", 200, 2000)
urban = split_highest(data, "Built-up", 200)
wetland = split_highest(data, "Herbaceous wetland", 50)
mangroves = split_highest(data, "Mangroves", 50)
moss = split_highest(data, "Moss and lichen", 50)
cropland = split_highest(data, "Cropland", 50)
trees = split_highest(data, "Tree cover", 50)
shrubland = split_highest(data, "Shrubland", 50)
grassland = split_highest(data, "Grassland", 50)
bare = split_highest(data, "Bare / sparse vegetation", 50)
snow = split_highest(data, "Snow and Ice", 50)

selector = numpy.logical_and(
data["Permanent water bodies"] > WATER_LOWER_TH,
data["Permanent water bodies"] < WATER_UPPER_TH,
)
water = data[selector].sample(100, random_state=RANDOM_SEED)
diversity = split_highest(data, "count", 500, 3000)
urban = split_highest(data, "Built-up", 400)
wetland = split_highest(data, "Herbaceous wetland", 50, 500)
mangroves = split_highest(data, "Mangroves", 50, 500)
moss = split_highest(data, "Moss and lichen", 50, 500)
cropland = split_highest(data, "Cropland", 100, 500)
trees = split_highest(data, "Tree cover", 100, 500)
shrubland = split_highest(data, "Shrubland", 50, 500)
grassland = split_highest(data, "Grassland", 50, 500)
bare = split_highest(data, "Bare / sparse vegetation", 50, 500)
snow = split_highest(data, "Snow and Ice", 50, 500)
water = split_highest(data, "Permanent water bodies", 100, 1000)

result = pandas.concat(
[
Expand All @@ -199,7 +201,7 @@ def sample(wd):

result = result.drop_duplicates(subset=["name"])

# `driver` is a keyword of to_file(), not of Path(): Path() ignores stray
# keyword arguments on older Pythons and has warned about them since 3.12,
# so the driver must be passed to the writer itself.
result.to_file(Path(wd, "mgrs_sample.geojson"), driver="GeoJSON")
result.to_file(Path(wd, "mgrs_sample.fgb"), driver="FlatGeobuf")


if __name__ == "__main__":
Expand Down
12 changes: 9 additions & 3 deletions scripts/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import subprocess
import tempfile

import numpy as np
import rasterio
import rioxarray # noqa: F401
import xarray as xr
Expand All @@ -35,11 +36,16 @@ def filter_clouds_nodata(tile):
- bool: True if the tile is approved, False if rejected.
"""
# Check for nodata pixels
nodata_pixel_count = int(tile.sel(band="B02").isin([NODATA]).sum())
if nodata_pixel_count:
print("Too much no-data")
if int(tile.sel(band="B02").isin([NODATA]).sum()):
print("Too much no-data in B02")
return False

bands_to_check = ["vv", "vh", "dem"]
for band in bands_to_check:
if int(np.isnan(tile.sel(band=band)).sum()):
print(f"Too much no-data in {band}")
return False

# Check for cloud coverage
cloudy_pixel_count = int(tile.sel(band="SCL").isin(SCL_FILTER).sum())
if cloudy_pixel_count / PIXELS_PER_TILE >= BAD_PIXEL_MAX_PERCENTAGE:
Expand Down

0 comments on commit 3139c41

Please sign in to comment.