4 changes: 4 additions & 0 deletions prepare_layers/convert_crosswalk.py
@@ -50,6 +50,10 @@ def convert_crosswalk(
        for code in codes:
            res.append([hab, code])

    # This addition is for the islands correction layer, which will
    # be added as an effective lcc_0.tif
    res.append(["islands", "0"])

    df = pd.DataFrame(res, columns=["code", "value"])
    df.to_csv(output_path, index=False)

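The new crosswalk row makes "islands" resolve to habitat value 0. Below is a rough sketch of how a downstream lookup would then pick up the correction layer, assuming per-value rasters named lcc_<value>.tif (as the "effective lcc_0.tif" comment implies); habitat_rasters is a hypothetical helper for illustration, not pipeline code:

# Hypothetical illustration, not pipeline code: resolve a species' habitat
# codes to raster paths via the crosswalk CSV written above.
import pandas as pd
from pathlib import Path

def habitat_rasters(habitats, crosswalk_csv: Path, layer_dir: Path) -> list[Path]:
    crosswalk = pd.read_csv(crosswalk_csv, dtype=str)
    values = crosswalk.loc[crosswalk["code"].isin(habitats), "value"]
    return [layer_dir / f"lcc_{v}.tif" for v in values.unique()]

# With the new row, "islands" resolves to lcc_0.tif, the missing-islands layer.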
40 changes: 40 additions & 0 deletions prepare_layers/remove_nans_from_mask.py
@@ -0,0 +1,40 @@
import argparse
import os
from pathlib import Path

import yirgacheffe as yg

def remove_nans_from_mask(
    input_path: Path,
    output_path: Path,
) -> None:
    os.makedirs(output_path.parent, exist_ok=True)
    with yg.read_raster(input_path) as layer:
        converted = layer.nan_to_num()
        converted.to_geotiff(output_path)

def main() -> None:
    parser = argparse.ArgumentParser(description="Convert NaNs to zeros in mask layers")
    parser.add_argument(
        '--original',
        type=Path,
        help="Original raster",
        required=True,
        dest="original_path",
    )
    parser.add_argument(
        '--output',
        type=Path,
        help='Destination raster',
        required=True,
        dest='output_path',
    )
    args = parser.parse_args()

    remove_nans_from_mask(
        args.original_path,
        args.output_path
    )

if __name__ == "__main__":
    main()
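A minimal usage sketch of the new script, called as a function rather than via the CLI; the paths mirror the ones scripts/run.sh uses below, with a hypothetical data/ root standing in for DATADIR, and assume the repo root is importable:

# Sketch: same operation the --original/--output CLI performs.
from pathlib import Path
from prepare_layers.remove_nans_from_mask import remove_nans_from_mask

remove_nans_from_mask(
    Path("data/Zenodo/CGLS100Inland_withGADMIslands.tif"),  # mask containing NaNs
    Path("data/masks/CGLS100Inland_withGADMIslands.tif"),   # NaN-free copy
)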
15 changes: 14 additions & 1 deletion prepare_species/common.py
@@ -28,7 +28,8 @@
"threats",
"category",
"category_weight",
"geometry"
"geometry",
"in_star",
]

# From Muir et al: For each species, a global STAR threat abatement (START) score
@@ -84,6 +85,8 @@ class SpeciesReport:
"keeps_habitats",
"has_geometries",
"keeps_geometries",
"has_category",
"in_star",
"filename",
]

@@ -224,6 +227,16 @@ def tidy_reproject_save(
    os.makedirs(output_directory_path, exist_ok=True)
    output_path = output_directory_path / f"{grow.id_no}.geojson"
    res = gpd.GeoDataFrame(grow.to_frame().transpose(), crs=src_crs, geometry="geometry")

    # Ensure proper dtypes for JSON serialization
    int_columns = ['id_no', 'assessment_id', 'assessment_year', 'elevation_lower', 'elevation_upper', 'category_weight']
    for col in int_columns:
        if col in res.columns:
            res[col] = res[col].astype('Int64') # Nullable integer type

    if 'in_star' in res.columns:
        res['in_star'] = res['in_star'].astype(bool)

    res_projected = res.to_crs(target_crs)
    res_projected.to_file(output_path, driver="GeoJSON")
    report.filename = output_path
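The Int64 cast matters because pandas promotes an integer column containing None to float64, so a value like an elevation_lower of 100 would serialize as 100.0 in the GeoJSON. A standalone illustration of that behaviour:

# Standalone illustration of the dtype problem the cast above avoids.
import pandas as pd

s = pd.Series([100, None])
print(s.dtype)                  # float64: the 100 has become 100.0
print(s.astype("Int64").dtype)  # Int64: integer kept, None becomes <NA>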
60 changes: 43 additions & 17 deletions prepare_species/extract_species_data_psql.py
@@ -30,6 +30,10 @@
logging.basicConfig()
logger.setLevel(logging.DEBUG)

# Note that this query returns more species than would be accepted into STAR based on
# threat category, but for model checking we want all AOHs for a given taxon, so the
# pipeline must process them all, and then just include the correct set when processing
# STAR
MAIN_STATEMENT = """
SELECT
    assessments.sis_taxon_id as id_no,
@@ -54,7 +58,7 @@
    AND taxons.class_name = %s
    AND taxons.infra_type is NULL -- no subspecies
    AND taxons.metadata->>'taxon_level' = 'Species'
    AND red_list_category_lookup.code IN ('NT', 'VU', 'EN', 'CR')
    AND red_list_category_lookup.code IN ('DD', 'LC', 'NT', 'VU', 'EN', 'CR')
"""

SYSTEMS_STATEMENT = """
@@ -138,20 +142,10 @@ def process_row(
        presence += (4,)
        report.possibly_extinct = True # pylint: disable=W0201

    cursor.execute(SYSTEMS_STATEMENT, (assessment_id,))
    systems_data = cursor.fetchall()
    try:
        systems = process_systems(systems_data, report)
    except ValueError as exc:
        logger.debug("Dropping %s: %s", id_no, str(exc))
        return report

    cursor.execute(THREATS_STATEMENT, (assessment_id,))
    raw_threats = cursor.fetchall()
    threats = process_threats(raw_threats, report)
    if len(threats) == 0:
        return report
    include_in_star = category in ('NT', 'VU', 'EN', 'CR')
    report.has_category = include_in_star

    # First checks are the ones that rule out being able to make an AOH at all
    cursor.execute(HABITATS_STATEMENT, (assessment_id,))
    raw_habitats = cursor.fetchall()
    try:
@@ -170,6 +164,37 @@
    except ValueError as _exc:
        return report

    # Second checks are whether it is good for STAR, so from hereon we should
    # output a GeoJSON regardless as we should make an AOH for validation with this
    cursor.execute(SYSTEMS_STATEMENT, (assessment_id,))
    systems_data = cursor.fetchall()
    try:
        systems = process_systems(systems_data, report)
    except ValueError as exc:
        logger.debug("Dropping %s: %s", id_no, str(exc))
        include_in_star = False
        try:
            systems = systems_data[0][0]
        except IndexError:
            systems = []

    cursor.execute(THREATS_STATEMENT, (assessment_id,))
    raw_threats = cursor.fetchall()
    threats = process_threats(raw_threats, report)
    if len(threats) == 0:
        include_in_star = False

    report.in_star = include_in_star

    try:
        category_weight = CATEGORY_WEIGHTS[category]
    except KeyError:
        assert include_in_star is False
        category_weight = 0

    # This is a fix as per the method to include the missing islands layer:
    habitats_list = list(habitats) + ["islands"]

    gdf = gpd.GeoDataFrame(
        [[
            id_no,
@@ -179,14 +204,15 @@
            systems,
            int(elevation_lower) if elevation_lower is not None else None,
            int(elevation_upper) if elevation_upper is not None else None,
            '|'.join(list(habitats)),
            '|'.join(habitats_list),
            scientific_name,
            family_name,
            class_name,
            json.dumps(threats),
            category,
            CATEGORY_WEIGHTS[category],
            geometry
            category_weight,
            geometry,
            include_in_star,
        ]],
        columns=COLUMNS,
        crs=target_projection or 'epsg:4326'
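Condensed, the reworked flow makes STAR membership a conjunction of three checks while still producing an AOH either way. A sketch of the logic only, not the actual pipeline code:

# Sketch: in_star needs an eligible Red List category, valid systems data,
# and at least one recognised threat; failing a check no longer drops the
# species, it only excludes it from STAR.
STAR_CATEGORIES = {"NT", "VU", "EN", "CR"}

def include_in_star(category: str, systems_ok: bool, n_threats: int) -> bool:
    return category in STAR_CATEGORIES and systems_ok and n_threats > 0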
4 changes: 3 additions & 1 deletion prepare_species/extract_species_data_redlist.py
@@ -243,6 +243,8 @@ def process_species(
        logger.debug("Dropping %s: %s", id_no, exc)
        return report

    habitats_list = list(habitats) + ["islands"]

    # Create GeoDataFrame with all data
    gdf = gpd.GeoDataFrame(
        [[
@@ -253,7 +255,7 @@
            systems,
            elevation_lower,
            elevation_upper,
            '|'.join(list(habitats)),
            '|'.join(habitats_list),
            scientific_name,
            family_name,
            class_name,
4 changes: 2 additions & 2 deletions requirements.txt
@@ -7,8 +7,8 @@ pymer4
pyproj
scikit-image
redlistapi
yirgacheffe>=1.9
aoh[validation]>=1.0
yirgacheffe>=1.12,<2.0
aoh[validation]>=2.0.1,<3.0

# GDAL should be installed manually to match the version of the library installed on your machine
gdal[numpy]
43 changes: 23 additions & 20 deletions scripts/run.sh
@@ -63,32 +63,20 @@ if [ ! -d "${DATADIR}"/habitat_layers ]; then

echo "Processing habitat map..."
aoh-habitat-process --habitat "${DATADIR}"/habitat/raw.tif \
--scale 1000.0 \
--scale 992.292720200000133 \
--projection "ESRI:54009" \
--output "${DATADIR}"/tmp_habitat_layers/current
mv "${DATADIR}"/tmp_habitat_layers "${DATADIR}"/habitat_layers
fi

if [ ! -d "${DATADIR}"/masks ]; then
    echo "Processing masks..."
    python3 ./prepare_layers/make_masks.py --habitat_layers "${DATADIR}"/habitat_layers/current \
        --output_directory "${DATADIR}"/masks
if [ ! -f "${DATADIR}"/habitat_layers/current/lcc_0.tif ]; then
    cp "${DATADIR}/Zenodo/MissingLandcover_1km_cover.tif" "${DATADIR}"/habitat_layers/current/lcc_0.tif
fi

# Fetch and prepare the elevation layers
if [[ ! -f "${DATADIR}"/elevation/elevation-max-1k.tif || ! -f "${DATADIR}"/elevation/elevation-min-1k.tif ]]; then
    if [ ! -f "${DATADIR}"/elevation/elevation.tif ]; then
        echo "Fetching elevation map..."
        reclaimer zenodo --zenodo_id 5719984 --filename dem-100m-esri54017.tif --output "${DATADIR}"/elevation/elevation.tif
    fi
    if [ ! -f "${DATADIR}"/elevation/elevation-max-1k.tif ]; then
        echo "Generating elevation max layer..."
        gdalwarp -t_srs ESRI:54009 -tr 1000 -1000 -r max -co COMPRESS=LZW -wo NUM_THREADS=40 "${DATADIR}"/elevation/elevation.tif "${DATADIR}"/elevation/elevation-max-1k.tif
    fi
    if [ ! -f "${DATADIR}"/elevation/elevation-min-1k.tif ]; then
        echo "Generating elevation min layer..."
        gdalwarp -t_srs ESRI:54009 -tr 1000 -1000 -r min -co COMPRESS=LZW -wo NUM_THREADS=40 "${DATADIR}"/elevation/elevation.tif "${DATADIR}"/elevation/elevation-min-1k.tif
    fi
if [ ! -f "${DATADIR}"/masks/CGLS100Inland_withGADMIslands.tif ]; then
    echo "Processing masks..."
    python3 ./prepare_layers/remove_nans_from_mask.py --original "${DATADIR}"/Zenodo/CGLS100Inland_withGADMIslands.tif \
        --output "${DATADIR}"/masks/CGLS100Inland_withGADMIslands.tif
fi

# Generate the crosswalk table
@@ -102,7 +90,10 @@ for TAXA in "${TAXALIST[@]}"
do
    if [ ! -d "${DATADIR}"/species-info/"${TAXA}"/ ]; then
        echo "Extracting species data for ${TAXA}..."
        python3 ./prepare_species/extract_species_data_psql.py --class "${TAXA}" --output "${DATADIR}"/species-info/"${TAXA}"/ --projection "ESRI:54009" --excludes "${DATADIR}"/SpeciesList_generalisedRangePolygons.csv
        python3 ./prepare_species/extract_species_data_psql.py --class "${TAXA}" \
            --output "${DATADIR}"/species-info/"${TAXA}"/ \
            --projection "ESRI:54009" \
            --excludes "${DATADIR}"/SpeciesList_generalisedRangePolygons.csv
    fi
done

@@ -133,6 +124,18 @@ aoh-collate-data --aoh_results "${DATADIR}"/aohs/current/ \
echo "Calculating model validation..."
aoh-validate-prevalence --collated_aoh_data "${DATADIR}"/validation/aohs.csv \
--output "${DATADIR}"/validation/model_validation.csv
for TAXA in "${TAXALIST[@]}"
do
echo "Fetching GBIF data for ${TAXA}..."
aoh-fetch-gbif-data --collated_aoh_data "${DATADIR}"/validation/aohs.csv \
--taxa "${TAXA}" \
--output_dir "${DATADIR}"/validation/occurrences/
echo "Validating occurrences for ${TAXA}..."
aoh-validate-occurrences --gbif_data_path "${DATADIR}"/validation/occurrences/"${TAXA}" \
--species_data "${DATADIR}"/species-info/"${TAXA}"/current/ \
--aoh_results "${DATADIR}"/aohs/current/"${TAXA}"/ \
--output "${DATADIR}"/validation/occurrences/"${TAXA}".csv
done

# Threats
echo "Generating threat task list..."
10 changes: 5 additions & 5 deletions utils/aoh_generator.py
@@ -22,21 +22,21 @@ def aoh_generator(
    for species_path in species_paths:
        res.append([
            data_dir / "habitat_layers" / scenario,
            data_dir / "elevation" / "elevation-max-1k.tif",
            data_dir / "elevation" / "elevation-min-1k.tif",
            data_dir / "Zenodo" / "FABDEM_1km_max_patched.tif",
            data_dir / "Zenodo" / "FABDEM_1km_min_patched.tif",
            data_dir / "crosswalk.csv",
            species_path,
            data_dir / "masks" / "terrestrial_mask.tif",
            data_dir / "masks" / "CGLS100Inland_withGADMIslands.tif",
            data_dir / "aohs" / scenario / taxa_dir_path.name,
        ])

    df = pd.DataFrame(res, columns=[
        '--habitats',
        '--fractional_habitats',
        '--elevation-max',
        '--elevation-min',
        '--crosswalk',
        '--speciesdata',
        '--area',
        '--weights',
        '--output'
    ])
    output_dir, _ = os.path.split(output_csv_path)
17 changes: 12 additions & 5 deletions utils/threats_generator.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python3

import argparse
import json
import os
from pathlib import Path

@@ -23,11 +24,17 @@ def threats_generator(
        taxon_id = species_path.stem
        aoh_path = data_dir / "aohs" / source / taxa_dir_path.name / f"{taxon_id}_all.tif"
        if aoh_path.exists():
            res.append([
                species_path,
                aoh_path,
                data_dir / "threat_rasters" / taxa_dir_path.name,
            ])
            # Check if species should be included in STAR. We don't use geopandas here
            # because it will parse the geometry, which can be quite slow, and we just
            # want a single bool field
            with open(species_path, 'r', encoding="UTF-8") as f:
                data = json.load(f)
            if data['features'][0]['properties']['in_star']:
                res.append([
                    species_path,
                    aoh_path,
                    data_dir / "threat_rasters" / taxa_dir_path.name,
                ])

    df = pd.DataFrame(res, columns=[
        '--speciesdata',
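The json.load shortcut assumes each per-species GeoJSON is a FeatureCollection with a single feature carrying the in_star property, which is what tidy_reproject_save writes. A minimal self-check sketch, with a hypothetical file path:

# Sketch: assert a species file has the shape the fast check relies on.
import json
from pathlib import Path

path = Path("data/species-info/AVES/current/12345.geojson")  # hypothetical
with open(path, "r", encoding="UTF-8") as f:
    data = json.load(f)
assert data["type"] == "FeatureCollection"
assert len(data["features"]) == 1
assert isinstance(data["features"][0]["properties"]["in_star"], bool)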