Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

building and poi scripts about omf #12

Merged
merged 12 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .gitignore
MatRouillard marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,9 @@ __pycache__/
cache/
.streamlit/
tiles
other.py
other.py

# ignore data dir
Bash/**/data/**
Bash/**/tsv/**
Bash/**/overturemaps-py/**
19 changes: 19 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"editor.formatOnSave": true,
"editor.tabSize": 2,
"editor.defaultFormatter": "esbenp.prettier-vscode",
"python.testing.unittestArgs": ["-v", "-s", ".", "-p", "test_*.py"],
"python.testing.pytestEnabled": false,
"python.testing.unittestEnabled": true,
"black-formatter.args": ["--line-length", "120"],
"flake8.args": ["--max-line-length", "120"],
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
},
"[javascript]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[sql]": {
"editor.defaultFormatter": "inferrinizzard.prettier-sql-vscode"
}
}
40 changes: 40 additions & 0 deletions Bash/OMF/Analyze/copy_place_category_main_to_tsv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash
#
# Export the distribution of categories->>'main' for every city's place table.
#
# For each city listed in `cities` the script
#   1. prints the number of rows in omf.<city>_place whose categories->>'main'
#      is not null, and
#   2. writes one "<main_category>\t<count>" line per category, most frequent
#      first, to ./tsv/omf_place_category_main_<city>.tsv next to this script.
#
# Every query is run through `psql -d postgres`.

# Reset bash's built-in SECONDS counter; it is read at the end to report the
# elapsed time.
SECONDS=0

# Work from the directory that contains this script. The expansions are quoted
# so a checkout path with spaces resolves correctly, and a failed cd aborts
# instead of silently writing ./tsv/... relative to the caller's directory.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) || exit 1
cd "${SCRIPT_DIR}" || exit 1

# Output directory for the generated TSV files.
mkdir -p "${SCRIPT_DIR}/tsv"

# Cities to analyze; each one maps to the table omf.<city>_place.
cities=("tokyo" "tateyama" "hamamatsu" "higashi_hiroshima" "kumamoto" "morioka")

for city in "${cities[@]}"; do
  echo "$city"

  # Total number of places that carry a main category (printed to stdout).
  psql -d postgres -c "select
    count(categories->>'main') as category_cnt
  from omf.${city}_place
  where categories->>'main' is not null"

  # Per-category counts, exported client-side with \copy as tab-separated text.
  psql -d postgres -c "\copy (
    select
      distinct categories->>'main' as main_category,
      count(*) as category_cnt
    from omf.${city}_place
    where categories->>'main' is not null
    group by categories->>'main'
    order by category_cnt desc
  ) to './tsv/omf_place_category_main_$city.tsv'
  with csv delimiter E'\t';"
done

# Report the elapsed wall-clock time as H:MM:SS.
elapsed=$SECONDS
hrs=$((elapsed / 3600))
min=$(((elapsed % 3600) / 60))
sec=$((elapsed % 60))
printf 'Processing time is %d:%02d:%02d\n' "$hrs" "$min" "$sec"

exit 0
81 changes: 81 additions & 0 deletions Bash/OMF/Analyze/copy_place_cnt_to_tsv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/bin/bash
#
# Count the non-null values of every column of omf.<city>_place and collect
# the result for all cities in ./tsv/omf_place_cnt.tsv (next to this script).
#
# Output layout: one header line ("city", then "<column>_cnt" per column)
# followed by one tab-separated row per city.
#
# Every query is run through `psql -d postgres`.

# Reset bash's built-in SECONDS counter; it is read at the end to report the
# elapsed time.
SECONDS=0

# Work from the directory that contains this script. Quoted so a path with
# spaces works; a failed cd aborts rather than writing to the caller's cwd.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) || exit 1
cd "${SCRIPT_DIR}" || exit 1

# Output directory for the generated TSV file.
mkdir -p "${SCRIPT_DIR}/tsv"

# Cities to analyze; each one maps to the table omf.<city>_place.
cities=("tokyo" "tateyama" "hamamatsu" "higashi_hiroshima" "kumamoto" "morioka")
output="./tsv/omf_place_cnt.tsv"
tmp_file="omf.tmp"

# Columns whose non-null values are counted. The header line and the SQL
# select list are both generated from this single list so they cannot drift.
columns=(
  ogc_fid wkb_geometry id version update_time sources names categories
  confidence websites emails socials phones addresses brand
)

header="city"
count_exprs=""
for col in "${columns[@]}"; do
  header+=$'\t'"${col}_cnt"
  count_exprs+=",
      count(${col}) as ${col}_cnt"
done

# Remove the scratch file on every exit path.
trap 'rm -f -- "$tmp_file"' EXIT

# Write the header directly. The previous approach queried the database with
# \$city still unset (table "omf._place"), which fails, and an aggregate-only
# query would have produced a spurious all-zero data row anyway.
# Redirecting with ">" also truncates any previous result, so no separate rm
# (which errors on the first run) is needed.
printf '%s\n' "$header" > "$output"

for city in "${cities[@]}"; do
  echo "$city"

  # Start from a clean scratch file so a failed export can never re-append
  # the previous city's rows.
  rm -f -- "$tmp_file"

  if psql -d postgres -c "\copy (
    select
      '$city' as city${count_exprs}
    from
      omf.${city}_place
  ) to '${tmp_file}'
  with csv delimiter E'\t';"; then
    cat -- "$tmp_file" >> "$output"
  else
    echo "export failed for $city; no row written" >&2
  fi
done

# Report the elapsed wall-clock time as H:MM:SS.
elapsed=$SECONDS
hrs=$((elapsed / 3600))
min=$(((elapsed % 3600) / 60))
sec=$((elapsed % 60))
printf 'Processing time is %d:%02d:%02d\n' "$hrs" "$min" "$sec"

exit 0
48 changes: 48 additions & 0 deletions Bash/OMF/Analyze/copy_place_source_to_tsv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash
#
# Report, per city, how many place rows reference each source dataset.
#
# For every city the script appends to ./tsv/omf_source_in_place.tsv
#   - a marker line "city <city>", then
#   - "<dataset>\t<count>" lines: one row labelled 'total' with the table's
#     row count plus one row per distinct sources[].dataset value, ordered by
#     dataset name descending.
#
# Every query is run through `psql -d postgres`.

# Reset bash's built-in SECONDS counter; it is read at the end to report the
# elapsed time.
SECONDS=0

# Work from the directory that contains this script. Quoted so a path with
# spaces works; a failed cd aborts rather than writing to the caller's cwd.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) || exit 1
cd "${SCRIPT_DIR}" || exit 1

# Output directory for the generated TSV file.
mkdir -p "${SCRIPT_DIR}/tsv"

# Cities to analyze; each one maps to the table omf.<city>_place.
cities=("tokyo" "tateyama" "hamamatsu" "higashi_hiroshima" "kumamoto" "morioka")
output="./tsv/omf_source_in_place.tsv"
tmp_file="omf_source_in_place.tmp"

# Remove the scratch file on every exit path.
trap 'rm -f -- "$tmp_file"' EXIT

# Start with an empty result file. Truncating (instead of `rm`) does not
# print an error when the file does not exist yet.
: > "$output"

for city in "${cities[@]}"; do
  echo "$city"

  # Start from a clean scratch file so a failed export can never re-append
  # the previous city's rows under this city's marker line.
  rm -f -- "$tmp_file"

  if psql -d postgres -c "\copy (
    select
      dataset, cnt
    from (
      select 'total' as dataset, count(*) as cnt from omf.${city}_place
      union
      select distinct source->>'dataset' as dataset, count(*) as cnt
      from omf.${city}_place,
        jsonb_array_elements(sources) as source
      where source->>'dataset' is not null
      group by source->>'dataset'
    ) a
    order by dataset desc
  ) to '${tmp_file}'
  with csv delimiter E'\t';"; then
    echo "city $city" >> "$output"
    cat -- "$tmp_file" >> "$output"
  else
    echo "export failed for $city; nothing written" >&2
  fi
done

# Report the elapsed wall-clock time as H:MM:SS.
elapsed=$SECONDS
hrs=$((elapsed / 3600))
min=$(((elapsed % 3600) / 60))
sec=$((elapsed % 60))
printf 'Processing time is %d:%02d:%02d\n' "$hrs" "$min" "$sec"

exit 0
47 changes: 47 additions & 0 deletions Bash/OMF/Analyze/copy_place_to_tsv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
#
# Dump every row of omf.<city>_place to ./tsv/omf_place_<city>.tsv (next to
# this script), one file per city, as tab-separated text without a header.
#
# The geometry is exported as WKT via ST_AsText, and the sources, names,
# categories, addresses and brand columns are cast to text.
#
# Every query is run through `psql -d postgres`.

# Reset bash's built-in SECONDS counter; it is read at the end to report the
# elapsed time.
SECONDS=0

# Work from the directory that contains this script. Quoted so a path with
# spaces works; a failed cd aborts rather than writing to the caller's cwd.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) || exit 1
cd "${SCRIPT_DIR}" || exit 1

# Output directory for the generated TSV files.
mkdir -p "${SCRIPT_DIR}/tsv"

# Cities to export; each one maps to the table omf.<city>_place.
cities=("tokyo" "tateyama" "hamamatsu" "higashi_hiroshima" "kumamoto" "morioka")

for city in "${cities[@]}"; do
  echo "$city"

  # Client-side export with \copy, so the file is written by psql itself.
  psql -d postgres -c "\copy (
    select
      ogc_fid,
      ST_AsText(wkb_geometry) as wkb_geometry,
      id,
      version,
      update_time,
      sources::text,
      names::text,
      categories::text,
      confidence,
      websites,
      emails,
      socials,
      phones,
      addresses::text,
      brand::text
    from
      omf.${city}_place
  ) to './tsv/omf_place_$city.tsv'
  with csv delimiter E'\t';"
done

# Report the elapsed wall-clock time as H:MM:SS.
elapsed=$SECONDS
hrs=$((elapsed / 3600))
min=$(((elapsed % 3600) / 60))
sec=$((elapsed % 60))
printf 'Processing time is %d:%02d:%02d\n' "$hrs" "$min" "$sec"

exit 0
74 changes: 74 additions & 0 deletions Bash/OMF/ETL/omf_download.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/bin/bash
#
# Usage: omf_download.sh <type> <release_version>
#   <type>             e.g. building, place
#   <release_version>  e.g. latest, 2024-07-22.0, 2024-06-13-beta.1
#
# Downloaded files go to <script dir>/data/<release_version>/overturemaps/.
# The script expects a git checkout of overturemaps-py in ../overturemaps-py
# (relative to this script) and rewrites the release path inside its
# overturemaps/core.py to the requested version.

### Preprocessing ###

# Reset bash's built-in SECONDS counter; it is read at the end to report the
# elapsed time.
SECONDS=0

# Work from the directory that contains this script. Quoted so a path with
# spaces works; a failed cd aborts instead of continuing in the wrong place.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) || exit 1
cd "${SCRIPT_DIR}" || exit 1

# Validate the command line arguments.
if [[ -z "${1:-}" ]]; then
  echo -e "Write Type in first argument.\n e.g. [building, place]"
  exit 1
fi
if [[ -z "${2:-}" ]]; then
  echo -e "Write Release Version in second argument.\nYou could check list of versions:"
  # List the published releases so the user can pick a valid version.
  aws s3 ls s3://overturemaps-us-west-2/release/ --region us-west-2 --no-sign-request
  exit 1
fi
TYPE=$1            # e.g. [building, place]
RELEASE_VERSION=$2 # e.g. [latest, 2024-07-22.0, 2024-06-13-beta.1]

# Create the data directory for this release.
DATA_DIR="$SCRIPT_DIR/data/$RELEASE_VERSION/overturemaps/"
mkdir -p "$DATA_DIR"

# Point the local overturemaps-py checkout at the requested release.
# The cd MUST succeed: `git checkout .` discards uncommitted changes in
# whatever repository the shell is currently in, so running it after a failed
# cd would wipe local edits in this repository instead.
cd "../overturemaps-py" || {
  echo "overturemaps-py checkout not found at ${SCRIPT_DIR}/../overturemaps-py" >&2
  exit 1
}
git checkout .
git pull origin main
before="overturemaps-us-west-2/release/.*/theme"
after="overturemaps-us-west-2/release/$RELEASE_VERSION/theme"
core_py="overturemaps/core.py"
# `sed -i ''` only works with BSD sed; `-i.bak` (suffix attached) is accepted
# by both GNU and BSD sed, so edit with a backup suffix and drop the backup.
sed -i.bak "s|$before|$after|g" "$core_py" && rm -f -- "${core_py}.bak"
cd - || exit 1

#######################################
# Run `overturemaps download` with the given arguments.
# Uses the `overturemaps` command found on PATH when RELEASE_VERSION is
# "latest"; for any other version it goes through `poetry run`.
# Globals:   RELEASE_VERSION (read)
# Arguments: options forwarded verbatim to `overturemaps download`
# Returns:   exit status of the invoked command
#######################################
omf_download() {
  # "$@" (quoted) keeps every argument intact; unquoted $@ would re-split an
  # output path that contains spaces into several words.
  if [[ "$RELEASE_VERSION" == "latest" ]]; then
    overturemaps download "$@"
  else
    poetry run overturemaps download "$@"
  fi
}

### Main ###

# Areas to download, one "<city>:<bbox>" entry each. The bbox string is passed
# unchanged to `--bbox`.
# NOTE(review): the values look like west,south,east,north in degrees —
# confirm against the overturemaps CLI documentation.
areas=(
  "tokyo:139.74609375,35.67514744,139.83398438,35.74651226"
  "hamamatsu:137.63671875,34.66935855,137.72460938,34.7416125"
  "tateyama:139.83398438,34.95799531,139.921875,35.02999637"
  "kumamoto:130.68726409,32.72948989,130.77515472,32.80174385"
  "higashi_hiroshima:132.69418348,34.38622724,132.7820741,34.45848119"
  "morioka:141.07765453,39.6823863,141.16554516,39.75375112"
)

# Download each area as GeoJSON to <DATA_DIR>/<city>_<TYPE>.geojson.
# All expansions are quoted so a type or data directory containing spaces is
# passed as a single argument.
for area in "${areas[@]}"; do
  city=${area%%:*} # text before the first ':'
  bbox=${area#*:}  # text after the first ':'
  omf_download --bbox="$bbox" -f geojson --type="$TYPE" \
    -o "$DATA_DIR/${city}_$TYPE.geojson"
done

### Postprocessing ###

# Report the elapsed wall-clock time as H:MM:SS.
elapsed=$SECONDS
hrs=$((elapsed / 3600))
min=$(((elapsed % 3600) / 60))
sec=$((elapsed % 60))
printf 'Processing time is %d:%02d:%02d\n' "$hrs" "$min" "$sec"

exit 0
65 changes: 65 additions & 0 deletions Bash/OMF/ETL/omf_import.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/bin/bash
#
# Import previously downloaded OMF GeoJSON files into PostgreSQL.
#
# Usage: omf_import.sh <type> <release_version>
#   <type>             e.g. building, place
#   <release_version>  e.g. 2024-07-22.0, 2024-06-13-beta.1
#
# Reads <script dir>/data/<release_version>/overturemaps/<city>_<type>.geojson
# and loads each file into the table omf.<city>_<type> with ogr2ogr.
# Connects to database "postgres" as user "postgres" (ogr2ogr uses localhost).

### Preprocessing ###

# Reset bash's built-in SECONDS counter; it is read at the end to report the
# elapsed time.
SECONDS=0

# Work from the directory that contains this script. Quoted so a path with
# spaces works; a failed cd aborts instead of continuing in the wrong place.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd) || exit 1
cd "${SCRIPT_DIR}" || exit 1

# Validate the command line arguments.
if [[ -z "${1:-}" ]]; then
  echo -e "Write Type in first argument.\n e.g. [building, place]"
  exit 1
fi
if [[ -z "${2:-}" ]]; then
  echo -e "Write Release Version in second argument.\nYou could check list of versions:"
  # List the published releases so the user can pick a valid version.
  aws s3 ls s3://overturemaps-us-west-2/release/ --region us-west-2 --no-sign-request
  exit 1
fi
TYPE=$1            # e.g. [building, place]
RELEASE_VERSION=$2 # e.g. [2024-07-22.0, 2024-06-13-beta.1]

# Directory holding the GeoJSON files for this release.
DATA_DIR="$SCRIPT_DIR/data/$RELEASE_VERSION/overturemaps/"

### Main ###

echo "Change psql user(-U) or database(-d) if threre is error about postgres"

# Create the schema. "if not exists" keeps reruns from reporting an error
# once the schema is already there.
psql -U postgres -d postgres -c "create schema if not exists omf;"

# Create the tables for the requested type.
psql -U postgres -d postgres -f "$SCRIPT_DIR/sql/create_tables_$TYPE.sql"

# Cities to import, in the order they are loaded.
cities=("tokyo" "hamamatsu" "tateyama" "kumamoto" "higashi_hiroshima" "morioka")

# Load each city's GeoJSON into omf.<city>_<TYPE>. Layer name and input path
# are quoted so a script location containing spaces is passed as one argument.
# NOTE(review): -nlt multipolygon is applied regardless of TYPE — confirm this
# is intended when TYPE is "place".
for city in "${cities[@]}"; do
  ogr2ogr -f "PostgreSQL" \
    PG:"host=localhost user=postgres dbname=postgres" \
    -nln "omf.${city}_${TYPE}" \
    -nlt multipolygon \
    "${DATA_DIR}/${city}_${TYPE}.geojson"
done

### Postprocessing ###

# Report the elapsed wall-clock time as H:MM:SS.
elapsed=$SECONDS
hrs=$((elapsed / 3600))
min=$(((elapsed % 3600) / 60))
sec=$((elapsed % 60))
printf 'Processing time is %d:%02d:%02d\n' "$hrs" "$min" "$sec"

exit 0
Loading
Loading