Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert spatialite to geopackage on any migration to 300 or above #171

Merged
merged 12 commits into from
Jan 24, 2025
Merged
4 changes: 2 additions & 2 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ Changelog of threedi-schema
===================================================


0.230.4 (unreleased)
0.300 (unreleased)
--------------------

- Nothing changed yet.
- Convert spatialite to geopackage during upgrade


0.230.3 (2025-01-23)
Expand Down
82 changes: 66 additions & 16 deletions threedi_schema/application/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,33 @@ def epsg_code(self):
def epsg_source(self):
return self._get_epsg_data()[1]

@property
def is_geopackage(self):
with self.db.get_session() as session:
return bool(
session.execute(
text(
"SELECT count(*) FROM sqlite_master WHERE type='table' AND name='gpkg_contents';"
)
).scalar()
)

@property
def is_spatialite(self):
with self.db.get_session() as session:
return bool(
session.execute(
text(
"SELECT count(*) FROM sqlite_master WHERE type='table' AND name='spatial_ref_sys';"
)
).scalar()
)

def upgrade(
self,
revision="head",
backup=True,
upgrade_spatialite_version=False,
convert_to_geopackage=False,
progress_func=None,
custom_epsg_code=None,
):
Expand All @@ -141,9 +162,6 @@ def upgrade(
Specify 'upgrade_spatialite_version=True' to also upgrade the
spatialite file version after the upgrade.

Specify 'convert_to_geopackage=True' to also convert from spatialite
to geopackage file version after the upgrade.

Specify a 'progress_func' to handle progress updates. `progress_func` should
expect a single argument representing the fraction of progress

Expand All @@ -156,18 +174,29 @@ def upgrade(
raise ValueError(
f"Incorrect version format: {revision}. Expected 'head' or a numeric value."
)
if convert_to_geopackage and rev_nr < 300:
raise UpgradeFailedError(
f"Cannot convert to geopackage for {revision=} because geopackage support is "
"enabled from revision 300",
)
v = self.get_version()
if v is not None and v < constants.LATEST_SOUTH_MIGRATION_ID:
raise MigrationMissingError(
f"This tool cannot update versions below "
f"{constants.LATEST_SOUTH_MIGRATION_ID}. Please consult the "
f"3Di documentation on how to update legacy databases."
)
if (
v is not None
and v <= constants.LAST_SPTL_SCHEMA_VERSION
and not self.is_spatialite
):
raise UpgradeFailedError(
f"Cannot upgrade from {revision=} because {self.db.path} is not a spatialite"
)
elif (
v is not None
and v > constants.LAST_SPTL_SCHEMA_VERSION
and not self.is_geopackage
):
raise UpgradeFailedError(
f"Cannot upgrade from {revision=} because {self.db.path} is not a geopackage"
)

def run_upgrade(_revision):
if backup:
Expand Down Expand Up @@ -201,11 +230,20 @@ def run_upgrade(_revision):
self._set_custom_epsg_code(custom_epsg_code)
run_upgrade("0230")
self._remove_custom_epsg_code()
run_upgrade(revision)
if upgrade_spatialite_version:
# First upgrade to LAST_SPTL_SCHEMA_VERSION.
# When the requested revision <= LAST_SPTL_SCHEMA_VERSION, this is the only upgrade step
run_upgrade(
revision
if rev_nr <= constants.LAST_SPTL_SCHEMA_VERSION
else f"{constants.LAST_SPTL_SCHEMA_VERSION:04d}"
)
# Only upgrade the spatialite version if the target revision is <= LAST_SPTL_SCHEMA_VERSION
if rev_nr <= constants.LAST_SPTL_SCHEMA_VERSION and upgrade_spatialite_version:
self.upgrade_spatialite_version()
elif convert_to_geopackage:
# Finish upgrade if target revision > LAST_SPTL_SCHEMA_VERSION
elif rev_nr > constants.LAST_SPTL_SCHEMA_VERSION:
self.convert_to_geopackage()
run_upgrade(revision)

def _set_custom_epsg_code(self, custom_epsg_code: int):
if (
Expand Down Expand Up @@ -269,7 +307,7 @@ def set_spatial_indexes(self):
f"{schema_version}. Current version: {version}."
)

ensure_spatial_indexes(self.db, models.DECLARED_MODELS)
ensure_spatial_indexes(self.db.engine, models.DECLARED_MODELS)

def upgrade_spatialite_version(self):
"""Upgrade the version of the spatialite file to the version of the
Expand All @@ -282,7 +320,11 @@ def upgrade_spatialite_version(self):
"""
lib_version, file_version = get_spatialite_version(self.db)
if file_version == 3 and lib_version in (4, 5):
self.validate_schema()
if self.get_version() != constants.LAST_SPTL_SCHEMA_VERSION:
raise MigrationMissingError(
f"This tool requires schema version "
f"{constants.LAST_SPTL_SCHEMA_VERSION:}. Current version: {self.get_version()}."
)
with self.db.file_transaction(start_empty=True) as work_db:
rev_nr = min(get_schema_version(), 229)
first_rev = f"{rev_nr:04d}"
Expand Down Expand Up @@ -327,8 +369,15 @@ def convert_to_geopackage(self):
return

# Ensure database is upgraded and views are recreated
self.upgrade()
self.validate_schema()
revision = self.get_version()
if revision is None or revision <= constants.LAST_SPTL_SCHEMA_VERSION:
self.upgrade(
revision=f"{constants.LAST_SPTL_SCHEMA_VERSION:04d}", backup=False
)
elif revision > constants.LAST_SPTL_SCHEMA_VERSION:
UpgradeFailedError(
f"Cannot convert schema version {revision} to geopackage"
)
# Make necessary modifications for conversion on temporary database
with self.db.file_transaction(start_empty=False, copy_results=False) as work_db:
# remove spatialite specific tables that break conversion
Expand Down Expand Up @@ -410,3 +459,4 @@ def convert_to_geopackage(self):
"CREATE TABLE views_geometry_columns(view_name TEXT, view_geometry TEXT, view_rowid TEXT, f_table_name VARCHAR(256), f_geometry_column VARCHAR(256))"
)
)
ensure_spatial_indexes(self.db.engine, models.DECLARED_MODELS)
1 change: 1 addition & 0 deletions threedi_schema/domain/constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from enum import Enum

LATEST_SOUTH_MIGRATION_ID = 160
LAST_SPTL_SCHEMA_VERSION = 230
VERSION_TABLE_NAME = "schema_version"


Expand Down
69 changes: 33 additions & 36 deletions threedi_schema/infrastructure/spatial_index.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,47 @@
from geoalchemy2.types import Geometry
from sqlalchemy import func, text
from sqlalchemy import func, inspect, text

__all__ = ["ensure_spatial_indexes"]


def _ensure_spatial_index(connection, column):
"""Ensure presence of a spatial index for the given geometry column"""
if (
connection.execute(
func.RecoverSpatialIndex(column.table.name, column.name)
).scalar()
is not None
):
return False

def create_spatial_index(connection, column):
"""
Create spatial index for given column.
Note that this will fail if the spatial index already exists!
"""
idx_name = f"{column.table.name}_{column.name}"
connection.execute(text(f"DROP TABLE IF EXISTS idx_{idx_name}"))
for prefix in {"gii_", "giu_", "gid_"}:
connection.execute(text(f"DROP TRIGGER IF EXISTS {prefix}{idx_name}"))
if (
connection.execute(
func.CreateSpatialIndex(column.table.name, column.name)
).scalar()
!= 1
):
raise RuntimeError(f"Spatial index creation for {idx_name} failed")

try:
connection.execute(func.gpkgAddSpatialIndex(column.table.name, column.name))
except Exception as e:
raise RuntimeError(
f"Spatial index creation for {idx_name} failed with error {e}"
)
return True


def ensure_spatial_indexes(db, models):
def get_missing_spatial_indexes(engine, models):
"""
Collect all models that have a geom column but whose rtree spatial index table is missing.
There can only be one geometry column per table and we assume any geometry column is named geom.
"""
inspector = inspect(engine)
table_names = inspector.get_table_names()
return [
model
for model in models
if "geom" in model.__table__.columns
and f"rtree_{model.__table__.name}_geom" not in table_names
]


def ensure_spatial_indexes(engine, models):
"""Ensure presence of spatial indexes for all geometry columns"""
created = False
engine = db.engine

no_spatial_index_models = get_missing_spatial_indexes(engine, models)
with engine.connect() as connection:
with connection.begin():
for model in models:
geom_columns = [
x for x in model.__table__.columns if isinstance(x.type, Geometry)
]
if len(geom_columns) > 1:
# Pragmatic fix: spatialindex breaks on multiple geometry columns per table
geom_columns = [x for x in geom_columns if x.name == "the_geom"]
if geom_columns:
created &= _ensure_spatial_index(connection, geom_columns[0])

for model in no_spatial_index_models:
created &= create_spatial_index(
connection, model.__table__.columns["geom"]
)
if created:
connection.execute(text("VACUUM"))
30 changes: 30 additions & 0 deletions threedi_schema/migrations/versions/0300_geopackage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Convert spatialite to geopackage

Revision ID: 0300
Revises: 0230
Create Date: 2024-11-12 12:30

"""
import sqlite3
import uuid

import sqlalchemy as sa
from alembic import op

from threedi_schema.migrations.exceptions import InvalidSRIDException

# revision identifiers, used by Alembic.
revision = "0300"
down_revision = "0230"
branch_labels = None
depends_on = None


def upgrade():
# this upgrade only changes the model version
pass


def downgrade():
# Not implemented on purpose
pass
1 change: 0 additions & 1 deletion threedi_schema/scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ def migrate(
revision=revision,
backup=backup,
upgrade_spatialite_version=upgrade_spatialite_version,
convert_to_geopackage=convert_to_geopackage,
)
click.echo("The migrated schema revision is: %s" % schema.get_version())

Expand Down
7 changes: 3 additions & 4 deletions threedi_schema/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ def in_memory_sqlite():


@pytest.fixture
def sqlite_latest(in_memory_sqlite):
def sqlite_latest(empty_sqlite_v4):
"""An in-memory database with the latest schema version"""
db = ThreediDatabase("")
in_memory_sqlite.schema.upgrade("head", backup=False, custom_epsg_code=28992)
return db
empty_sqlite_v4.schema.upgrade("head", backup=False, custom_epsg_code=28992)
return empty_sqlite_v4
31 changes: 9 additions & 22 deletions threedi_schema/tests/test_gpkg.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,18 @@
import pytest
from sqlalchemy import text

from threedi_schema.domain import constants


@pytest.mark.parametrize("upgrade_spatialite", [True, False])
def test_convert_to_geopackage(oldest_sqlite, upgrade_spatialite):
if upgrade_spatialite:
oldest_sqlite.schema.upgrade(upgrade_spatialite_version=True)
# if upgrade_spatialite:
oldest_sqlite.schema.upgrade(
upgrade_spatialite_version=upgrade_spatialite,
revision=f"{constants.LAST_SPTL_SCHEMA_VERSION:04d}",
)

oldest_sqlite.schema.convert_to_geopackage()
# Ensure that after the conversion the geopackage is used
assert oldest_sqlite.path.suffix == ".gpkg"
with oldest_sqlite.session_scope() as session:
gpkg_table_exists = bool(
session.execute(
text(
"SELECT count(*) FROM sqlite_master WHERE type='table' AND name='gpkg_contents';"
)
).scalar()
)
spatialite_table_exists = bool(
session.execute(
text(
"SELECT count(*) FROM sqlite_master WHERE type='table' AND name='spatial_ref_sys';"
)
).scalar()
)

assert gpkg_table_exists
assert not spatialite_table_exists
assert oldest_sqlite.schema.validate_schema()
assert not oldest_sqlite.schema.is_spatialite
assert oldest_sqlite.schema.is_geopackage
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def test_check_valid_crs(in_memory_sqlite, epsg_code):

def test_migration(tmp_path_factory, oldest_sqlite):
schema = ModelSchema(oldest_sqlite)
schema.upgrade(backup=False)
schema.upgrade(backup=False, revision="0230")
cursor = sqlite3.connect(schema.db.path).cursor()
query = cursor.execute("SELECT srid FROM geometry_columns where f_table_name = 'geom'")
epsg_matches = [int(item[0])==28992 for item in query.fetchall()]
Expand Down
Loading
Loading