From 6a58f30558679a51dc273a347427b216c1802397 Mon Sep 17 00:00:00 2001 From: Adrien Merat Date: Tue, 29 Apr 2025 13:22:46 +0200 Subject: [PATCH 1/2] Add vehicles and vehicle_positions tables --- alembic/env.py | 8 ++- .../versions/2025_04_29_1155-7076062a2ab9.py | 53 ++++++++++++++ app/db/models.py | 72 ++++++++++++++++++- docker-compose.yaml | 2 + 4 files changed, 131 insertions(+), 4 deletions(-) create mode 100644 alembic/versions/2025_04_29_1155-7076062a2ab9.py diff --git a/alembic/env.py b/alembic/env.py index 90d06bb..beb98fe 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -5,7 +5,7 @@ from alembic import context -from app.db.models import Base +from app.db.models import Base, Vehicle, VehiclePosition # this is the Alembic Config object, which provides # access to the values within the .ini file in use. @@ -67,10 +67,14 @@ def run_migrations_online() -> None: with connectable.connect() as connection: context.configure( - connection=connection, target_metadata=target_metadata + connection=connection, + target_metadata=target_metadata, + version_table_schema=target_metadata.schema, + include_schemas=True ) with context.begin_transaction(): + context.execute('SET search_path TO public') context.run_migrations() diff --git a/alembic/versions/2025_04_29_1155-7076062a2ab9.py b/alembic/versions/2025_04_29_1155-7076062a2ab9.py new file mode 100644 index 0000000..061a547 --- /dev/null +++ b/alembic/versions/2025_04_29_1155-7076062a2ab9.py @@ -0,0 +1,53 @@ +"""Create vehicles & vehicle_positions tables + +Revision ID: 7076062a2ab9 +Revises: +Create Date: 2025-04-29 11:55:10.699738 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = '7076062a2ab9' +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + 'vehicles', + sa.Column('id', sa.UUID(), server_default=sa.text('gen_random_uuid()'), nullable=False), + sa.Column('trip_id', sa.String(), nullable=False), + sa.Column('line_product', sa.Enum('BUS', 'SUBWAY', 'TRAMWAY', 'SUBURBAN', 'FERRY', 'EXPRESS', 'REGIONAL', name='line_product_enum'), nullable=False), + sa.Column('line_name', sa.String(), nullable=False), + sa.Column('partition_dt', sa.Date(), nullable=False), + sa.PrimaryKeyConstraint('id'), + postgresql_partition_by='RANGE (partition_dt)' + ) + op.create_table( + 'vehicle_positions', + sa.Column('vehicle_id', sa.UUID(), nullable=False), + sa.Column('timestamp', postgresql.TIMESTAMP(), nullable=False), + sa.Column('latitude', sa.DECIMAL(precision=38, scale=18), nullable=False), + sa.Column('longitude', sa.DECIMAL(precision=38, scale=18), nullable=False), + sa.Column('partition_dt', sa.Date(), nullable=False), + sa.ForeignKeyConstraint(['vehicle_id'], ['vehicles.id'], ondelete='CASCADE'), + sa.PrimaryKeyConstraint('vehicle_id', 'timestamp'), + postgresql_partition_by='RANGE (partition_dt)' + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('vehicle_positions') + op.drop_table('vehicles') + # ### end Alembic commands ### diff --git a/app/db/models.py b/app/db/models.py index 860e542..2558c35 100644 --- a/app/db/models.py +++ b/app/db/models.py @@ -1,3 +1,71 @@ -from sqlalchemy.ext.declarative import declarative_base +import enum +import uuid +from datetime import date, datetime +from typing import List -Base = declarative_base() +from sqlalchemy import DECIMAL, Date, Enum, ForeignKey, String, func +from sqlalchemy.dialects.postgresql import TIMESTAMP, UUID +from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship + + +class Base(DeclarativeBase): + pass + + +class LineProductEnum(enum.Enum): + BUS = "bus" + SUBWAY = "subway" + TRAMWAY = "tram" + SUBURBAN = "suburban" # S-Bahn + FERRY = "ferry" + EXPRESS = "express" # IC/ICE trains + REGIONAL = "regional" # Regio trains + + +class Vehicle(Base): + """ + Table representing a vehicle, partitioned by day. + A vehicle is represented by its trip id as the API doesn't provide the actual id of vehicles themselves, thus + we can have multiple trip_ids which represent the same real-world vehicle. + """ + __tablename__ = "vehicles" + __table_args__ = ( + {'postgresql_partition_by': 'RANGE (partition_dt)'} + ) + + id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + primary_key=True, + server_default=func.gen_random_uuid() + ) + trip_id: Mapped[str] = mapped_column(String, nullable=False) + line_product: Mapped[LineProductEnum] = mapped_column(Enum(LineProductEnum, name="line_product_enum"), nullable=False) + line_name: Mapped[str] = mapped_column(String, nullable=False) + partition_dt: Mapped[date] = mapped_column(Date, nullable=False) + + positions: Mapped[List["VehiclePosition"]] = relationship( + back_populates="vehicle", + cascade="all, delete-orphan" + ) + + +class VehiclePosition(Base): + """ + Table representing the position of a vehicle at a given time, partitioned by day. + """ + __tablename__ = "vehicle_positions" + __table_args__ = ( + {'postgresql_partition_by': 'RANGE (partition_dt)'}, + ) + + vehicle_id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + ForeignKey('vehicles.id', ondelete='CASCADE'), + primary_key=True + ) + timestamp: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=False), primary_key=True) + latitude: Mapped[float] = mapped_column(DECIMAL(38, 18), nullable=False) + longitude: Mapped[float] = mapped_column(DECIMAL(38, 18), nullable=False) + partition_dt: Mapped[date] = mapped_column(Date, nullable=False) + + vehicle: Mapped["Vehicle"] = relationship(back_populates="positions") diff --git a/docker-compose.yaml b/docker-compose.yaml index 723aced..de8de6c 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,6 +1,8 @@ services: postgres: image: postgres:15.6 + ports: + - "5432:5432" environment: POSTGRES_USER: ${POSTGRES_USER:-postgres} POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-S3cr3t_P4ssw0rd} From 200f86959b467772c167b2305f1f1de5cf65762a Mon Sep 17 00:00:00 2001 From: Adrien Merat Date: Tue, 29 Apr 2025 13:56:10 +0200 Subject: [PATCH 2/2] Update models and migration to handle partitioning --- .../versions/2025_04_29_1155-7076062a2ab9.py | 62 +++++++++++-------- app/db/models.py | 4 +- 2 files changed, 37 insertions(+), 29 deletions(-) diff --git a/alembic/versions/2025_04_29_1155-7076062a2ab9.py b/alembic/versions/2025_04_29_1155-7076062a2ab9.py index 061a547..be2e909 100644 --- a/alembic/versions/2025_04_29_1155-7076062a2ab9.py +++ b/alembic/versions/2025_04_29_1155-7076062a2ab9.py @@ -20,34 +20,42 @@ def upgrade() -> None: """Upgrade schema.""" - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - 'vehicles', - sa.Column('id', sa.UUID(), server_default=sa.text('gen_random_uuid()'), nullable=False), - sa.Column('trip_id', sa.String(), nullable=False), - sa.Column('line_product', sa.Enum('BUS', 'SUBWAY', 'TRAMWAY', 'SUBURBAN', 'FERRY', 'EXPRESS', 'REGIONAL', name='line_product_enum'), nullable=False), - sa.Column('line_name', sa.String(), nullable=False), - sa.Column('partition_dt', sa.Date(), nullable=False), - sa.PrimaryKeyConstraint('id'), - postgresql_partition_by='RANGE (partition_dt)' - ) - op.create_table( - 'vehicle_positions', - sa.Column('vehicle_id', sa.UUID(), nullable=False), - sa.Column('timestamp', postgresql.TIMESTAMP(), nullable=False), - sa.Column('latitude', sa.DECIMAL(precision=38, scale=18), nullable=False), - sa.Column('longitude', sa.DECIMAL(precision=38, scale=18), nullable=False), - sa.Column('partition_dt', sa.Date(), nullable=False), - sa.ForeignKeyConstraint(['vehicle_id'], ['vehicles.id'], ondelete='CASCADE'), - sa.PrimaryKeyConstraint('vehicle_id', 'timestamp'), - postgresql_partition_by='RANGE (partition_dt)' - ) - # ### end Alembic commands ### + # Create enum type explicitly + op.execute(""" + CREATE TYPE line_product_enum AS ENUM ( + 'BUS', 'SUBWAY', 'TRAMWAY', 'SUBURBAN', 'FERRY', 'EXPRESS', 'REGIONAL' + ); + """) + + # Create partitioned 'vehicles' table + op.execute(""" + CREATE TABLE vehicles + ( + id UUID NOT NULL DEFAULT gen_random_uuid(), + trip_id TEXT NOT NULL, + line_product line_product_enum NOT NULL, + line_name TEXT NOT NULL, + partition_dt DATE NOT NULL, + PRIMARY KEY (id, partition_dt) + ) PARTITION BY RANGE (partition_dt); + """) + # Create partitioned 'vehicle_positions' table + op.execute(""" + CREATE TABLE vehicle_positions + ( + vehicle_id UUID NOT NULL, + timestamp TIMESTAMP NOT NULL, + latitude NUMERIC(38, 18) NOT NULL, + longitude NUMERIC(38, 18) NOT NULL, + partition_dt DATE NOT NULL, + PRIMARY KEY (vehicle_id, timestamp, partition_dt), + FOREIGN KEY (vehicle_id, partition_dt) REFERENCES vehicles (id, partition_dt) ON DELETE CASCADE + ) PARTITION BY RANGE (partition_dt); + """) def downgrade() -> None: """Downgrade schema.""" - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('vehicle_positions') - op.drop_table('vehicles') - # ### end Alembic commands ### + op.execute("DROP TABLE IF EXISTS vehicle_positions CASCADE;") + op.execute("DROP TABLE IF EXISTS vehicles CASCADE;") + op.execute("DROP TYPE IF EXISTS line_product_enum;") diff --git a/app/db/models.py b/app/db/models.py index 2558c35..633e346 100644 --- a/app/db/models.py +++ b/app/db/models.py @@ -41,7 +41,7 @@ class Vehicle(Base): trip_id: Mapped[str] = mapped_column(String, nullable=False) line_product: Mapped[LineProductEnum] = mapped_column(Enum(LineProductEnum, name="line_product_enum"), nullable=False) line_name: Mapped[str] = mapped_column(String, nullable=False) - partition_dt: Mapped[date] = mapped_column(Date, nullable=False) + partition_dt: Mapped[date] = mapped_column(Date, primary_key=True) positions: Mapped[List["VehiclePosition"]] = relationship( back_populates="vehicle", @@ -66,6 +66,6 @@ class VehiclePosition(Base): timestamp: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=False), primary_key=True) latitude: Mapped[float] = mapped_column(DECIMAL(38, 18), nullable=False) longitude: Mapped[float] = mapped_column(DECIMAL(38, 18), nullable=False) - partition_dt: Mapped[date] = mapped_column(Date, nullable=False) + partition_dt: Mapped[date] = mapped_column(Date, primary_key=True) vehicle: Mapped["Vehicle"] = relationship(back_populates="positions")