epam · ypldan · Nov 14, 2025 · Nov 17, 2025 · Dec 3, 2025 · Dec 3, 2025
@@ -1,6 +1,9 @@
 # Poetry
 POETRY_PYTHON=python3
 
+# AI DIAL SDK
+PYDANTIC_V2=True
+
 # DataBase
 PGVECTOR_HOST=localhost
 PGVECTOR_PORT=5432

@@ -59,20 +59,22 @@ readability, and maintainability across the codebase.
 - **Use the correct Built-In Generics**:
   - It’s recommended to use built-in generic types instead of `typing.List`, `typing.Dict`,
     `typing.Tuple`, etc. For example:
-      - Use `list[str]` instead of `typing.List[str]`.
-      - Use `dict[str, int]` instead of `typing.Dict[str, int]`.
-      - Use `tuple[str, float]` instead of `typing.Tuple[str, float]`.
+    - Use `list[str]` instead of `typing.List[str]`.
+    - Use `dict[str, int]` instead of `typing.Dict[str, int]`.
+    - Use `tuple[str, float]` instead of `typing.Tuple[str, float]`.
   - Some other generics have been moved from the `typing` module to `collections.abc.`
     It is recommended to import them from the new module. For, example:
-      - Use `collections.abc.Iterable` instead of `typing.Iterable`.
-      - Use `collections.abc.Iterator` instead of `typing.Iterator`.
-      - Use `collections.abc.Callable` instead of `typing.Callable`.
+    - Use `collections.abc.Iterable` instead of `typing.Iterable`.
+    - Use `collections.abc.Iterator` instead of `typing.Iterator`.
+    - Use `collections.abc.Callable` instead of `typing.Callable`.
 - **Annotations for Readability**:
-    - Write function signatures with type hints, e.g., `def my_func(name: str) -> None: ...`.
-    - This improves code readability and assists with IDE-based autocompletion.
+  - Write function signatures with type hints, e.g., `def my_func(name: str) -> None: ...`.
+  - This improves code readability and assists with IDE-based autocompletion.
 - **Union Types**:
-    - In Python 3.10+, you can use the “pipe” (`|`) symbol to indicate union types. For example, `str | None` instead of
-      `Optional[str]`.
+  - In Python 3.10+, you can use the “pipe” (`|`) symbol to indicate union types. For example, `str | None` instead of `Optional[str]`.
+- **Factory Methods**:
+  - For a `@classmethod` that acts as a factory method, use `typing.Self` (Python 3.11+) as the return type.
+    [Reference](https://docs.python.org/3/library/typing.html#typing.Self)
 
 ## 8. Code Organization
 

@@ -5,6 +5,9 @@ MYPY_DIRS = src/common src/admin_portal src/statgpt
 -include .env
 export
 
+# AI DIAL SDK: pydantic v2 mode
+export PYDANTIC_V2=True
+
 remove_venv:
 	poetry env remove --all || true
 	$(POETRY_PYTHON) -m venv .venv

@@ -20,17 +20,16 @@ COPY pyproject.toml .
 COPY poetry.lock .
 RUN poetry export -f requirements.txt --without-hashes | pip install $PIP_ARGS -r /dev/stdin
 
-# Copy scripts and source code
+# Copy source code
 COPY ./src/alembic.ini $APP_HOME/alembic.ini
-COPY ./scripts/admin.sh $APP_HOME/admin.sh
 COPY ./src/admin_portal $APP_HOME/admin_portal
 COPY ./src/common $APP_HOME/common
 
 # create the app user and chown workdir to the app user
 RUN adduser -u 5678 --system --disabled-password --gecos "" app && chown -R app $APP_HOME
 USER app
 
-ENV APP_MODE="DIAL"
 ENV WEB_CONCURRENCY=1
+ENV PYDANTIC_V2=True
 
-CMD ["sh", "admin.sh"]
+CMD ["sh", "admin_portal/admin.sh"]
@@ -20,7 +20,7 @@ COPY pyproject.toml .
 COPY poetry.lock .
 RUN poetry export -f requirements.txt --without-hashes | pip install $PIP_ARGS -r /dev/stdin
 
-# Copy scripts and source code
+# Copy source code
 COPY ./src/statgpt $APP_HOME/statgpt
 COPY ./src/common $APP_HOME/common
 
@@ -30,6 +30,7 @@ USER app
 
 ENV APP_MODE="DIAL"
 ENV WEB_CONCURRENCY=1
+ENV PYDANTIC_V2=True
 
 EXPOSE 5000
 

@@ -8,7 +8,7 @@ license = "MIT"
 license-files = ["LICENSE"]
 dependencies = [
     # Core framework & API
-    'aidial-sdk[telemetry] (>=0.25.1,<0.26.0)',  # DIAL integration SDK
+    'aidial-sdk[telemetry] (>=0.27.0,<0.28.0)',  # DIAL integration SDK
     'fastapi (>=0.121.0,<0.122.0)',              # Web framework
     'pydantic (>=2.11.7,<3.0.0)',                # Data validation
     'pydantic-core (>=2.33.2,<3.0.0)',           # Pydantic core functionality

@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Entrypoint for the admin portal container; the task is selected via ADMIN_MODE
+# (see the usage text in the fallback branch for the accepted values).
+#
+# NOTE: keep this script POSIX-compatible - the Dockerfile launches it with `sh`,
+# so the bash shebang above is not honoured there.
+
+# Stop at the first failing command, so INIT never runs fix_statuses after a
+# failed migration and the script exits with the failing command's status.
+set -e
+
+echo "ADMIN_MODE = '$ADMIN_MODE'"
+
+case "$ADMIN_MODE" in
+
+  APP)
+    # exec replaces the shell with uvicorn so it receives container signals directly.
+    exec uvicorn "admin_portal.app:app" --host "0.0.0.0" --port 8000 --lifespan on
+    ;;
+
+  ALEMBIC_UPGRADE)
+    alembic upgrade head
+    ;;
+
+  FIX_STATUSES)
+    python -m admin_portal.fix_statuses
+    ;;
+
+  INIT)
+    alembic upgrade head
+    python -m admin_portal.fix_statuses
+    ;;
+
+  *)
+    echo "Unknown ADMIN_MODE = '$ADMIN_MODE'. Possible values:"
+    echo "  APP - start the admin portal application"
+    echo "  ALEMBIC_UPGRADE - run alembic migrations to upgrade the database"
+    echo "  FIX_STATUSES - fix inconsistent statuses in the database"
+    echo "  INIT - run alembic migrations and fix inconsistent statuses"
+    exit 1
+    ;;
+esac
@@ -0,0 +1,54 @@
+"""Add data hashes for a channel dataset version
+
+Revision ID: d528d881ece8
+Revises: 10fe795dc09d
+Create Date: 2025-11-14 14:00:16.313951
+
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision: str = 'd528d881ece8'
+down_revision: str | None = '10fe795dc09d'
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+_TABLE = 'channel_dataset_versions'
+
+# Nullable String(10) hash columns, listed in the order they are created.
+_HASH_COLUMNS = (
+    'structure_hash',
+    'indicator_dimensions_hash',
+    'non_indicator_dimensions_hash',
+    'special_dimensions_hash',
+)
+
+
+def upgrade() -> None:
+    """Add the nullable ``structure_metadata`` JSONB column, then the hash columns."""
+    op.add_column(
+        _TABLE,
+        sa.Column(
+            'structure_metadata',
+            postgresql.JSONB(astext_type=sa.Text()),
+            nullable=True,
+            server_default=None,
+        ),
+    )
+    for column_name in _HASH_COLUMNS:
+        op.add_column(
+            _TABLE,
+            sa.Column(column_name, sa.String(length=10), nullable=True, server_default=None),
+        )
+
+
+def downgrade() -> None:
+    """Drop the columns added by ``upgrade`` in reverse creation order."""
+    for column_name in reversed(_HASH_COLUMNS):
+        op.drop_column(_TABLE, column_name)
+    op.drop_column(_TABLE, 'structure_metadata')
@@ -0,0 +1,93 @@
+"""Reset sequences for vector store
+
+Revision ID: c64458e37902
+Revises: d528d881ece8
+Create Date: 2025-11-15 08:34:48.017064
+
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = 'c64458e37902'
+down_revision: str | None = 'd528d881ece8'
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Reset sequences for vector store tables to prevent duplicate key errors.
+
+    This migration fixes sequence values for tables that had explicit IDs set during import,
+    which caused the sequences to fall behind the actual maximum ID values in the tables.
+
+    After the reset the next generated id is ``MAX(id) + 1`` for a populated table
+    and ``1`` for an empty one.
+    """
+    conn = op.get_bind()
+
+    # Get all tables in the collections schema matching the prefixes
+    result = conn.execute(
+        sa.text(
+            """
+        SELECT table_name
+        FROM information_schema.tables
+        WHERE table_schema = 'collections'
+          AND table_type = 'BASE TABLE'
+          AND (table_name LIKE 'AvailableDimensions%'
+               OR table_name LIKE 'Indicators%'
+               OR table_name LIKE 'SpecialDimensions%')
+        ORDER BY table_name
+    """
+        )
+    )
+
+    tables = [row[0] for row in result]
+
+    for table_name in tables:
+        # Get the sequence name for the id column
+        seq_result = conn.execute(
+            sa.text(
+                """
+            SELECT pg_get_serial_sequence(:full_table_name, 'id')
+        """
+            ),
+            {"full_table_name": f'collections."{table_name}"'},
+        )
+
+        sequence_name = seq_result.scalar()
+
+        if sequence_name:
+            # setval(sequence, value, is_called):
+            #   populated table -> value = MAX(id), is_called = true:  next id is MAX(id) + 1
+            #   empty table     -> value = 1,       is_called = false: next id is 1
+            # A two-argument setval(sequence, 1) would mark 1 as already used and make
+            # the first insert into an empty table receive id 2.
+            # The table name is interpolated because identifiers cannot be bound as
+            # parameters; it comes from the information_schema query above.
+            conn.execute(
+                sa.text(
+                    f"""
+                SELECT setval(
+                    :sequence_name,
+                    COALESCE(MAX(id), 1),
+                    MAX(id) IS NOT NULL
+                )
+                FROM collections."{table_name}"
+            """
+                ),
+                {"sequence_name": sequence_name},
+            )
+
+            print(f"Reset sequence for table: {table_name} -> {sequence_name}")
+        else:
+            print(f"No sequence found for table: {table_name}")
+
+
+def downgrade() -> None:
+    """No-op: the sequence values that existed before the reset are not recorded."""
+    # This migration cannot be reversed as we don't know the original sequence values
+    pass
@@ -0,0 +1,35 @@
+"""Add cleanup status to channel dataset table
+
+Revision ID: 65c149c7db9e
+Revises: c64458e37902
+Create Date: 2025-11-14 16:33:52.089023
+
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision: str = '65c149c7db9e'
+down_revision: str | None = 'c64458e37902'
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        'channel_datasets',
+        sa.Column(
+            'clearing_status',  # enum type is not created here (create_type=False)
+            postgresql.ENUM(name='preprocessingstatusenum', create_type=False),
+            nullable=False,
+            server_default='NOT_STARTED',  # fills existing rows; column is NOT NULL
+        ),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column('channel_datasets', 'clearing_status')
@@ -7,7 +7,7 @@
 import dotenv
 from aidial_sdk.telemetry.init import init_telemetry
 from aidial_sdk.telemetry.types import MetricsConfig, TelemetryConfig, TracingConfig
-from fastapi import FastAPI
+from fastapi import FastAPI, status
 
 module_path = Path(__file__).parent.parent.absolute()
 sys.path.append(str(module_path))
@@ -30,7 +30,7 @@
 async def lifespan(app_: FastAPI):
     async with optional_msi_token_manager_context():
         # Check resources' availability:
-        await DatabaseHealthChecker.check()
+        await DatabaseHealthChecker().check()
 
         # Start data preloading in the background
         asyncio.create_task(preload_data(allow_cached_datasets=False))
@@ -58,8 +58,8 @@ async def lifespan(app_: FastAPI):
 app.include_router(router)
 
 
-@app.get("/health")
-def health():
+@app.get("/health", status_code=status.HTTP_200_OK)
+async def health():  # Liveness probe: returns a static payload, performs no dependency checks.
     return {"status": "ok"}