Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env.template
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Poetry
POETRY_PYTHON=python3

# AI DIAL SDK
PYDANTIC_V2=True

# DataBase
PGVECTOR_HOST=localhost
PGVECTOR_PORT=5432
Expand Down
22 changes: 12 additions & 10 deletions CODE_STYLE.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,22 @@ readability, and maintainability across the codebase.
- **Use the correct Built-In Generics**:
- It’s recommended to use built-in generic types instead of `typing.List`, `typing.Dict`,
`typing.Tuple`, etc. For example:
- Use `list[str]` instead of `typing.List[str]`.
- Use `dict[str, int]` instead of `typing.Dict[str, int]`.
- Use `tuple[str, float]` instead of `typing.Tuple[str, float]`.
- Use `list[str]` instead of `typing.List[str]`.
- Use `dict[str, int]` instead of `typing.Dict[str, int]`.
- Use `tuple[str, float]` instead of `typing.Tuple[str, float]`.
- Some other generics have been moved from the `typing` module to `collections.abc`.
It is recommended to import them from the new module. For example:
- Use `collections.abc.Iterable` instead of `typing.Iterable`.
- Use `collections.abc.Iterator` instead of `typing.Iterator`.
- Use `collections.abc.Callable` instead of `typing.Callable`.
- Use `collections.abc.Iterable` instead of `typing.Iterable`.
- Use `collections.abc.Iterator` instead of `typing.Iterator`.
- Use `collections.abc.Callable` instead of `typing.Callable`.
- **Annotations for Readability**:
- Write function signatures with type hints, e.g., `def my_func(name: str) -> None: ...`.
- This improves code readability and assists with IDE-based autocompletion.
- Write function signatures with type hints, e.g., `def my_func(name: str) -> None: ...`.
- This improves code readability and assists with IDE-based autocompletion.
- **Union Types**:
- In Python 3.10+, you can use the “pipe” (`|`) symbol to indicate union types. For example, `str | None` instead of
`Optional[str]`.
- In Python 3.10+, you can use the “pipe” (`|`) symbol to indicate union types. For example, `str | None` instead of `Optional[str]`.
- **Factory Methods**:
- For a `@classmethod` that acts as a factory method, use `typing.Self` as return type.
[Reference](https://docs.python.org/3/library/typing.html#typing.Self)

## 8. Code Organization

Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ MYPY_DIRS = src/common src/admin_portal src/statgpt
-include .env
export

# AI DIAL SDK: pydantic v2 mode
export PYDANTIC_V2=True

remove_venv:
poetry env remove --all || true
$(POETRY_PYTHON) -m venv .venv
Expand Down
7 changes: 3 additions & 4 deletions docker/admin.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,16 @@ COPY pyproject.toml .
COPY poetry.lock .
RUN poetry export -f requirements.txt --without-hashes | pip install $PIP_ARGS -r /dev/stdin

# Copy scripts and source code
# Copy source code
COPY ./src/alembic.ini $APP_HOME/alembic.ini
COPY ./scripts/admin.sh $APP_HOME/admin.sh
COPY ./src/admin_portal $APP_HOME/admin_portal
COPY ./src/common $APP_HOME/common

# create the app user and chown workdir to the app user
RUN adduser -u 5678 --system --disabled-password --gecos "" app && chown -R app $APP_HOME
USER app

ENV APP_MODE="DIAL"
ENV WEB_CONCURRENCY=1
ENV PYDANTIC_V2=True

CMD ["sh", "admin.sh"]
CMD ["sh", "admin_portal/admin.sh"]
3 changes: 2 additions & 1 deletion docker/chat.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ COPY pyproject.toml .
COPY poetry.lock .
RUN poetry export -f requirements.txt --without-hashes | pip install $PIP_ARGS -r /dev/stdin

# Copy scripts and source code
# Copy source code
COPY ./src/statgpt $APP_HOME/statgpt
COPY ./src/common $APP_HOME/common

Expand All @@ -30,6 +30,7 @@ USER app

ENV APP_MODE="DIAL"
ENV WEB_CONCURRENCY=1
ENV PYDANTIC_V2=True

EXPOSE 5000

Expand Down
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ license = "MIT"
license-files = ["LICENSE"]
dependencies = [
# Core framework & API
'aidial-sdk[telemetry] (>=0.25.1,<0.26.0)', # DIAL integration SDK
'aidial-sdk[telemetry] (>=0.27.0,<0.28.0)', # DIAL integration SDK
'fastapi (>=0.121.0,<0.122.0)', # Web framework
'pydantic (>=2.11.7,<3.0.0)', # Data validation
'pydantic-core (>=2.33.2,<3.0.0)', # Pydantic core functionality
Expand Down
18 changes: 0 additions & 18 deletions scripts/admin.sh

This file was deleted.

32 changes: 32 additions & 0 deletions src/admin_portal/admin.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
#
# Entry point for the admin portal container. Dispatches on ADMIN_MODE:
#   APP             - start the admin portal application
#   ALEMBIC_UPGRADE - run alembic migrations to upgrade the database
#   FIX_STATUSES    - fix inconsistent statuses in the database
#   INIT            - run alembic migrations and fix inconsistent statuses
# Any other value prints the list of modes and exits with status 1.

echo "ADMIN_MODE = '$ADMIN_MODE'"

# Quote the variable so an empty or whitespace-containing value is matched
# as a single word and falls through to the default branch.
case "$ADMIN_MODE" in

    APP)
        # exec replaces the shell with uvicorn, so signals sent to the
        # container's main process reach the server directly.
        exec uvicorn "admin_portal.app:app" --host "0.0.0.0" --port 8000 --lifespan on
        ;;

    ALEMBIC_UPGRADE)
        alembic upgrade head
        ;;

    FIX_STATUSES)
        python -m admin_portal.fix_statuses
        ;;

    INIT)
        # Only fix statuses once the migrations have succeeded; a failed
        # migration must not be masked by a successful second step.
        alembic upgrade head && python -m admin_portal.fix_statuses
        ;;

    *)
        echo "Unknown ADMIN_MODE = '$ADMIN_MODE'. Possible values:"
        echo "  APP             - start the admin portal application"
        echo "  ALEMBIC_UPGRADE - run alembic migrations to upgrade the database"
        echo "  FIX_STATUSES    - fix inconsistent statuses in the database"
        echo "  INIT            - run alembic migrations and fix inconsistent statuses"
        exit 1
        ;;
esac
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Add data hashes for a channel dataset version

Revision ID: d528d881ece8
Revises: 10fe795dc09d
Create Date: 2025-11-14 14:00:16.313951

"""

from collections.abc import Sequence

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the ID of the
# migration it is applied on top of (see "Revises" in the module docstring).
revision: str = 'd528d881ece8'
down_revision: str | None = '10fe795dc09d'
# No branch labels or extra dependencies are declared for this migration.
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """Add the structure metadata column and four hash columns.

    All columns are added to ``channel_dataset_versions``; each one is
    nullable and has no server default. The JSONB metadata column is added
    first, followed by the ten-character hash columns in the order listed.
    """
    table = 'channel_dataset_versions'
    hash_columns = (
        'structure_hash',
        'indicator_dimensions_hash',
        'non_indicator_dimensions_hash',
        'special_dimensions_hash',
    )

    metadata_column = sa.Column(
        'structure_metadata',
        postgresql.JSONB(astext_type=sa.Text()),
        nullable=True,
        server_default=None,
    )
    op.add_column(table, metadata_column)

    for column_name in hash_columns:
        hash_column = sa.Column(
            column_name,
            sa.String(length=10),
            nullable=True,
            server_default=None,
        )
        op.add_column(table, hash_column)


def downgrade() -> None:
    """Drop the hash and metadata columns from ``channel_dataset_versions``.

    Columns are removed in the reverse of the order in which ``upgrade``
    adds them.
    """
    columns_to_drop = (
        'special_dimensions_hash',
        'non_indicator_dimensions_hash',
        'indicator_dimensions_hash',
        'structure_hash',
        'structure_metadata',
    )
    for column_name in columns_to_drop:
        op.drop_column('channel_dataset_versions', column_name)
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
"""Reset sequences for vector store

Revision ID: c64458e37902
Revises: d528d881ece8
Create Date: 2025-11-15 08:34:48.017064

"""

from collections.abc import Sequence

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the ID of the
# migration it is applied on top of (see "Revises" in the module docstring).
revision: str = 'c64458e37902'
down_revision: str | None = 'd528d881ece8'
# No branch labels or extra dependencies are declared for this migration.
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """Reset sequences for vector store tables to prevent duplicate key errors.

    This migration fixes sequence values for tables that had explicit IDs set during import,
    which caused the sequences to fall behind the actual maximum ID values in the tables.

    For every base table in the ``collections`` schema whose name starts with
    ``AvailableDimensions``, ``Indicators`` or ``SpecialDimensions``, the
    sequence backing the ``id`` column is looked up and repositioned so that
    the next generated value is ``MAX(id) + 1``, or ``1`` when the table is
    empty. Tables without such a sequence are reported and skipped.
    """
    conn = op.get_bind()

    # Get all tables in the collections schema matching the prefixes
    result = conn.execute(
        sa.text(
            """
            SELECT table_name
            FROM information_schema.tables
            WHERE table_schema = 'collections'
            AND table_type = 'BASE TABLE'
            AND (table_name LIKE 'AvailableDimensions%'
            OR table_name LIKE 'Indicators%'
            OR table_name LIKE 'SpecialDimensions%')
            ORDER BY table_name
            """
        )
    )

    tables = [row[0] for row in result]

    for table_name in tables:
        # The table names are mixed-case, so they must be double-quoted.
        # Doubling any embedded quote keeps the name a single identifier.
        escaped_name = table_name.replace('"', '""')
        qualified_table = f'collections."{escaped_name}"'

        # Get the sequence name for the id column
        sequence_name = conn.execute(
            sa.text("SELECT pg_get_serial_sequence(:full_table_name, 'id')"),
            {"full_table_name": qualified_table},
        ).scalar()

        if not sequence_name:
            print(f"No sequence found for table: {table_name}")
            continue

        # Third setval argument is `is_called`:
        #   non-empty table -> setval(seq, MAX(id), true):  next value is MAX(id) + 1
        #   empty table     -> setval(seq, 1, false):       next value is 1
        # The two-argument form would make an empty table start at 2.
        conn.execute(
            sa.text(
                f"""
                SELECT setval(
                    :sequence_name,
                    COALESCE(MAX(id), 1),
                    MAX(id) IS NOT NULL
                )
                FROM {qualified_table}
                """
            ),
            {"sequence_name": sequence_name},
        )

        print(f"Reset sequence for table: {table_name} -> {sequence_name}")


def downgrade() -> None:
    """Do nothing.

    This migration cannot be reversed as the original sequence values are
    not known.
    """
    return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Add cleanup status to channel dataset table

Revision ID: 65c149c7db9e
Revises: c64458e37902
Create Date: 2025-11-14 16:33:52.089023

"""

from collections.abc import Sequence

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the ID of the
# migration it is applied on top of, here c64458e37902.
revision: str = '65c149c7db9e'
down_revision: str | None = 'c64458e37902'
# No branch labels or extra dependencies are declared for this migration.
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """Add the non-nullable ``clearing_status`` column to ``channel_datasets``.

    The column is typed with the ``preprocessingstatusenum`` enum, referenced
    with ``create_type=False``, and has a server default of ``NOT_STARTED``.
    """
    status_enum = postgresql.ENUM(name='preprocessingstatusenum', create_type=False)
    clearing_status = sa.Column(
        'clearing_status',
        status_enum,
        nullable=False,
        server_default='NOT_STARTED',
    )
    op.add_column('channel_datasets', clearing_status)


def downgrade() -> None:
    """Remove the ``clearing_status`` column from ``channel_datasets``."""
    table, column = 'channel_datasets', 'clearing_status'
    op.drop_column(table, column)
8 changes: 4 additions & 4 deletions src/admin_portal/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import dotenv
from aidial_sdk.telemetry.init import init_telemetry
from aidial_sdk.telemetry.types import MetricsConfig, TelemetryConfig, TracingConfig
from fastapi import FastAPI
from fastapi import FastAPI, status

module_path = Path(__file__).parent.parent.absolute()
sys.path.append(str(module_path))
Expand All @@ -30,7 +30,7 @@
async def lifespan(app_: FastAPI):
async with optional_msi_token_manager_context():
# Check resources' availability:
await DatabaseHealthChecker.check()
await DatabaseHealthChecker().check()

# Start data preloading in the background
asyncio.create_task(preload_data(allow_cached_datasets=False))
Expand Down Expand Up @@ -58,8 +58,8 @@ async def lifespan(app_: FastAPI):
app.include_router(router)


@app.get("/health")
def health():
@app.get("/health", status_code=status.HTTP_200_OK)
async def health():
return {"status": "ok"}


Expand Down
Loading
Loading