3 changes: 2 additions & 1 deletion backend/app/api/__init__.py
@@ -1,9 +1,10 @@
 from fastapi import APIRouter
-from app.api import auth, ingest, logs, admin
+from app.api import auth, ingest, logs, admin, analytics

 api_router = APIRouter()

 api_router.include_router(auth.router, prefix="/auth", tags=["auth"])
 api_router.include_router(ingest.router, prefix="/ingest", tags=["ingest"])
 api_router.include_router(logs.router, prefix="/teams", tags=["logs"])
+api_router.include_router(analytics.router, prefix="/teams", tags=["analytics"])
 api_router.include_router(admin.router, prefix="/admin", tags=["admin"])
185 changes: 185 additions & 0 deletions backend/app/api/analytics.py
@@ -0,0 +1,185 @@
from uuid import UUID
from typing import Annotated, Literal
from datetime import datetime, timedelta, timezone
from fastapi import APIRouter, Query
from tortoise import connections
from app.models import LogLevel
from app.schemas import VolumeResponse, VolumeBucket, TopResponse, TopItem, HeatmapResponse, HeatmapCell
from app.api.deps import get_team_member, CurrentUser

router = APIRouter()

VALID_LEVELS = [e.value for e in LogLevel]


def _default_range(
    from_time: datetime | None,
    to_time: datetime | None,
) -> tuple[datetime, datetime]:
    now = datetime.now(timezone.utc)
    return (
        from_time or now - timedelta(hours=24),
        to_time or now,
    )


BUCKET_SQL = {
"hour": "date_trunc('hour', timestamp)",
"day": "date_trunc('day', timestamp)",
"week": "date_trunc('week', timestamp)",
}


@router.get("/{team_id}/analytics/volume", response_model=VolumeResponse)
async def analytics_volume(
    team_id: UUID,
    user: CurrentUser,
    bucket: Literal["hour", "day", "week"] = "hour",
    split_by: Literal["level", "source", "omit"] = "level",
    from_time: datetime | None = Query(None, alias="from"),
    to_time: datetime | None = Query(None, alias="to"),
):
    team, _ = await get_team_member(team_id, user)
    start, end = _default_range(from_time, to_time)
    conn = connections.get("default")

    trunc = BUCKET_SQL[bucket]

    if split_by == "omit":
        rows = await conn.execute_query_dict(
            f"""
            SELECT {trunc} AS bucket, count(*) AS count
            FROM logs
            WHERE team_id = $1 AND timestamp >= $2 AND timestamp <= $3
            GROUP BY bucket ORDER BY bucket
            """,
            [str(team.id), start, end],
        )
        buckets = [VolumeBucket(bucket=str(r["bucket"]), count=r["count"]) for r in rows]
    elif split_by == "level":
        rows = await conn.execute_query_dict(
            f"""
            SELECT {trunc} AS bucket, level, count(*) AS count
            FROM logs
            WHERE team_id = $1 AND timestamp >= $2 AND timestamp <= $3
            GROUP BY bucket, level ORDER BY bucket, level
            """,
            [str(team.id), start, end],
        )
        buckets = [VolumeBucket(bucket=str(r["bucket"]), level=r["level"], count=r["count"]) for r in rows]
    else:
        rows = await conn.execute_query_dict(
            f"""
            SELECT {trunc} AS bucket, source, count(*) AS count
            FROM logs
            WHERE team_id = $1 AND timestamp >= $2 AND timestamp <= $3
            GROUP BY bucket, source ORDER BY bucket, source
            """,
            [str(team.id), start, end],
        )
        buckets = [VolumeBucket(bucket=str(r["bucket"]), source=r["source"], count=r["count"]) for r in rows]
Comment on lines +58 to +80
⚠️ Potential issue | 🟡 Minor

Use .isoformat() instead of str() for the bucket datetime.

str(datetime_obj) produces "2024-01-01 00:00:00+00:00" (space separator), whereas ISO 8601 requires a T. ECharts time-axis parsing and JavaScript's new Date() are more reliably compatible with the T-separator form from .isoformat().

✏️ Proposed fix (all three bucket construction sites)
-        buckets = [VolumeBucket(bucket=str(r["bucket"]), count=r["count"]) for r in rows]
+        buckets = [VolumeBucket(bucket=r["bucket"].isoformat(), count=r["count"]) for r in rows]
-        buckets = [VolumeBucket(bucket=str(r["bucket"]), level=r["level"], count=r["count"]) for r in rows]
+        buckets = [VolumeBucket(bucket=r["bucket"].isoformat(), level=r["level"], count=r["count"]) for r in rows]
-        buckets = [VolumeBucket(bucket=str(r["bucket"]), source=r["source"], count=r["count"]) for r in rows]
+        buckets = [VolumeBucket(bucket=r["bucket"].isoformat(), source=r["source"], count=r["count"]) for r in rows]
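
For a quick sanity check of the two serializations (illustrative only, standard-library behavior, not part of the proposed diff):

from datetime import datetime, timezone

dt = datetime(2024, 1, 1, tzinfo=timezone.utc)
print(str(dt))         # 2024-01-01 00:00:00+00:00  (space separator)
print(dt.isoformat())  # 2024-01-01T00:00:00+00:00  (ISO 8601 "T" separator)
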
🧰 Tools
🪛 Ruff (0.15.1)

[error] 61-66: Possible SQL injection vector through string-based query construction

(S608)


[error] 72-77: Possible SQL injection vector through string-based query construction

(S608)
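
As an aside on the S608 findings (not part of the review comment): the only interpolated fragments in these queries come from fixed, code-defined mappings — BUCKET_SQL keyed by a Literal query parameter and the col lookup dict — while request-supplied values travel as $n bind parameters. A minimal sketch of that whitelist-plus-bind-parameter pattern, using a hypothetical build_volume_query helper:

# Sketch only: the SQL fragment is chosen from a fixed dict, never taken from
# request input; user-controlled values stay in positional bind parameters.
BUCKET_SQL = {"hour": "date_trunc('hour', timestamp)", "day": "date_trunc('day', timestamp)"}

def build_volume_query(bucket: str) -> str:
    trunc = BUCKET_SQL[bucket]  # KeyError for anything outside the whitelist
    return (
        f"SELECT {trunc} AS bucket, count(*) AS count FROM logs "
        "WHERE team_id = $1 AND timestamp >= $2 AND timestamp <= $3 "
        "GROUP BY bucket ORDER BY bucket"
    )

Whether that is enough to dismiss the warning is a reviewer call; Ruff flags the string-construction pattern, not the specific inputs.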

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@backend/app/api/analytics.py` around lines 58-80, the bucket datetime
strings are built with str(r["bucket"]), which yields a space-separated
timestamp; update all three VolumeBucket constructions (the list comprehensions
that create buckets for the "omit" case, the "level" split, and the "source"
split) to call r["bucket"].isoformat() instead of str(r["bucket"]) so the bucket
values use ISO 8601 with the "T" separator and are parsed correctly by
JS/ECharts.


    # Totals by level
    total_rows = await conn.execute_query_dict(
        """
        SELECT level, count(*) AS count
        FROM logs
        WHERE team_id = $1 AND timestamp >= $2 AND timestamp <= $3
        GROUP BY level
        """,
        [str(team.id), start, end],
    )
    totals = {r["level"]: r["count"] for r in total_rows}

    return VolumeResponse(buckets=buckets, totals=totals)


@router.get("/{team_id}/analytics/top", response_model=TopResponse)
async def analytics_top(
    team_id: UUID,
    user: CurrentUser,
    field: Literal["source", "message", "user_id"] = "source",
    level: Annotated[list[LogLevel] | None, Query()] = None,
    limit: int = Query(10, ge=1, le=100),
    from_time: datetime | None = Query(None, alias="from"),
    to_time: datetime | None = Query(None, alias="to"),
):
    team, _ = await get_team_member(team_id, user)
    start, end = _default_range(from_time, to_time)
    conn = connections.get("default")

    col = {"source": "source", "message": "message", "user_id": "user_id"}[field]

    where = "team_id = $1 AND timestamp >= $2 AND timestamp <= $3"
    params: list = [str(team.id), start, end]

    if level:
        level_values = [lv.value for lv in level]
        placeholders = ", ".join(f"${i}" for i in range(4, 4 + len(level_values)))
        where += f" AND level IN ({placeholders})"
        params.extend(level_values)

    # Exclude nulls for the grouped field
    where += f" AND {col} IS NOT NULL"

    rows = await conn.execute_query_dict(
        f"""
        SELECT {col} AS value, count(*) AS count
        FROM logs
        WHERE {where}
        GROUP BY {col}
        ORDER BY count DESC
        LIMIT ${len(params) + 1}
        """,
        [*params, limit],
    )

    return TopResponse(items=[TopItem(value=r["value"], count=r["count"]) for r in rows])


@router.get("/{team_id}/analytics/heatmap", response_model=HeatmapResponse)
async def analytics_heatmap(
    team_id: UUID,
    user: CurrentUser,
    source_limit: int = Query(20, ge=1, le=100),
    from_time: datetime | None = Query(None, alias="from"),
    to_time: datetime | None = Query(None, alias="to"),
):
    team, _ = await get_team_member(team_id, user)
    start, end = _default_range(from_time, to_time)
    conn = connections.get("default")

    # Get top N sources by volume
    top_sources = await conn.execute_query_dict(
        """
        SELECT source, count(*) AS count
        FROM logs
        WHERE team_id = $1 AND timestamp >= $2 AND timestamp <= $3
          AND source IS NOT NULL
        GROUP BY source
        ORDER BY count DESC
        LIMIT $4
        """,
        [str(team.id), start, end, source_limit],
    )

    source_list = [r["source"] for r in top_sources]

    if not source_list:
        return HeatmapResponse(sources=[], levels=VALID_LEVELS, data=[])

    placeholders = ", ".join(f"${i}" for i in range(4, 4 + len(source_list)))
    rows = await conn.execute_query_dict(
        f"""
        SELECT source, level, count(*) AS count
        FROM logs
        WHERE team_id = $1 AND timestamp >= $2 AND timestamp <= $3
          AND source IN ({placeholders})
        GROUP BY source, level
        """,
        [str(team.id), start, end, *source_list],
    )

    data = [HeatmapCell(source=r["source"], level=r["level"], count=r["count"]) for r in rows]

    return HeatmapResponse(sources=source_list, levels=VALID_LEVELS, data=data)
2 changes: 2 additions & 0 deletions backend/app/schemas/__init__.py
@@ -2,11 +2,13 @@
 from app.schemas.user import UserCreate, UserUpdate, UserResponse
 from app.schemas.team import TeamCreate, TeamUpdate, TeamResponse, TeamWithKey, MembershipCreate, MembershipResponse
 from app.schemas.log import LogCreate, LogBatchCreate, LogResponse, LogSearchParams, UserIdBackfillRequest, UserIdBackfillResponse
+from app.schemas.analytics import VolumeResponse, VolumeBucket, TopResponse, TopItem, HeatmapResponse, HeatmapCell

 __all__ = [
     "Token", "TokenPayload", "LoginRequest", "RefreshRequest",
     "UserCreate", "UserUpdate", "UserResponse",
     "TeamCreate", "TeamUpdate", "TeamResponse", "TeamWithKey", "MembershipCreate", "MembershipResponse",
     "LogCreate", "LogBatchCreate", "LogResponse", "LogSearchParams",
     "UserIdBackfillRequest", "UserIdBackfillResponse",
+    "VolumeResponse", "VolumeBucket", "TopResponse", "TopItem", "HeatmapResponse", "HeatmapCell",
 ]
34 changes: 34 additions & 0 deletions backend/app/schemas/analytics.py
@@ -0,0 +1,34 @@
from pydantic import BaseModel


class VolumeBucket(BaseModel):
    bucket: str
    level: str | None = None
    source: str | None = None
    count: int


class VolumeResponse(BaseModel):
    buckets: list[VolumeBucket]
    totals: dict[str, int]


class TopItem(BaseModel):
    value: str
    count: int


class TopResponse(BaseModel):
    items: list[TopItem]


class HeatmapCell(BaseModel):
    source: str
    level: str
    count: int


class HeatmapResponse(BaseModel):
    sources: list[str]
    levels: list[str]
    data: list[HeatmapCell]