Skip to content

Commit 8b8d07e

Browse files
lkeegan, github-actions[bot], MaHaWo
authored
Refactor milestone answers statistics calculation (#229)
* Refactor milestone answers statistics calculation
  - refactor statistics calculation
    - add `update_stats` function which updates milestones and milestone group statistics
      - optional `incremental` argument
        - if `True`, then statistics updated with any new answers since last calculation (as before)
        - if `False`, then recalculates all statistics using all answers (may be needed if e.g. some junk answers are deleted by an admin)
    - add apscheduler to schedule regular calls of this function (based on 271c75e)
      - add `STATS_CRONTAB` to app settings to allow the schedule to be set, with a default crontab of 3am every monday
    - add fastapi_injectable to allow use of fastapi dependencies outside of fastapi routes
    - reduce duplication
    - resolves #203
  - `AnswerSession`
    - add `expired` flag: initially False
      - set to True by `get_or_create_current_milestone_answer_session` if it was created 7 or more days ago
      - set to True when stats are updated if it was created 9 days or more ago
        - includes a grace period to avoid setting a currently in use answer session to expired
      - once an answer session is expired, then answers can no longer be modified / submitted by the user
        - this should ensure that answers cannot be modified after they have been included in the statistics
      - resolves #219
    - add `included_in_statistics` flag: initially False
      - set to True once the answers from this session are included in the statistics
  - `MilestoneAnswer`
    - remove `included_in_milestone_statistics` and `included_in_milestonegroup_statistics` flags
      - this is now done at the level of an answer session rather than for each individual answer
  - milestone feedback functions
    - insert a `TrafficLight.invalid.value` instead of raising an exception if there are no statistics for a milestone id or group
    - no longer recalculate stats when constructing feedback to avoid slowing down a user request in this case
* update openapi.json & openapi-ts client
* add some comments and small fixes
* attempt fix for flaky test
---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Harald Mack <harald.mack@protonmail.com>
1 parent a79e138 commit 8b8d07e

22 files changed

+762
-901
lines changed

docker-compose.yml

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ services:
1616
- PORT=${PORT:-80}
1717
- RELOAD=${RELOAD:-false}
1818
- LOG_LEVEL=${LOG_LEVEL:-info}
19+
- STATS_CRONTAB=${STATS_CRONTAB:-"0 3 * * mon"}
1920
logging:
2021
driver: "local"
2122
options:

frontend/src/lib/client/schemas.gen.ts

+1-6
Original file line numberDiff line numberDiff line change
@@ -543,15 +543,10 @@ export const MilestoneAgeScoreCollectionPublicSchema = {
543543
},
544544
type: 'array',
545545
title: 'Scores'
546-
},
547-
created_at: {
548-
type: 'string',
549-
format: 'date-time',
550-
title: 'Created At'
551546
}
552547
},
553548
type: 'object',
554-
required: ['milestone_id', 'expected_age', 'scores', 'created_at'],
549+
required: ['milestone_id', 'expected_age', 'scores'],
555550
title: 'MilestoneAgeScoreCollectionPublic'
556551
} as const;
557552

frontend/src/lib/client/types.gen.ts

-1
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,6 @@ export type MilestoneAgeScoreCollectionPublic = {
137137
milestone_id: number;
138138
expected_age: number;
139139
scores: Array<MilestoneAgeScore>;
140-
created_at: string;
141140
};
142141

143142
export type MilestoneAnswerPublic = {

mondey_backend/openapi.json

+1-1
Large diffs are not rendered by default.

mondey_backend/pyproject.toml

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ dependencies = [
2424
"webp",
2525
"python-dateutil",
2626
"checkdigit",
27+
"apscheduler",
28+
"fastapi-injectable",
2729
]
2830
dynamic = ["version"]
2931

mondey_backend/src/mondey_backend/main.py

+15
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,12 @@
55
from contextlib import asynccontextmanager
66

77
import uvicorn
8+
from apscheduler.schedulers.asyncio import AsyncIOScheduler
9+
from apscheduler.triggers.cron import CronTrigger
810
from fastapi import FastAPI
911
from fastapi.middleware.cors import CORSMiddleware
1012
from fastapi.staticfiles import StaticFiles
13+
from fastapi_injectable.util import get_injected_obj
1114

1215
from .databases.mondey import create_mondey_db_and_tables
1316
from .databases.users import create_user_db_and_tables
@@ -18,13 +21,25 @@
1821
from .routers import research
1922
from .routers import users
2023
from .settings import app_settings
24+
from .statistics import update_stats
25+
26+
27+
def scheduled_update_stats():
28+
return get_injected_obj(update_stats)
2129

2230

2331
@asynccontextmanager
2432
async def lifespan(app: FastAPI):
2533
create_mondey_db_and_tables()
2634
await create_user_db_and_tables()
35+
scheduler = AsyncIOScheduler()
36+
scheduler.add_job(
37+
scheduled_update_stats,
38+
CronTrigger.from_crontab(app_settings.STATS_CRONTAB),
39+
)
40+
scheduler.start()
2741
yield
42+
scheduler.shutdown()
2843

2944

3045
def create_app() -> FastAPI:

mondey_backend/src/mondey_backend/models/milestones.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,6 @@ class MilestoneAnswer(SQLModel, table=True):
158158
)
159159
milestone_group_id: int = Field(default=None, foreign_key="milestonegroup.id")
160160
answer: int
161-
included_in_milestone_statistics: bool = False
162-
included_in_milestonegroup_statistics: bool = False
163161

164162

165163
class MilestoneAnswerSession(SQLModel, table=True):
@@ -171,6 +169,8 @@ class MilestoneAnswerSession(SQLModel, table=True):
171169
"server_default": text("CURRENT_TIMESTAMP"),
172170
}
173171
)
172+
expired: bool
173+
included_in_statistics: bool
174174
answers: Mapped[dict[int, MilestoneAnswer]] = dict_relationship(key="milestone_id")
175175

176176

@@ -214,7 +214,6 @@ class MilestoneAgeScoreCollectionPublic(SQLModel):
214214
milestone_id: int
215215
expected_age: int
216216
scores: list[MilestoneAgeScore]
217-
created_at: datetime.datetime
218217

219218

220219
class MilestoneGroupAgeScore(SQLModel, table=True):
@@ -235,8 +234,3 @@ class MilestoneGroupAgeScoreCollection(SQLModel, table=True):
235234
default=None, primary_key=True, foreign_key="milestonegroup.id"
236235
)
237236
scores: Mapped[list[MilestoneGroupAgeScore]] = back_populates("collection")
238-
created_at: datetime.datetime = Field(
239-
sa_column_kwargs={
240-
"server_default": text("CURRENT_TIMESTAMP"),
241-
}
242-
)

mondey_backend/src/mondey_backend/routers/scores.py

+34-78
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
from __future__ import annotations
22

33
import logging
4-
from datetime import datetime
5-
from datetime import timedelta
64
from enum import Enum
75
from typing import cast
86

@@ -15,8 +13,6 @@
1513
from ..models.milestones import MilestoneAnswerSession
1614
from ..models.milestones import MilestoneGroupAgeScore
1715
from ..models.milestones import MilestoneGroupAgeScoreCollection
18-
from .statistics import calculate_milestone_statistics_by_age
19-
from .statistics import calculate_milestonegroup_statistics_by_age
2016
from .utils import get_child_age_in_months
2117

2218

@@ -88,7 +84,6 @@ def compute_milestonegroup_feedback_summary(
8884
by first calculating the mean score over all milestones that belong to the milestonegroup that
8985
are relevant for the child when the given answersession was created. The mean is then
9086
compared against the mean and standard deviation over the known population of children for the child's age.
91-
When the statistics is outdated (older than a week currently) or there is none, it is recomputed and updated in the database.
9287
See `compute_feedback_simple` for the feedback logic.
9388
9489
Parameters
@@ -123,53 +118,35 @@ def compute_milestonegroup_feedback_summary(
123118
logger.debug(f" child age in months: {age}")
124119
# extract milestonegroups
125120
groups = set(answer.milestone_group_id for answer in answersession.answers.values())
126-
today = datetime.now()
127121

128122
# for each milestonegroup, get the statistics, compute the current mean, and compute the feedback
129-
# if the statistics is older than a week, we update it with the current data
130123
feedback: dict[int, int] = {}
131124
for group in groups:
132125
logger.debug(f" group: {group}")
133126
stats = session.get(MilestoneGroupAgeScoreCollection, group)
134-
logger.debug(f" old stats: {stats}")
135-
if stats is not None:
127+
if stats is None:
128+
logger.debug(" no stats")
129+
feedback[group] = TrafficLight.invalid.value
130+
else:
131+
logger.debug(f" stats: {stats}")
136132
for i, score in enumerate(stats.scores):
137133
if score.count > 0:
138134
logger.debug(
139-
f" old score: , {i}, {score.count}, {score.avg_score}, {score.stddev_score}"
135+
f" score: , {i}, {score.count}, {score.avg_score}, {score.stddev_score}"
140136
)
141-
142-
if stats is None or stats.created_at < today - timedelta(days=7):
143-
new_stats = calculate_milestonegroup_statistics_by_age(session, group)
144-
145-
if new_stats is None:
146-
raise ValueError("No statistics for milestone group: ", group)
147-
148-
# update stuff in database
149-
for i, new_score in enumerate(new_stats.scores):
150-
if new_score.count > 0:
151-
logger.debug(
152-
f" new_score: , {i}, {new_score.count}, {new_score.avg_score}, {new_score.stddev_score}"
153-
)
154-
session.merge(new_score)
155-
156-
session.merge(new_stats)
157-
session.commit()
158-
stats = new_stats
159-
160-
# extract the answers for the current milestone group
161-
group_answers = [
162-
answer.answer + 1
163-
for answer in answersession.answers.values()
164-
if answer.milestone_group_id == group
165-
]
166-
logger.debug(
167-
f' group answers: , {group_answers}, "mean: ", {np.mean(group_answers)}'
168-
)
169-
# use the statistics recorded for a certain age as the basis for the feedback computation
170-
feedback[group] = compute_feedback_simple(
171-
stats.scores[age], float(np.mean(group_answers))
172-
)
137+
# extract the answers for the current milestone group
138+
group_answers = [
139+
answer.answer + 1
140+
for answer in answersession.answers.values()
141+
if answer.milestone_group_id == group
142+
]
143+
logger.debug(
144+
f' group answers: , {group_answers}, "mean: ", {np.mean(group_answers)}'
145+
)
146+
# use the statistics recorded for a certain age as the basis for the feedback computation
147+
feedback[group] = compute_feedback_simple(
148+
stats.scores[age], float(np.mean(group_answers))
149+
)
173150
logger.debug(f"summary feedback: {feedback}")
174151
return feedback
175152

@@ -179,8 +156,8 @@ def compute_milestonegroup_feedback_detailed(
179156
) -> dict[int, dict[int, int]]:
180157
"""
181158
Compute the per-milestone (detailed) feedback for all answers in a given answersession.
182-
This is done by comparing the given answer per milestone against the mean and standard deviation of the known population of children for the child's age. If this statistics is outdated (older than a week currently) or is
183-
missing, it is recomputed and updated in the database. See `compute_feedback_simple` for the feedback logic.
159+
This is done by comparing the given answer per milestone against the mean and standard deviation of the known population of children for the child's age.
160+
See `compute_feedback_simple` for the feedback logic.
184161
Return a dictionary mapping milestonegroup -> [milestone -> feedback].
185162
Parameters
186163
----------
@@ -214,49 +191,28 @@ def compute_milestonegroup_feedback_detailed(
214191

215192
age = get_child_age_in_months(child, answersession.created_at)
216193
logger.debug(f" child age in months: {age}")
217-
today = datetime.today()
218194

219195
# for each milestonegroup, get the statistics, compute the current mean, and compute the feedback
220196
feedback: dict[int, dict[int, int]] = {}
221197
for milestone_id, answer in answersession.answers.items():
222-
# try to get statistics for the current milestone and update it if it's not there
223-
# or is too old
198+
logger.debug(f" milestone id: {milestone_id}, answer: {answer.answer + 1}")
224199
stats = session.get(MilestoneAgeScoreCollection, milestone_id)
225-
logger.debug(f" old stats: {stats}")
226-
if stats is not None:
200+
logger.debug(f" stats: {stats}")
201+
if answer.milestone_group_id not in feedback:
202+
feedback[answer.milestone_group_id] = {}
203+
if stats is None:
204+
feedback[answer.milestone_group_id][cast(int, answer.milestone_id)] = (
205+
TrafficLight.invalid.value
206+
)
207+
else:
227208
for i, score in enumerate(stats.scores):
228209
if score.count > 0:
229210
logger.debug(
230-
f" old score: {i}, {score.count}, {score.avg_score}, {score.stddev_score}"
211+
f" score: {i}, {score.count}, {score.avg_score}, {score.stddev_score}"
231212
)
232-
233-
if stats is None or stats.created_at < today - timedelta(days=7):
234-
new_stats = calculate_milestone_statistics_by_age(session, milestone_id)
235-
236-
if new_stats is None:
237-
raise ValueError(
238-
"No new statistics could be calculated for milestone: ",
239-
milestone_id,
240-
)
241-
242-
# update stuff in database
243-
for i, new_score in enumerate(new_stats.scores):
244-
if new_score.count > 0:
245-
logger.debug(
246-
f" new_score: , {i}, {new_score.count}, {new_score.avg_score}, {new_score.stddev_score}"
247-
)
248-
session.merge(new_score)
249-
250-
session.merge(new_stats)
251-
session.commit()
252-
stats = new_stats
253-
254-
if answer.milestone_group_id not in feedback:
255-
feedback[answer.milestone_group_id] = {}
256-
257-
feedback[answer.milestone_group_id][cast(int, answer.milestone_id)] = (
258-
compute_feedback_simple(stats.scores[age], answer.answer + 1)
259-
)
213+
feedback[answer.milestone_group_id][cast(int, answer.milestone_id)] = (
214+
compute_feedback_simple(stats.scores[age], answer.answer + 1)
215+
)
260216

261217
logger.debug(f" detailed feedback: {feedback}")
262218

0 commit comments

Comments
 (0)