Skip to content

Commit

Permalink
Merge pull request #780 from mapswipe/feature/speed-up-aggregate-calc…
Browse files Browse the repository at this point in the history
…ulation

Pre-calculate total geo area and time max limit for project groups
  • Loading branch information
thenav56 authored Jun 13, 2023
2 parents 1959557 + 6c0dbd3 commit ec64f11
Show file tree
Hide file tree
Showing 14 changed files with 521 additions and 484 deletions.
2 changes: 1 addition & 1 deletion django/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ RUN apt-get update -y \
# For postgis
gdal-bin \
# Upgrade pip and install python packages for code
&& pip install --upgrade --no-cache-dir pip poetry==1.2.1 \
&& pip install --upgrade --no-cache-dir pip poetry==1.5.1 \
&& poetry --version \
# Configure to use system instead of virtualenvs
&& poetry config virtualenvs.create false \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,70 @@
# |1|00:00:00.208768|00:00:01.398161|00:00:28.951521|
# |2|00:00:01.330297|00:00:06.076814|00:00:03.481192|
# |3|00:00:02.092967|00:00:11.271081|00:00:06.045881|
TASK_GROUP_METADATA_QUERY = f"""
SELECT
project_id,
group_id,
SUM(
ST_Area(geom::geography(GEOMETRY,4326)) / 1000000
) as total_task_group_area, -- sqkm
(
CASE
-- Using 95_percent value of existing data for each project_type
WHEN UG.project_type = {Project.Type.BUILD_AREA.value} THEN 1.4
WHEN UG.project_type = {Project.Type.COMPLETENESS.value} THEN 1.4
WHEN UG.project_type = {Project.Type.CHANGE_DETECTION.value} THEN 11.2
-- FOOTPRINT: Not calculated right now
WHEN UG.project_type = {Project.Type.FOOTPRINT.value} THEN 6.1
ELSE 1
END
) * COUNT(*) as time_spent_max_allowed
FROM tasks T
INNER JOIN used_task_groups UG USING (project_id, group_id)
GROUP BY project_id, project_type, group_id
# SQL statement that pre-calculates per-group metadata and stores it on the
# `groups` table, so later aggregate queries can read it instead of
# recomputing it from `tasks`.
#
# Execute-time bind parameters (named placeholders):
#   %(from_date)s  - inclusive lower bound on mapping_sessions.start_time
#   %(until_date)s - exclusive upper bound on mapping_sessions.start_time
#
# Steps performed by the statement:
#   1. to_calculate_groups: groups that have at least one mapping session in
#      the [from_date, until_date) window AND whose total_area or
#      time_spent_max_allowed is still NULL. Groups with both values already
#      set are skipped.
#   2. groups_data: for each such group, the sum of the task geometry areas
#      (ST_Area on the geography cast, divided by 1,000,000 -> sqkm) and a
#      time limit equal to a per-project-type factor multiplied by the number
#      of tasks in the group (factor falls back to 1 for unlisted types).
#   3. UPDATE groups: writes both computed values back, matched on
#      (project_id, group_id).
#
# This is an f-string: the Project.Type.*.value members are interpolated into
# the SQL text when this assignment is evaluated, whereas the date values are
# passed separately as query parameters at execute time.
# NOTE(review): the unit of the per-type factors (1.4 / 11.2 / 6.1) is not
# stated here; the inline SQL comment only says they are 95-percent values of
# existing data - confirm the unit against the consumer of
# time_spent_max_allowed.
UPDATE_PROJECT_GROUP_DATA = f"""
WITH to_calculate_groups AS (
SELECT
project_id,
group_id
FROM groups
WHERE
(project_id, group_id) in (
SELECT
MS.project_id,
MS.group_id
FROM mapping_sessions MS
WHERE
MS.start_time >= %(from_date)s
AND MS.start_time < %(until_date)s
GROUP BY MS.project_id, MS.group_id
) AND
(
total_area is NULL OR time_spent_max_allowed is NULL
)
),
groups_data AS (
SELECT
T.project_id,
T.group_id,
SUM( -- sqkm
ST_Area(T.geom::geography(GEOMETRY,4326)) / 1000000
) as total_task_group_area,
(
CASE
-- Using 95_percent value of existing data for each project_type
WHEN P.project_type = {Project.Type.BUILD_AREA.value} THEN 1.4
WHEN P.project_type = {Project.Type.COMPLETENESS.value} THEN 1.4
WHEN P.project_type = {Project.Type.CHANGE_DETECTION.value} THEN 11.2
-- FOOTPRINT: Not calculated right now
WHEN P.project_type = {Project.Type.FOOTPRINT.value} THEN 6.1
ELSE 1
END
) * COUNT(*) as time_spent_max_allowed
FROM tasks T
INNER JOIN to_calculate_groups G USING (project_id, group_id)
INNER JOIN projects P USING (project_id)
GROUP BY project_id, P.project_type, group_id
)
UPDATE groups G
SET
total_area = GD.total_task_group_area,
time_spent_max_allowed = GD.time_spent_max_allowed
FROM groups_data GD
WHERE
G.project_id = GD.project_id AND
G.group_id = GD.group_id;
"""

# SQL fragment returning, per (project_id, group_id), the values stored on
# `groups`: total_area (exposed as total_task_group_area) and
# time_spent_max_allowed. Rows are limited to groups that also appear in
# `used_task_groups` via the INNER JOIN.
#
# Plain (non-f) string: nothing is interpolated into the SQL text.
# NOTE(review): `used_task_groups` is not defined inside this string -
# presumably it is a CTE or relation provided by the larger query that embeds
# this fragment; confirm at the usage site.
TASK_GROUP_METADATA_QUERY = """
SELECT
G.project_id,
G.group_id,
G.total_area as total_task_group_area,
G.time_spent_max_allowed
FROM groups G
INNER JOIN used_task_groups UG USING (project_id, group_id)
INNER JOIN projects P USING (project_id)
GROUP BY G.project_id, P.project_type, G.group_id
"""


Expand Down Expand Up @@ -239,6 +282,20 @@ def _track(self, tracker_type, label, sql):
until_date=until_date.strftime("%Y-%m-%d"),
)
start_time = time.time()

self.stdout.write(
f"Updating Project Group Data for {label.title()} for date: {params}"
)
with transaction.atomic():
with connection.cursor() as cursor:
cursor.execute(UPDATE_PROJECT_GROUP_DATA, params)
self.stdout.write(
self.style.SUCCESS(
f"Successfull. Runtime: {time.time() - start_time} seconds"
)
)

start_time = time.time()
self.stdout.write(f"Updating {label.title()} Data for date: {params}")
with transaction.atomic():
with connection.cursor() as cursor:
Expand Down
5 changes: 5 additions & 0 deletions django/apps/existing_database/migrations/0001_initial.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ class Migration(migrations.Migration):
("required_count", models.IntegerField(blank=True, null=True)),
("progress", models.IntegerField(blank=True, null=True)),
("project_type_specifics", models.TextField(blank=True, null=True)),
("total_area", models.FloatField(blank=True, null=True, default=None)),
(
"time_spent_max_allowed",
models.FloatField(blank=True, null=True, default=None),
),
],
options={
"db_table": "groups",
Expand Down
5 changes: 4 additions & 1 deletion django/apps/existing_database/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,10 @@ class Group(Model):
required_count = models.IntegerField(blank=True, null=True)
progress = models.IntegerField(blank=True, null=True)
# Database uses JSON instead of JSONB (not supported by django)
project_type_specifics = models.TextField(blank=True, null=True)
project_type_specifics = models.TextField(blank=True, null=True, default=None)
# Used by aggregated module
total_area = models.FloatField(blank=True, null=True, default=None)
time_spent_max_allowed = models.FloatField(blank=True, null=True, default=None)

# Django derived fields from ForeignKey
project_id: str
Expand Down
Loading

0 comments on commit ec64f11

Please sign in to comment.