Skip to content

Commit

Permalink
fix: Remove query context after asset import
Browse files Browse the repository at this point in the history
Fixes an issue where dashboards do not load in the LMS instructor dashboard because the imported query_context contains database primary keys from the database it was exported from. The performance metrics script still needs the query_context, so it now reads it directly from the asset files on disk instead.
  • Loading branch information
bmtcril committed Apr 22, 2024
1 parent d47a1b1 commit 992f0cc
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from superset.examples.utils import load_configs_from_directory
from superset.extensions import db
from superset.models.dashboard import Dashboard
from superset.models.slice import Slice
from superset.connectors.sqla.models import SqlaTable
from superset.utils.database import get_or_create_db
from superset.models.embedded_dashboard import EmbeddedDashboard
Expand Down Expand Up @@ -235,6 +236,16 @@ def import_assets():
force_data=False,
)

# Query contexts use slice IDs instead of UUIDs, which breaks for us
# especially in translated datasets. We do need them for the
# performance_metric script however, so we keep them in the assets.
# This just blanks them in the database after import, which forces a
# query to get the assets instead of using the query context.
for o in db.session.query(Slice).all():
if o.query_context:
o.query_context = None
db.session.commit()


def update_dashboard_roles(roles):
"""Update the roles of the dashboards"""
Expand All @@ -249,7 +260,7 @@ def update_dashboard_roles(roles):

for dashboard_uuid, role_ids in roles.items():
dashboard = db.session.query(Dashboard).filter_by(uuid=dashboard_uuid).one()
print("Importing dashboard roles", dashboard_uuid, role_ids)
logger.info("Importing dashboard roles", dashboard_uuid, role_ids)
dashboard.roles = role_ids
if owners:
dashboard.owners = owners
Expand All @@ -270,10 +281,10 @@ def update_embeddable_uuids():

def create_embeddable_dashboard_by_slug(dashboard_slug, embeddable_uuid):
"""Create an embeddable dashboard by slug"""
print(f"Creating embeddable dashboard {dashboard_slug}, {embeddable_uuid}")
logger.info(f"Creating embeddable dashboard {dashboard_slug}, {embeddable_uuid}")
dashboard = db.session.query(Dashboard).filter_by(slug=dashboard_slug).first()
if dashboard is None:
print(f"WARNING: Dashboard {dashboard_slug} not found")
logger.info(f"WARNING: Dashboard {dashboard_slug} not found")
return

embedded_dashboard = db.session.query(EmbeddedDashboard).filter_by(dashboard_id=dashboard.id).first()
Expand All @@ -287,13 +298,13 @@ def create_embeddable_dashboard_by_slug(dashboard_slug, embeddable_uuid):

def update_datasets():
    """Refresh column metadata for every imported dataset.

    Iterates over all SqlaTable rows and re-fetches their metadata from the
    underlying database, committing each table as it is refreshed. The whole
    refresh is skipped when the deployment has it disabled.
    """
    logger.info("Refreshing datasets")
    # SUPERSET_REFRESH_DATASETS is a template boolean rendered at deploy
    # time; skip the (potentially slow) per-table refresh when disabled.
    if {{SUPERSET_REFRESH_DATASETS}}:
        datasets = db.session.query(SqlaTable).all()
        for dataset in datasets:
            logger.info(f"Refreshing dataset {dataset.table_name}")
            dataset.fetch_metadata(commit=True)


Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
import sys
"""
Gather performance metrics on Superset chart queries.
from superset.app import create_app
Reads the queries from the superset database, and enriches them with the
query_context from the asset files. The query_context cannot be stored in the
database on import because it uses database primary keys which do not match
across Superset installations.
"""

app = create_app()
app.app_context().push()
from create_assets import BASE_DIR, ASSET_FOLDER_MAPPING, app

import json
import logging
import os
import time
import uuid
from datetime import datetime
from unittest.mock import patch

import click
import sqlparse
import yaml
from flask import g
from superset import security_manager
from superset.commands.chart.data.get_data_command import ChartDataCommand
Expand All @@ -27,6 +33,7 @@
ASPECTS_VERSION = "{{ASPECTS_VERSION}}"
UUID = str(uuid.uuid4())[0:6]
RUN_ID = f"aspects-{ASPECTS_VERSION}-{UUID}"
CHART_PATH = "/app/openedx-assets/assets/charts/"

report_format = "{i}. {slice}\n" "Superset time: {superset_time} (s).\n"

Expand Down Expand Up @@ -64,10 +71,12 @@ def performance_metrics(course_key, print_sql):
.all()
)
report = []
query_contexts = get_query_contexts_from_assets()
for dashboard in dashboards:
logger.info(f"Dashboard: {dashboard.slug}")
for slice in dashboard.slices:
result = measure_chart(slice, extra_filters)
query_context = get_slice_query_context(slice, query_contexts)
result = measure_chart(slice, query_context)
if not result:
continue
for query in result["queries"]:
Expand All @@ -78,16 +87,33 @@ def performance_metrics(course_key, print_sql):

logger.info("Waiting for clickhouse log...")
time.sleep(20)
get_query_log_from_clickhouse(report, print_sql)
get_query_log_from_clickhouse(report, query_contexts, print_sql)
return report


def measure_chart(slice, extra_filters=[]):
"""
Measure the performance of a chart and return the results.
"""
logger.info(f"Fetching slice data: {slice}")
query_context = json.loads(slice.query_context)
def get_query_contexts_from_assets(chart_path=None):
    """Load chart query contexts from the exported asset YAML files on disk.

    The query_context is blanked in the database after import (it embeds
    database primary keys that differ between Superset installations), so
    this reads it back from the chart asset files instead.

    Args:
        chart_path: Directory tree to walk for chart ``.yaml`` files.
            Defaults to the module-level ``CHART_PATH``.

    Returns:
        dict mapping chart UUID (str) to its parsed query_context dict.
    """
    if chart_path is None:
        chart_path = CHART_PATH
    query_contexts = {}

    for root, _dirs, files in os.walk(chart_path):
        for filename in files:
            if not filename.endswith(".yaml"):
                continue

            path = os.path.join(root, filename)
            # Use a distinct name for the handle; the original code shadowed
            # the loop variable with the open file object.
            with open(path, "r") as asset_file:
                asset = yaml.safe_load(asset_file)
            # query_context is stored as a JSON string inside the YAML asset.
            if asset.get("query_context"):
                query_contexts[asset["uuid"]] = json.loads(asset["query_context"])

    logger.info(f"Found {len(query_contexts)} query contexts")
    return query_contexts

def get_slice_query_context(slice, query_contexts, extra_filters=[]):
query_context = query_contexts.get(str(slice.uuid), {})
if not query_context:
logger.info(f"SLICE {slice} has no query context! {slice.uuid}")
logger.info(query_contexts.keys())

query_context.update(
{
"result_format": "json",
Expand All @@ -104,6 +130,15 @@ def measure_chart(slice, extra_filters=[]):
for query in query_context["queries"]:
query["filters"]+=extra_filters

return query_context


def measure_chart(slice, query_context):
"""
Measure the performance of a chart and return the results.
"""
logger.info(f"Fetching slice data: {slice}")

g.user = security_manager.find_user(username="{{SUPERSET_ADMIN_USERNAME}}")
query_context = ChartDataQueryContextSchema().load(query_context)
command = ChartDataCommand(query_context)
Expand All @@ -121,7 +156,7 @@ def measure_chart(slice, extra_filters=[]):
return result


def get_query_log_from_clickhouse(report, print_sql):
def get_query_log_from_clickhouse(report, query_contexts, print_sql):
"""
Get the query log from clickhouse and print the results.
"""
Expand All @@ -130,13 +165,12 @@ def get_query_log_from_clickhouse(report, print_sql):

slice = db.session.query(Slice).filter(Slice.uuid == chart_uuid).one()

query_context = json.loads(slice.query_context)
query_context = get_slice_query_context(slice, query_contexts)
query_context["queries"][0]["filters"].append(
{"col": "http_user_agent", "op": "==", "val": RUN_ID}
)
slice.query_context = json.dumps(query_context)

ch_chart_result = measure_chart(slice)
ch_chart_result = measure_chart(slice, query_context)

clickhouse_queries = {}
for query in ch_chart_result["queries"]:
Expand All @@ -145,7 +179,7 @@ def get_query_log_from_clickhouse(report, print_sql):
clickhouse_queries[parsed_sql] = row

if print_sql:
print("ClickHouse SQL: ")
logger.info("ClickHouse SQL: ")
logger.info(parsed_sql)

# Sort report by slowest queries
Expand All @@ -167,7 +201,7 @@ def get_query_log_from_clickhouse(report, print_sql):
)

if print_sql:
print("Superset SQL: ")
logger.info("Superset SQL: ")
logger.info(parsed_sql)

clickhouse_report = clickhouse_queries.get(parsed_sql, {})
Expand Down

0 comments on commit 992f0cc

Please sign in to comment.