-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add performance metrics commands
- Loading branch information
Showing
6 changed files
with
336 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
136 changes: 136 additions & 0 deletions
136
tutoraspects/templates/aspects/apps/superset/pythonpath/performance_metrics.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
from superset.app import create_app | ||
app = create_app() | ||
app.app_context().push() | ||
|
||
""" | ||
The plan | ||
1. Load the instructor dashboard | ||
2. Load the instructor dashboard datasets | ||
3. Perform the following operations for every dataset: | ||
a. Load the dataset | ||
b. Perform queries on the dataset with/without filters | ||
c. Perform aggregations on the dataset | ||
d. Measure the time taken for each operation | ||
e. Repeat 5 times and take metrics such as average, median, max, min, etc. | ||
""" | ||
from superset.extensions import db | ||
from flask import g | ||
from superset.models.dashboard import Dashboard | ||
from superset.models.slice import Slice | ||
from superset.charts.schemas import ChartDataQueryContextSchema | ||
from superset.charts.data.commands.get_data_command import ChartDataCommand | ||
from superset import security_manager | ||
import json | ||
from datetime import datetime | ||
import sqlparse | ||
from unittest.mock import patch | ||
import uuid | ||
import time | ||
|
||
ASPECTS_VERSION = "{{ASPECTS_VERSION}}" | ||
UUID = str(uuid.uuid4())[0:6] | ||
RUN_ID = f'aspects-{ASPECTS_VERSION}-{UUID}' | ||
|
||
def load_dashboards(): | ||
"""Load dashboards from the database.""" | ||
embedable_dashboards = {{ SUPERSET_EMBEDDABLE_DASHBOARDS }} | ||
dashboards = db.session.query(Dashboard).filter(Dashboard.slug.in_(embedable_dashboards)).all() | ||
return dashboards | ||
|
||
|
||
def performance_metrics(): | ||
"""Measure the performance of the dashboard.""" | ||
with patch('clickhouse_connect.common.build_client_name') as mock_build_client_name: | ||
mock_build_client_name.return_value = RUN_ID | ||
dashboards = load_dashboards() | ||
report = [] | ||
for dashboard in dashboards: | ||
print("Dashboard:", dashboard.slug) | ||
for slice in dashboard.slices: | ||
report.append(meassure_chart(slice)) | ||
return report | ||
|
||
|
||
report_format = """ | ||
{i}. {slice} | ||
Superset time: {superset_time}""" | ||
|
||
query_format = """ Query duration: {query_duration_ms} s. | ||
Result rows: {result_rows} | ||
Memory Usage (MB): {memory_usage_mb} | ||
Row count Clickhouse {rowcount:} | ||
Filters: {filters}""" | ||
|
||
def meassure_chart(slice, extra_filters=None): | ||
""" | ||
Meassure the performance of a chart and return the results. | ||
""" | ||
print("Slice:", slice) | ||
query_context = json.loads(slice.query_context) | ||
query_context.update({ | ||
"result_format": "json", | ||
"result_type": "full", | ||
"force": True, | ||
}) | ||
if extra_filters: | ||
query_context["filters"] = extra_filters | ||
g.user = security_manager.find_user(username="admin") | ||
query_context = ChartDataQueryContextSchema().load(query_context) | ||
command = ChartDataCommand(query_context) | ||
start_time = datetime.now() | ||
result = command.run() | ||
end_time = datetime.now() | ||
result["time_elapsed"] = '{} s.'.format((end_time-start_time).total_seconds()) | ||
result["slice"] = slice | ||
return result | ||
|
||
def get_query_log_from_clickhouse(report): | ||
chart_uuid = "bb13bb31-c797-4ed3-a7f9-7825cc6dc482" | ||
|
||
slice = db.session.query(Slice).filter(Slice.uuid == chart_uuid).one() | ||
query_context = json.loads(slice.query_context) | ||
query_context["queries"][0]["filters"].append( | ||
{ | ||
'col': 'http_user_agent', 'op': '==', 'val': RUN_ID | ||
} | ||
) | ||
slice.query_context = json.dumps(query_context) | ||
result = meassure_chart(slice) | ||
clickhouse_queries = {} | ||
for query in result["queries"]: | ||
for row in query["data"]: | ||
#for field in ["query_duration_ms", "result_rows", "event_time", "memory_usage_mb"]: | ||
#print(f"{field}: {row[field]}") | ||
parsed_sql = str(sqlparse.parse(row.pop("query"))[0]) | ||
clickhouse_queries[parsed_sql] = row | ||
|
||
|
||
print("\nSuperset Reports") | ||
for i, result in enumerate(report): | ||
print(report_format.format( | ||
i=(i+1), | ||
slice=result["slice"], | ||
superset_time=result["time_elapsed"] | ||
)) | ||
for i, query in enumerate(result["queries"]): | ||
parsed_sql = str(sqlparse.parse(query["query"])[0]).replace(";", "") + "\n FORMAT Native" | ||
clickhouse_report = clickhouse_queries.get(parsed_sql, {}) | ||
print(query_format.format( | ||
query_duration_ms=clickhouse_report.get("query_duration_ms") / 1000, | ||
memory_usage_mb =clickhouse_report.get("memory_usage_mb"), | ||
result_rows=clickhouse_report.get("result_rows"), | ||
rowcount=query["rowcount"], | ||
filters=query["applied_filters"], | ||
)) | ||
|
||
|
||
|
||
|
||
if __name__ == "__main__": | ||
print("Running performance metrics. RUN ID:", RUN_ID) | ||
report = performance_metrics() | ||
# Clickhouse query log takes some seconds to log queries. | ||
print("Waiting for clickhouse log...") | ||
time.sleep(10) | ||
get_query_log_from_clickhouse(report) |
43 changes: 43 additions & 0 deletions
43
...plates/aspects/build/aspects-superset/openedx-assets/assets/charts/Query_performance.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
_file_name: Query_performance.yaml | ||
cache_timeout: null | ||
certification_details: null | ||
certified_by: null | ||
dataset_uuid: 53099fd2-6739-45e5-9a93-fcb9fa70119f | ||
description: null | ||
params: | ||
adhoc_filters: | ||
- clause: WHERE | ||
comparator: No filter | ||
expressionType: SIMPLE | ||
operator: TEMPORAL_RANGE | ||
subject: event_time | ||
all_columns: [] | ||
color_pn: true | ||
extra_form_data: {} | ||
groupby: | ||
- http_user_agent | ||
- query_duration_ms | ||
- result_rows | ||
- event_time | ||
- memory_usage_mb | ||
- tables | ||
- query | ||
order_by_cols: [] | ||
order_desc: true | ||
percent_metrics: [] | ||
query_mode: aggregate | ||
row_limit: 1000 | ||
server_page_length: 10 | ||
show_cell_bars: true | ||
table_timestamp_format: smart_date | ||
temporal_columns_lookup: | ||
event_time: true | ||
time_grain_sqla: P1D | ||
viz_type: table | ||
query_context: '{"datasource":{"id":266,"type":"table"},"force":false,"queries":[{"filters":[{"col":"event_time","op":"TEMPORAL_RANGE","val":"No | ||
filter"}],"extras":{"time_grain_sqla":"P1D","having":"","where":""},"applied_time_extras":{},"columns":["http_user_agent","query_duration_ms","result_rows",{"timeGrain":"P1D","columnType":"BASE_AXIS","sqlExpression":"event_time","label":"event_time","expressionType":"SQL"},"memory_usage_mb","tables","query"],"metrics":[],"orderby":[],"annotation_layers":[],"row_limit":1000,"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{},"post_processing":[]}],"form_data":{"datasource":"266__table","viz_type":"table","slice_id":422,"query_mode":"aggregate","groupby":["http_user_agent","query_duration_ms","result_rows","event_time","memory_usage_mb","tables","query"],"time_grain_sqla":"P1D","temporal_columns_lookup":{"event_time":true},"all_columns":[],"percent_metrics":[],"adhoc_filters":[{"clause":"WHERE","comparator":"No | ||
filter","expressionType":"SIMPLE","operator":"TEMPORAL_RANGE","subject":"event_time"}],"order_by_cols":[],"row_limit":1000,"server_page_length":10,"order_desc":true,"table_timestamp_format":"smart_date","show_cell_bars":true,"color_pn":true,"extra_form_data":{},"dashboards":[],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}' | ||
slice_name: Query performance | ||
uuid: bb13bb31-c797-4ed3-a7f9-7825cc6dc482 | ||
version: 1.0.0 | ||
viz_type: table |
125 changes: 125 additions & 0 deletions
125
...ts/templates/aspects/build/aspects-superset/openedx-assets/assets/datasets/query_log.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
_file_name: query_log.yaml | ||
cache_timeout: null | ||
columns: | ||
- advanced_data_type: null | ||
column_name: event_time | ||
description: null | ||
expression: null | ||
extra: {} | ||
filterable: true | ||
groupby: true | ||
is_active: true | ||
is_dttm: true | ||
python_date_format: null | ||
type: DateTime | ||
verbose_name: Event Time | ||
- advanced_data_type: null | ||
column_name: query | ||
description: null | ||
expression: null | ||
extra: {} | ||
filterable: true | ||
groupby: true | ||
is_active: true | ||
is_dttm: false | ||
python_date_format: null | ||
type: String | ||
verbose_name: Query | ||
- advanced_data_type: null | ||
column_name: tables | ||
description: null | ||
expression: null | ||
extra: {} | ||
filterable: true | ||
groupby: true | ||
is_active: true | ||
is_dttm: false | ||
python_date_format: null | ||
type: Array(LowCardinality(String)) | ||
verbose_name: null | ||
- advanced_data_type: null | ||
column_name: http_user_agent | ||
description: null | ||
expression: null | ||
extra: {} | ||
filterable: true | ||
groupby: true | ||
is_active: true | ||
is_dttm: false | ||
python_date_format: null | ||
type: LowCardinality(String) | ||
verbose_name: null | ||
- advanced_data_type: null | ||
column_name: memory_usage_mb | ||
description: null | ||
expression: null | ||
extra: {} | ||
filterable: true | ||
groupby: true | ||
is_active: true | ||
is_dttm: false | ||
python_date_format: null | ||
type: Float64 | ||
verbose_name: null | ||
- advanced_data_type: null | ||
column_name: query_duration_ms | ||
description: null | ||
expression: null | ||
extra: {} | ||
filterable: true | ||
groupby: true | ||
is_active: true | ||
is_dttm: false | ||
python_date_format: null | ||
type: UInt64 | ||
verbose_name: null | ||
- advanced_data_type: null | ||
column_name: result_rows | ||
description: null | ||
expression: null | ||
extra: {} | ||
filterable: true | ||
groupby: true | ||
is_active: true | ||
is_dttm: false | ||
python_date_format: null | ||
type: UInt64 | ||
verbose_name: null | ||
database_uuid: 21174b6c-4d40-4958-8161-d6c3cf5e77b6 | ||
default_endpoint: null | ||
description: null | ||
extra: null | ||
fetch_values_predicate: null | ||
filter_select_enabled: true | ||
main_dttm_col: null | ||
metrics: | ||
- currency: null | ||
d3format: null | ||
description: null | ||
expression: COUNT(*) | ||
extra: | ||
warning_markdown: '' | ||
metric_name: count | ||
metric_type: count | ||
verbose_name: COUNT(*) | ||
warning_text: null | ||
normalize_columns: false | ||
offset: 0 | ||
params: null | ||
schema: main | ||
sql: | | ||
SELECT | ||
query_duration_ms, | ||
result_rows, | ||
memory_usage / (1024 * 1024 ) AS memory_usage_mb, | ||
event_time, | ||
tables, | ||
query, | ||
http_user_agent | ||
FROM system.query_log | ||
WHERE (has(databases, '{{ASPECTS_XAPI_DATABASE}}') OR has(databases, '{{ASPECTS_EVENT_SINK_DATABASE}}') OR has(databases, '{{DBT_PROFILE_TARGET_DATABASE}}')) AND (http_user_agent LIKE 'aspects-%') AND (type = 'QueryFinish') | ||
ORDER BY event_time DESC | ||
table_name: query_log | ||
template_params: null | ||
uuid: 53099fd2-6739-45e5-9a93-fcb9fa70119f | ||
version: 1.0.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,6 @@ SELECT | |
FROM | ||
system.query_log | ||
WHERE | ||
TYPE = 'QueryFinish' | ||
type = 'QueryFinish' | ||
ORDER BY | ||
query_duration_ms DESC; |