Skip to content

Commit

Permalink
feat: add performance metrics commands
Browse files Browse the repository at this point in the history
  • Loading branch information
Ian2012 committed Mar 15, 2024
1 parent daf3b92 commit db3cc91
Show file tree
Hide file tree
Showing 6 changed files with 336 additions and 1 deletion.
15 changes: 15 additions & 0 deletions tutoraspects/commands_v0.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,21 @@ def alembic(context, command) -> None:
"""
runner.run_job("aspects", command)

# Ex: "tutor local do performance-metrics "
@click.command(context_settings={"ignore_unknown_options": True})
@click.pass_obj
def performance_metrics(context) -> None:
    """
    Job to measure query performance of the Aspects dashboards in Superset.

    Runs /app/pythonpath/performance_metrics.py inside the superset container.
    """
    config = tutor_config.load(context.root)
    runner = context.job_runner(config)

    command = """echo 'Performance...' &&
    python /app/pythonpath/performance_metrics.py &&
    echo 'Done!';
    """
    runner.run_job("superset", command)

# Ex: "tutor local do import_assets "
@click.command(context_settings={"ignore_unknown_options": True})
Expand Down
16 changes: 16 additions & 0 deletions tutoraspects/commands_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,21 @@ def import_assets() -> list[tuple[str, str]]:
),
]

# Ex: "tutor local do performance-metrics "
@click.command(context_settings={"ignore_unknown_options": True})
def performance_metrics() -> list[tuple[str, str]]:
    """
    Job to measure query performance of the Aspects dashboards in Superset.

    Returns the (service, command) pairs to run: executes
    /app/pythonpath/performance_metrics.py inside the superset container.
    """
    return [
        (
            "superset",
            # NOTE: each fragment ends with a space so the implicit string
            # concatenation does not glue "&&" onto the next command.
            "echo 'Performance...' && "
            "python /app/pythonpath/performance_metrics.py && "
            "echo 'Done!';",
        ),
    ]


# Ex: "tutor local do dump_data_to_clickhouse "
@click.command(context_settings={"ignore_unknown_options": True})
Expand Down Expand Up @@ -311,6 +326,7 @@ def check_superset_assets():
dump_data_to_clickhouse,
transform_tracking_logs,
import_assets,
performance_metrics,
)

COMMANDS = (aspects,)
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
from superset.app import create_app
app = create_app()
app.app_context().push()

"""
The plan
1. Load the instructor dashboard
2. Load the instructor dashboard datasets
3. Perform the following operations for every dataset:
a. Load the dataset
b. Perform queries on the dataset with/without filters
c. Perform aggregations on the dataset
d. Measure the time taken for each operation
e. Repeat 5 times and take metrics such as average, median, max, min, etc.
"""
from superset.extensions import db
from flask import g
from superset.models.dashboard import Dashboard
from superset.models.slice import Slice
from superset.charts.schemas import ChartDataQueryContextSchema
from superset.charts.data.commands.get_data_command import ChartDataCommand
from superset import security_manager
import json
from datetime import datetime
import sqlparse
from unittest.mock import patch
import uuid
import time

# Template placeholder rendered by Tutor when this plugin file is generated.
ASPECTS_VERSION = "{{ASPECTS_VERSION}}"
# Short random suffix so each run can be told apart in the query log.
UUID = str(uuid.uuid4())[0:6]
# Unique run identifier; patched in as the ClickHouse client name below and
# later used to filter system.query_log rows (via http_user_agent).
RUN_ID = f'aspects-{ASPECTS_VERSION}-{UUID}'

def load_dashboards():
    """Return the embeddable dashboards found in the Superset database."""
    # Rendered by Tutor into the list of embeddable dashboard slugs.
    embedable_dashboards = {{ SUPERSET_EMBEDDABLE_DASHBOARDS }}
    query = db.session.query(Dashboard).filter(
        Dashboard.slug.in_(embedable_dashboards)
    )
    return query.all()


def performance_metrics():
    """Measure the performance of every chart on the embeddable dashboards.

    Patches the ClickHouse client name to RUN_ID so that the queries issued
    here can later be located in ClickHouse's query log, then measures each
    chart with meassure_chart.

    Returns:
        list: one result dict per chart (see meassure_chart).
    """
    with patch('clickhouse_connect.common.build_client_name') as mock_build_client_name:
        mock_build_client_name.return_value = RUN_ID
        dashboards = load_dashboards()
        report = []
        for dashboard in dashboards:
            print("Dashboard:", dashboard.slug)
            # Named `chart_slice` to avoid shadowing the builtin `slice`.
            for chart_slice in dashboard.slices:
                report.append(meassure_chart(chart_slice))
        return report


# Template for the per-chart Superset timing line of the final report.
report_format = """
{i}. {slice}
Superset time: {superset_time}"""

# Template for the ClickHouse-side stats of a single query; the caller
# converts query_duration_ms to seconds before formatting.
query_format = """ Query duration: {query_duration_ms} s.
Result rows: {result_rows}
Memory Usage (MB): {memory_usage_mb}
Row count Clickhouse {rowcount:}
Filters: {filters}"""

def meassure_chart(slice, extra_filters=None):
    """Run one chart's query context through Superset and time it.

    Args:
        slice: the Superset Slice (chart) to measure.
        extra_filters: optional filter list to inject into the query context.

    Returns:
        dict: the ChartDataCommand result, extended with "time_elapsed"
        (wall-clock seconds as a string) and "slice".
    """
    print("Slice:", slice)
    payload = json.loads(slice.query_context)
    payload["result_format"] = "json"
    payload["result_type"] = "full"
    payload["force"] = True
    if extra_filters:
        payload["filters"] = extra_filters
    # Queries are executed as the admin user.
    g.user = security_manager.find_user(username="admin")
    loaded_context = ChartDataQueryContextSchema().load(payload)
    command = ChartDataCommand(loaded_context)
    started = datetime.now()
    result = command.run()
    finished = datetime.now()
    result["time_elapsed"] = '{} s.'.format((finished - started).total_seconds())
    result["slice"] = slice
    return result

def get_query_log_from_clickhouse(report):
    """Print the Superset report enriched with ClickHouse query-log stats.

    Runs the "Query performance" chart filtered by RUN_ID to fetch the
    ClickHouse-side stats for every query issued during this run, indexes
    them by SQL text, and prints them next to each Superset measurement.

    Args:
        report: list of per-chart result dicts from performance_metrics().
    """
    # UUID of the "Query performance" chart asset shipped with Aspects.
    chart_uuid = "bb13bb31-c797-4ed3-a7f9-7825cc6dc482"

    log_slice = db.session.query(Slice).filter(Slice.uuid == chart_uuid).one()
    query_context = json.loads(log_slice.query_context)
    # Only fetch log rows produced by this run: the ClickHouse client name
    # was patched to RUN_ID, which shows up as http_user_agent in the log.
    query_context["queries"][0]["filters"].append(
        {
            'col': 'http_user_agent', 'op': '==', 'val': RUN_ID
        }
    )
    log_slice.query_context = json.dumps(query_context)
    result = meassure_chart(log_slice)

    # Index the ClickHouse stats rows by their normalized SQL text.
    clickhouse_queries = {}
    for query in result["queries"]:
        for row in query["data"]:
            parsed_sql = str(sqlparse.parse(row.pop("query"))[0])
            clickhouse_queries[parsed_sql] = row

    print("\nSuperset Reports")
    for i, chart_result in enumerate(report):
        print(report_format.format(
            i=(i+1),
            slice=chart_result["slice"],
            superset_time=chart_result["time_elapsed"]
        ))
        for query in chart_result["queries"]:
            # Re-append the FORMAT clause so the SQL matches the text
            # ClickHouse logged — presumably how the client sent it; confirm
            # against the query_log contents if lookups start missing.
            parsed_sql = str(sqlparse.parse(query["query"])[0]).replace(";", "") + "\n FORMAT Native"
            clickhouse_report = clickhouse_queries.get(parsed_sql, {})
            print(query_format.format(
                # Default to 0: the log row can be absent if ClickHouse has
                # not flushed the query yet, and .get() alone would return
                # None and crash on the division.
                query_duration_ms=clickhouse_report.get("query_duration_ms", 0) / 1000,
                memory_usage_mb=clickhouse_report.get("memory_usage_mb"),
                result_rows=clickhouse_report.get("result_rows"),
                rowcount=query["rowcount"],
                filters=query["applied_filters"],
            ))




if __name__ == "__main__":
    print("Running performance metrics. RUN ID:", RUN_ID)
    results = performance_metrics()
    # ClickHouse flushes its query log asynchronously; give it a moment
    # before trying to read this run's entries back.
    print("Waiting for clickhouse log...")
    time.sleep(10)
    get_query_log_from_clickhouse(results)
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
_file_name: Query_performance.yaml
cache_timeout: null
certification_details: null
certified_by: null
dataset_uuid: 53099fd2-6739-45e5-9a93-fcb9fa70119f
description: null
params:
adhoc_filters:
- clause: WHERE
comparator: No filter
expressionType: SIMPLE
operator: TEMPORAL_RANGE
subject: event_time
all_columns: []
color_pn: true
extra_form_data: {}
groupby:
- http_user_agent
- query_duration_ms
- result_rows
- event_time
- memory_usage_mb
- tables
- query
order_by_cols: []
order_desc: true
percent_metrics: []
query_mode: aggregate
row_limit: 1000
server_page_length: 10
show_cell_bars: true
table_timestamp_format: smart_date
temporal_columns_lookup:
event_time: true
time_grain_sqla: P1D
viz_type: table
query_context: '{"datasource":{"id":266,"type":"table"},"force":false,"queries":[{"filters":[{"col":"event_time","op":"TEMPORAL_RANGE","val":"No
filter"}],"extras":{"time_grain_sqla":"P1D","having":"","where":""},"applied_time_extras":{},"columns":["http_user_agent","query_duration_ms","result_rows",{"timeGrain":"P1D","columnType":"BASE_AXIS","sqlExpression":"event_time","label":"event_time","expressionType":"SQL"},"memory_usage_mb","tables","query"],"metrics":[],"orderby":[],"annotation_layers":[],"row_limit":1000,"series_limit":0,"order_desc":true,"url_params":{},"custom_params":{},"custom_form_data":{},"post_processing":[]}],"form_data":{"datasource":"266__table","viz_type":"table","slice_id":422,"query_mode":"aggregate","groupby":["http_user_agent","query_duration_ms","result_rows","event_time","memory_usage_mb","tables","query"],"time_grain_sqla":"P1D","temporal_columns_lookup":{"event_time":true},"all_columns":[],"percent_metrics":[],"adhoc_filters":[{"clause":"WHERE","comparator":"No
filter","expressionType":"SIMPLE","operator":"TEMPORAL_RANGE","subject":"event_time"}],"order_by_cols":[],"row_limit":1000,"server_page_length":10,"order_desc":true,"table_timestamp_format":"smart_date","show_cell_bars":true,"color_pn":true,"extra_form_data":{},"dashboards":[],"force":false,"result_format":"json","result_type":"full"},"result_format":"json","result_type":"full"}'
slice_name: Query performance
uuid: bb13bb31-c797-4ed3-a7f9-7825cc6dc482
version: 1.0.0
viz_type: table
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
_file_name: query_log.yaml
cache_timeout: null
columns:
- advanced_data_type: null
column_name: event_time
description: null
expression: null
extra: {}
filterable: true
groupby: true
is_active: true
is_dttm: true
python_date_format: null
type: DateTime
verbose_name: Event Time
- advanced_data_type: null
column_name: query
description: null
expression: null
extra: {}
filterable: true
groupby: true
is_active: true
is_dttm: false
python_date_format: null
type: String
verbose_name: Query
- advanced_data_type: null
column_name: tables
description: null
expression: null
extra: {}
filterable: true
groupby: true
is_active: true
is_dttm: false
python_date_format: null
type: Array(LowCardinality(String))
verbose_name: null
- advanced_data_type: null
column_name: http_user_agent
description: null
expression: null
extra: {}
filterable: true
groupby: true
is_active: true
is_dttm: false
python_date_format: null
type: LowCardinality(String)
verbose_name: null
- advanced_data_type: null
column_name: memory_usage_mb
description: null
expression: null
extra: {}
filterable: true
groupby: true
is_active: true
is_dttm: false
python_date_format: null
type: Float64
verbose_name: null
- advanced_data_type: null
column_name: query_duration_ms
description: null
expression: null
extra: {}
filterable: true
groupby: true
is_active: true
is_dttm: false
python_date_format: null
type: UInt64
verbose_name: null
- advanced_data_type: null
column_name: result_rows
description: null
expression: null
extra: {}
filterable: true
groupby: true
is_active: true
is_dttm: false
python_date_format: null
type: UInt64
verbose_name: null
database_uuid: 21174b6c-4d40-4958-8161-d6c3cf5e77b6
default_endpoint: null
description: null
extra: null
fetch_values_predicate: null
filter_select_enabled: true
main_dttm_col: null
metrics:
- currency: null
d3format: null
description: null
expression: COUNT(*)
extra:
warning_markdown: ''
metric_name: count
metric_type: count
verbose_name: COUNT(*)
warning_text: null
normalize_columns: false
offset: 0
params: null
schema: main
sql: |
SELECT
query_duration_ms,
result_rows,
memory_usage / (1024 * 1024 ) AS memory_usage_mb,
event_time,
tables,
query,
http_user_agent
FROM system.query_log
WHERE (has(databases, '{{ASPECTS_XAPI_DATABASE}}') OR has(databases, '{{ASPECTS_EVENT_SINK_DATABASE}}') OR has(databases, '{{DBT_PROFILE_TARGET_DATABASE}}')) AND (http_user_agent LIKE 'aspects-%') AND (type = 'QueryFinish')
ORDER BY event_time DESC
table_name: query_log
template_params: null
uuid: 53099fd2-6739-45e5-9a93-fcb9fa70119f
version: 1.0.0
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ SELECT
FROM
system.query_log
WHERE
TYPE = 'QueryFinish'
type = 'QueryFinish'
ORDER BY
query_duration_ms DESC;

0 comments on commit db3cc91

Please sign in to comment.