Skip to content

Commit

Permalink
[QPROF] Huge Improvements (#1282)
Browse files Browse the repository at this point in the history
- Optimized computations making the tree creation faster.
 - Better Legend, correctly organised and taking care of dashed/dotted links.
 - Possibility to display trees based on thresholds.

Should close:
 - https://jira.verticacorp.com/jira/browse/VER-97041?filter=-1
 - https://jira.verticacorp.com/jira/browse/VER-97043?filter=-1
  • Loading branch information
oualib authored Sep 25, 2024
1 parent fd1df5e commit de14804
Show file tree
Hide file tree
Showing 3 changed files with 154 additions and 16 deletions.
2 changes: 1 addition & 1 deletion verticapy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
__license__: str = "Apache License, Version 2.0"
__version__: str = "1.0.5"
__iteration__: int = 1
__date__: str = "20092024"
__date__: str = "25092024"
__last_commit__: str = "d15329cab9a2360454f4473e676068d2f793b965"
__long_version__: str = f"{__version__}-{__iteration__}{__date__}-{__last_commit__}"
__codecov__: float = 0.84
Expand Down
49 changes: 45 additions & 4 deletions verticapy/performance/vertica/qprof.py
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,10 @@ def __init__(
else:
self.statement_id = 1

# To store metrics and not recompute them.
self.transaction_all_metrics = {}
self.transaction_tr_order_all = {}

# BUILDING THE target_schema.
if target_schema == "v_temp_schema":
self.target_schema = self._v_temp_schema_dict()
Expand Down Expand Up @@ -2746,6 +2750,9 @@ def _get_qplan_tr_order(
For more details, please look at
:py:class:`~verticapy.performance.vertica.qprof.QueryProfiler`.
"""
current_tuple = (self.transaction_id, self.statement_id)
if current_tuple in self.transaction_tr_order_all:
return self.transaction_tr_order_all[current_tuple]
query = f"""
SELECT
REGEXP_SUBSTR(step_label, '\\d+')::INT
Expand All @@ -2763,16 +2770,26 @@ def _get_qplan_tr_order(
title="Getting the corresponding query",
method="fetchall",
)
return list(dict.fromkeys([q[0] for q in res]))
res = list(dict.fromkeys([q[0] for q in res]))
except:
return []
res = []

# Storing the metrics.
self.transaction_tr_order_all[current_tuple] = res

return res

def _get_metric_val(self):
"""
Helper function to return the
operator statistics.
"""
# Init
# If stored, we return the value.
current_tuple = (self.transaction_id, self.statement_id)
if current_tuple in self.transaction_all_metrics:
return self.transaction_all_metrics[current_tuple]

# Init.
query = self.get_qexecution_report(granularity=1, genSQL=True)
cols = self.get_qexecution_report(return_cols=True)
res = _executeSQL(
Expand All @@ -2797,7 +2814,7 @@ def _get_metric_val(self):
current_metric = -1
metric_value_op[me[0]][me[2]][col] = current_metric

# Summary
# Summary.
query = self.get_qexecution_report(granularity=2, genSQL=True)
res = _executeSQL(
query,
Expand All @@ -2819,6 +2836,10 @@ def _get_metric_val(self):
current_metric = -1
metric_value[col][me[0]] = current_metric

# Storing the metrics.
self.transaction_all_metrics[current_tuple] = metric_value_op, metric_value

# Returning the metric.
return metric_value_op, metric_value

def _get_vdf_summary(self):
Expand Down Expand Up @@ -3202,6 +3223,26 @@ def get_qplan_tree(
NULL values.
Default: '#EFEFEF' (light
gray)
- threshold_metric1:
Threshold used to disable
some specific ``path_id``
based on the first metric.
If the ``path_id`` value
is under this value: A
minimalist representation
of the corresponding
``path_id`` will be used.
Default: None
- threshold_metric2:
Threshold used to disable
some specific ``path_id``
based on the second metric.
If the ``path_id`` value
is under this value: A
minimalist representation
of the corresponding
``path_id`` will be used.
Default: None
- fontcolor:
Font color.
Default (light-m): #000000 (black)
Expand Down
119 changes: 108 additions & 11 deletions verticapy/performance/vertica/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,10 @@ def _set_style(self, d: dict) -> None:
d["legend2_max"] = 0
else:
d["legend2_max"] += 1
if "threshold_metric1" not in d:
d["threshold_metric1"] = None
if "threshold_metric2" not in d:
d["threshold_metric2"] = None
if "display_path_transition" not in d:
d["display_path_transition"] = True
if "display_annotations" not in d:
Expand Down Expand Up @@ -1458,6 +1462,7 @@ def _gen_label_table(
label: Union[int, str],
colors: list,
operator: Optional[str] = None,
legend_metrics: Optional[list] = None,
) -> str:
"""
Generates the Graphviz
Expand Down Expand Up @@ -1485,16 +1490,59 @@ def _gen_label_table(
See :py:meth:`~verticapy.performance.vertica.tree`
for more information.
"""
if isinstance(label, int) and label < 0:
label = self._get_special_operator(operator)
if not (self.style["display_operator"]) and len(colors) == 1:
return f'"{label}", style="filled", fillcolor="{colors[0]}"'

# Init.
fontcolor = self.style["fontcolor"]
fontsize = self.style["fontsize"]
fillcolor = self.style["fillcolor"]
width = self.style["width"] * 30
height = self.style["height"] * 60
operator_icon = self._get_operator_icon(operator)

# Getting the label.
if isinstance(label, int) and label < 0:
label = self._get_special_operator(operator)

# Metrics Init.
display_path_id = True

if isinstance(legend_metrics, list) and len(legend_metrics) > 0:
metric_1 = legend_metrics[0]
else:
metric_1 = None
if isinstance(legend_metrics, list) and len(legend_metrics) > 1:
metric_2 = legend_metrics[1]
else:
metric_2 = None

metric_1_t = self.style["threshold_metric1"]
metric_2_t = self.style["threshold_metric2"]

if not (isinstance(metric_1_t, NoneType)):
if isinstance(metric_1, NoneType) or metric_1 < metric_1_t:
display_path_id = False

if not (isinstance(metric_2_t, NoneType)):
if isinstance(metric_2, NoneType) or metric_2 < metric_2_t:
display_path_id = False

# Special Display.
if not (display_path_id):
return (
'<<TABLE border="1" cellborder="1" cellspacing="0" '
f'cellpadding="0"><TR><TD WIDTH="{width * 2}" '
f'HEIGHT="{height * 1}" BGCOLOR="{fillcolor}">'
f'<FONT POINT-SIZE="{fontsize}" COLOR="{fontcolor}">'
f"{label}</FONT></TD></TR></TABLE>>",
display_path_id,
)
if not (self.style["display_operator"]) and len(colors) == 1:
return (
f'"{label}", style="filled", fillcolor="{colors[0]}"',
display_path_id,
)

# Main.
if len(colors) > 1:
second_color = (
f'<TD WIDTH="{width}" HEIGHT="{height}" '
Expand Down Expand Up @@ -1545,7 +1593,7 @@ def _gen_label_table(
f'COLOR="{fontcolor}">{label}</FONT></TD>{operator_icon}{second_color}'
f"</TR>{proj}</TABLE>>"
)
return label
return label, display_path_id

def _gen_labels(self) -> str:
"""
Expand Down Expand Up @@ -1710,6 +1758,7 @@ def _gen_labels(self) -> str:
tooltip_metrics += me_description

colors = [color]
legend_metrics = [self._get_metric(self.rows[i], self.metric[0], i)]
if len(self.metric) > 1:
if not (isinstance(self.metric[1], NoneType)):
if all_metrics_2[i] >= 0:
Expand All @@ -1723,10 +1772,12 @@ def _gen_labels(self) -> str:
self.style["hasnull_1"] = True
else:
colors += [self.style["fillcolor"]]
label = self._gen_label_table(
legend_metrics += [self._get_metric(self.rows[i], self.metric[1], i)]
label, display_path_id = self._gen_label_table(
label,
colors,
operator=row,
legend_metrics=legend_metrics,
)

if tree_id in links and display_tr:
Expand All @@ -1739,6 +1790,8 @@ def _gen_labels(self) -> str:
if ns_icon != "":
ns_icon += " "
ns_icon += QprofUtility._get_execute_on(tooltip)
if not (display_path_id):
ns_icon = ""
# Final Tooltip.
description = "\n\nDescriptors\n------------\n" + "\n".join(
tooltip.split("\n")[1:]
Expand Down Expand Up @@ -1957,6 +2010,9 @@ def _gen_legend_annotations(self, rows: Optional[list] = None):
all_legend[
"BROADCAST"
] = f'<tr><td BGCOLOR="{bgcolor}"><FONT COLOR="{fontcolor}">B</FONT></td><td BGCOLOR="{fillcolor}"><FONT COLOR="{fontcolor}">BROADCAST</FONT></td></tr>'
all_legend[
"..."
] = f'<tr><td BGCOLOR="{bgcolor}"><FONT COLOR="{fontcolor}">...</FONT></td><td BGCOLOR="{fillcolor}"><FONT COLOR="{fontcolor}">BROADCAST</FONT></td></tr>'
if "GLOBAL RESEGMENT" in row_tmp and "LOCAL RESEGMENT" in row_tmp:
all_legend[
"GLR"
Expand All @@ -1973,6 +2029,10 @@ def _gen_legend_annotations(self, rows: Optional[list] = None):
all_legend[
"RESEGMENT"
] = f'<tr><td BGCOLOR="{bgcolor}"><FONT COLOR="{fontcolor}">R</FONT></td><td BGCOLOR="{fillcolor}"><FONT COLOR="{fontcolor}">RESEGMENT</FONT></td></tr>'
if "RESEGMENT" in row_tmp and "BROADCAST" not in row_tmp:
all_legend[
"---"
] = f'<tr><td BGCOLOR="{bgcolor}"><FONT COLOR="{fontcolor}">---</FONT></td><td BGCOLOR="{fillcolor}"><FONT COLOR="{fontcolor}">RESEGMENT | NO BROADCAST</FONT></td></tr>'
if "HASH" in row_tmp:
all_legend[
"HASH"
Expand All @@ -1998,14 +2058,51 @@ def _gen_legend_annotations(self, rows: Optional[list] = None):
"ALL NODES"
] = f'<tr><td BGCOLOR="{bgcolor}"><FONT COLOR="{fontcolor}">🌐</FONT></td><td BGCOLOR="{fillcolor}"><FONT COLOR="{fontcolor}">ALL NODES</FONT></td></tr>'

trans_sort = [
"CROSS JOIN",
"INNER",
"OUTER",
"MERGE",
"FILTER",
"PIPELINED",
"BROADCAST",
"GLR",
"GR",
"LR",
"RESEGMENT",
]
trans_links_sort = [
"...",
"---",
]
trans_info_sort = [
"NO STATISTICS",
"QUERY INITIATOR",
"ALL NODES",
]

res = ""
for op in all_legend:
res += all_legend[op]
for idx, trans_list in enumerate(
[trans_sort, trans_links_sort, trans_info_sort]
):
res_trans = ""
if idx == 0:
name_tmp = "Path transition"
elif idx == 1:
name_tmp = "Link"
elif idx == 2:
name_tmp = "Information"
for op in trans_list:
if op in all_legend:
res_trans += all_legend[op]
if res_trans:
res += (
f'<tr><td BGCOLOR="{fillcolor}"></td><td BGCOLOR="{bgcolor}"><FONT COLOR="{fontcolor}">{name_tmp}</FONT></td></tr>'
+ res_trans
)

if res:
res_f = f'\tlegend_annotations [shape=plaintext, fillcolor=white, label=<<table border="0" cellborder="1" cellspacing="0">'
res = f'{res_f}<tr><td BGCOLOR="{bgcolor}"></td><td BGCOLOR="{bgcolor}"><FONT COLOR="{fontcolor}">Path transition</FONT></td></tr>{res}'
res += "</table>>]\n\n"
res = f'\tlegend_annotations [shape=plaintext, fillcolor=white, label=<<table border="0" cellborder="1" cellspacing="0">{res}</table>>]\n\n'
return res

def _gen_legend(self, metric: Optional[list] = None, idx: int = 0) -> str:
Expand Down

0 comments on commit de14804

Please sign in to comment.