-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtroubleshooting.sql
134 lines (122 loc) · 5.05 KB
/
troubleshooting.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
-- Kafka external stream consume lag in a MV
SELECT node_id, database, stream_name, state_name, state_value
FROM system.stream_state_log
WHERE state_name LIKE 'processed_sn_%' OR state_name LIKE 'end_sn_%'
-- Commit sn and applied sn lag and their storage sizes of streams
WITH sorted_recent_data_points AS
(
SELECT
node_id, database, name, state_name, state_value, _tp_time AS ts
FROM
table(system.stream_state_log)
WHERE
NOT (starts_with(name, 'mv_k_') OR starts_with(name, '_k_')) AND
((state_name = 'stream_logstore_disk_size') OR (state_name = 'stream_historical_store_disk_size') OR (state_name LIKE 'committed_sn_%') OR (state_name LIKE 'applied_sn_%')) AND (_tp_time > (now() - 15m))
ORDER BY _tp_time ASC
)
SELECT node_id, database, name, state_name, latest(state_value) AS state_value, latest(ts) AS ts
FROM sorted_recent_data_points
GROUP BY
node_id, database, name, state_name
ORDER BY node_id, database, name, state_name;
SELECT
node_id, database, name, state_name, latest(state_value) AS state_value, latest(_tp_time) AS ts
FROM
table(system.stream_state_log)
WHERE
((state_name = 'stream_logstore_disk_size') OR (state_name = 'stream_historical_store_disk_size') OR (state_name LIKE 'committed_sn_%') OR (state_name LIKE 'applied_sn_%')) AND (_tp_time > (now() - 30m))
GROUP BY
node_id, database, name, state_name
SETTINGS
max_threads = 1, force_backfill_in_order = true
-- Replication status / lagging
WITH sorted_recent_data_points AS
(
SELECT
node_id AS leader_node, name, state_string_value AS replication_statuses, _tp_time AS ts
FROM
table(system.stream_state_log)
WHERE
(state_name = 'quorum_replication_status') AND (_tp_time > (now() - 1m))
ORDER BY
_tp_time DESC
), latest_data_point AS
(
SELECT
leader_node, earliest(name) AS name, earliest(replication_statuses) AS replication_statuses, earliest(ts) AS ts
FROM
sorted_recent_data_points
GROUP BY
leader_node
), extracted AS
(
SELECT
leader_node, name, replication_statuses:shard AS shard, json_extract_array_raw(replication_statuses, 'shard_replication_statuses') AS statuses, ts
FROM
latest_data_point
), flatten AS
(
SELECT
leader_node, name, shard, array_join(statuses) AS status, ts
FROM
extracted
)
SELECT
name, shard, leader_node, to_int(status:node) AS node, status:next_sn AS next_sn, status:replicated_sn AS replicated_sn, status:state AS state, status:append_message_flow_paused AS append_paused, status:inflight_messages AS inflight_messages, status:is_learner AS learner, status:recent_active AS recent_active
FROM
flatten
ORDER BY
node, shard ASC;
./programs/timeplusd client -h 127.0.0.1 --port 8463 --user <username> --password <password> --query "WITH sorted_recent_data_points AS
(
SELECT
node_id, database, name, state_name, state_value, _tp_time AS ts
FROM
table(system.stream_state_log)
WHERE
NOT (starts_with(name, 'mv_k_') OR starts_with(name, '_k_')) AND
((state_name = 'stream_logstore_disk_size') OR (state_name = 'stream_historical_store_disk_size') OR (state_name LIKE 'committed_sn_%') OR (state_name LIKE 'applied_sn_%')) AND (_tp_time > (now() - 15m))
ORDER BY _tp_time ASC
)
SELECT node_id, database, name, state_name, latest(state_value) AS state_value, latest(ts) AS ts
FROM sorted_recent_data_points
GROUP BY
node_id, database, name, state_name
ORDER BY node_id, database, name, state_name FORMAT CSV" > stream_states.csv
./programs/timeplusd client -h 127.0.0.1 --port 8463 --user <username> --password <password> --query "WITH sorted_recent_data_points AS
(
SELECT
node_id AS leader_node, database, name, state_string_value AS replication_statuses, replication_statuses:shard::uint32 AS shard, _tp_time AS ts
FROM
table(system.stream_state_log)
WHERE
_tp_time > now() - 15m AND (state_name = 'quorum_replication_status') AND (NOT starts_with(name, 'mv_k_')) AND (NOT starts_with(name, '_k_'))
ORDER BY
_tp_time DESC
), latest_data_point AS
(
SELECT
leader_node, latest(database) as database, name, shard, latest(replication_statuses) AS replication_statuses, latest(ts) AS ts
FROM
sorted_recent_data_points
GROUP BY
leader_node, name, shard
), extracted AS
(
SELECT
leader_node, database, name, shard, json_extract_array_raw(replication_statuses, 'shard_replication_statuses') AS statuses, ts
FROM
latest_data_point
), flatten AS
(
SELECT
leader_node, database, name, shard, array_join(statuses) AS status, ts
FROM
extracted
)
SELECT
database, name, shard, leader_node, to_int(status:node) AS node, status:next_sn::int64 AS next_sn, status:replicated_sn::int64 AS replicated_sn, next_sn - replicated_sn as lag, status:state AS state, status:append_message_flow_paused AS append_paused, status:inflight_messages AS inflight_messages, status:is_learner AS learner, status:recent_active AS recent_active
FROM
flatten
ORDER BY
database, name, shard, node FORMAT CSV" > rep_lags.csv