Skip to content

Commit 58627d9

Browse files
committed
enhance: enable check_qn_collection_leak
Signed-off-by: Wei Liu <wei.liu@zilliz.com>
1 parent 3dd5cce commit 58627d9

File tree

5 files changed

+537
-0
lines changed

5 files changed

+537
-0
lines changed

models/metrics_info.go

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
package models
2+
3+
import (
4+
"encoding/json"
5+
)
6+
7+
// ComponentInfos defines the interface of all component infos
8+
type ComponentInfos interface{}
9+
10+
// MarshalComponentInfos returns the json string of ComponentInfos
11+
func MarshalComponentInfos(infos ComponentInfos) (string, error) {
12+
binary, err := json.Marshal(infos)
13+
return string(binary), err
14+
}
15+
16+
// UnmarshalComponentInfos constructs a ComponentInfos object using a json string
17+
func UnmarshalComponentInfos(s string, infos ComponentInfos) error {
18+
return json.Unmarshal([]byte(s), infos)
19+
}
20+
21+
// HardwareMetrics records the hardware information of nodes.
22+
type HardwareMetrics struct {
23+
IP string `json:"ip"`
24+
CPUCoreCount int `json:"cpu_core_count"`
25+
CPUCoreUsage float64 `json:"cpu_core_usage"`
26+
Memory uint64 `json:"memory"`
27+
MemoryUsage uint64 `json:"memory_usage"`
28+
29+
// how to metric disk & disk usage in distributed storage
30+
Disk uint64 `json:"disk"`
31+
DiskUsage uint64 `json:"disk_usage"`
32+
}
33+
34+
const (
35+
// GitCommitEnvKey defines the key to retrieve the commit corresponding to the current milvus version
36+
// from the metrics information
37+
GitCommitEnvKey = "MILVUS_GIT_COMMIT"
38+
39+
// DeployModeEnvKey defines the key to retrieve the current milvus deployment mode
40+
// from the metrics information
41+
DeployModeEnvKey = "DEPLOY_MODE"
42+
43+
// ClusterDeployMode represents distributed deployment mode
44+
ClusterDeployMode = "DISTRIBUTED"
45+
46+
// StandaloneDeployMode represents the stand-alone deployment mode
47+
StandaloneDeployMode = "STANDALONE"
48+
49+
// GitBuildTagsEnvKey build tag
50+
GitBuildTagsEnvKey = "MILVUS_GIT_BUILD_TAGS"
51+
52+
// MilvusBuildTimeEnvKey build time
53+
MilvusBuildTimeEnvKey = "MILVUS_BUILD_TIME"
54+
55+
// MilvusUsedGoVersion used go version
56+
MilvusUsedGoVersion = "MILVUS_USED_GO_VERSION"
57+
)
58+
59+
// DeployMetrics records the deploy information of nodes.
60+
type DeployMetrics struct {
61+
SystemVersion string `json:"system_version"`
62+
DeployMode string `json:"deploy_mode"`
63+
BuildVersion string `json:"build_version"`
64+
BuildTime string `json:"build_time"`
65+
UsedGoVersion string `json:"used_go_version"`
66+
}
67+
68+
// BaseComponentInfos contains basic information that all components should have.
69+
type BaseComponentInfos struct {
70+
HasError bool `json:"has_error"`
71+
ErrorReason string `json:"error_reason"`
72+
Name string `json:"name"`
73+
HardwareInfos HardwareMetrics `json:"hardware_infos"`
74+
SystemInfo DeployMetrics `json:"system_info"`
75+
CreatedTime string `json:"created_time"`
76+
UpdatedTime string `json:"updated_time"`
77+
Type string `json:"type"`
78+
ID int64 `json:"id"`
79+
}
80+
81+
// QueryNodeConfiguration records the configuration of QueryNode.
82+
type QueryNodeConfiguration struct {
83+
SimdType string `json:"simd_type"`
84+
}
85+
86+
type QueryNodeCollectionMetrics struct {
87+
CollectionRows map[int64]int64
88+
}
89+
90+
// QueryNodeInfos implements ComponentInfos
91+
type QueryNodeInfos struct {
92+
BaseComponentInfos
93+
SystemConfigurations QueryNodeConfiguration `json:"system_configurations"`
94+
QuotaMetrics *QueryNodeQuotaMetrics `json:"quota_metrics"`
95+
CollectionMetrics *QueryNodeCollectionMetrics `json:"collection_metrics"`
96+
}
97+
98+
// QueryCoordConfiguration records the configuration of QueryCoord.
99+
type QueryCoordConfiguration struct {
100+
SearchChannelPrefix string `json:"search_channel_prefix"`
101+
SearchResultChannelPrefix string `json:"search_result_channel_prefix"`
102+
}
103+
104+
// QueryCoordInfos implements ComponentInfos
105+
type QueryCoordInfos struct {
106+
BaseComponentInfos
107+
SystemConfigurations QueryCoordConfiguration `json:"system_configurations"`
108+
}
109+
110+
// ProxyConfiguration records the configuration of Proxy.
111+
type ProxyConfiguration struct {
112+
DefaultPartitionName string `json:"default_partition_name"`
113+
DefaultIndexName string `json:"default_index_name"`
114+
}
115+
116+
// ProxyInfos implements ComponentInfos
117+
type ProxyInfos struct {
118+
BaseComponentInfos
119+
SystemConfigurations ProxyConfiguration `json:"system_configurations"`
120+
QuotaMetrics *ProxyQuotaMetrics `json:"quota_metrics"`
121+
}
122+
123+
// IndexNodeConfiguration records the configuration of IndexNode.
124+
type IndexNodeConfiguration struct {
125+
MinioBucketName string `json:"minio_bucket_name"`
126+
127+
SimdType string `json:"simd_type"`
128+
}
129+
130+
// IndexNodeInfos implements ComponentInfos
131+
type IndexNodeInfos struct {
132+
BaseComponentInfos
133+
SystemConfigurations IndexNodeConfiguration `json:"system_configurations"`
134+
}
135+
136+
// IndexCoordConfiguration records the configuration of IndexCoord.
137+
type IndexCoordConfiguration struct {
138+
MinioBucketName string `json:"minio_bucket_name"`
139+
}
140+
141+
// IndexCoordInfos implements ComponentInfos
142+
type IndexCoordInfos struct {
143+
BaseComponentInfos
144+
SystemConfigurations IndexCoordConfiguration `json:"system_configurations"`
145+
}
146+
147+
// DataNodeConfiguration records the configuration of DataNode.
148+
type DataNodeConfiguration struct {
149+
FlushInsertBufferSize int64 `json:"flush_insert_buffer_size"`
150+
}
151+
152+
// DataNodeInfos implements ComponentInfos
153+
type DataNodeInfos struct {
154+
BaseComponentInfos
155+
SystemConfigurations DataNodeConfiguration `json:"system_configurations"`
156+
QuotaMetrics *DataNodeQuotaMetrics `json:"quota_metrics"`
157+
}
158+
159+
// DataCoordConfiguration records the configuration of DataCoord.
160+
type DataCoordConfiguration struct {
161+
SegmentMaxSize float64 `json:"segment_max_size"`
162+
}
163+
164+
type DataCoordIndexInfo struct {
165+
NumEntitiesIndexed int64
166+
IndexName string
167+
FieldID int64
168+
}
169+
170+
type DataCoordCollectionInfo struct {
171+
NumEntitiesTotal int64
172+
IndexInfo []*DataCoordIndexInfo
173+
}
174+
175+
type DataCoordCollectionMetrics struct {
176+
Collections map[int64]*DataCoordCollectionInfo
177+
}
178+
179+
// DataCoordInfos implements ComponentInfos
180+
type DataCoordInfos struct {
181+
BaseComponentInfos
182+
SystemConfigurations DataCoordConfiguration `json:"system_configurations"`
183+
QuotaMetrics *DataCoordQuotaMetrics `json:"quota_metrics"`
184+
CollectionMetrics *DataCoordCollectionMetrics `json:"collection_metrics"`
185+
}
186+
187+
// RootCoordConfiguration records the configuration of RootCoord.
188+
type RootCoordConfiguration struct {
189+
MinSegmentSizeToEnableIndex int64 `json:"min_segment_size_to_enable_index"`
190+
}
191+
192+
// RootCoordInfos implements ComponentInfos
193+
type RootCoordInfos struct {
194+
BaseComponentInfos
195+
SystemConfigurations RootCoordConfiguration `json:"system_configurations"`
196+
}

models/quota_metrics.go

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
package models
2+
3+
// RateMetricLabel defines the metric label collected from nodes.
4+
type RateMetricLabel = string
5+
6+
const (
7+
ReadResultThroughput RateMetricLabel = "ReadResultThroughput"
8+
InsertConsumeThroughput RateMetricLabel = "InsertConsumeThroughput"
9+
DeleteConsumeThroughput RateMetricLabel = "DeleteConsumeThroughput"
10+
)
11+
12+
const (
13+
UnsolvedQueueType string = "Unsolved"
14+
ReadyQueueType string = "Ready"
15+
ReceiveQueueType string = "Receive"
16+
ExecuteQueueType string = "Execute"
17+
)
18+
19+
// RateMetric contains a RateMetricLabel and a float rate.
20+
type RateMetric struct {
21+
Label RateMetricLabel
22+
Rate float64
23+
}
24+
25+
// FlowGraphMetric contains a minimal timestamp of flow graph and the number of flow graphs.
26+
type FlowGraphMetric struct {
27+
MinFlowGraphChannel string
28+
MinFlowGraphTt uint64
29+
NumFlowGraph int
30+
}
31+
32+
// NodeEffect contains the a node and its effected collection info.
33+
type NodeEffect struct {
34+
NodeID int64
35+
CollectionIDs []int64
36+
}
37+
38+
// QueryNodeQuotaMetrics are metrics of QueryNode.
39+
type QueryNodeQuotaMetrics struct {
40+
Hms HardwareMetrics
41+
Rms []RateMetric
42+
Fgm FlowGraphMetric
43+
GrowingSegmentsSize int64
44+
Effect NodeEffect
45+
DeleteBufferInfo DeleteBufferInfo
46+
}
47+
48+
type DeleteBufferInfo struct {
49+
CollectionDeleteBufferNum map[int64]int64
50+
CollectionDeleteBufferSize map[int64]int64
51+
}
52+
53+
type DataCoordQuotaMetrics struct {
54+
TotalBinlogSize int64
55+
CollectionBinlogSize map[int64]int64
56+
PartitionsBinlogSize map[int64]map[int64]int64
57+
// l0 segments
58+
CollectionL0RowCount map[int64]int64
59+
}
60+
61+
// DataNodeQuotaMetrics are metrics of DataNode.
62+
type DataNodeQuotaMetrics struct {
63+
Hms HardwareMetrics
64+
Rms []RateMetric
65+
Fgm FlowGraphMetric
66+
Effect NodeEffect
67+
}
68+
69+
// ProxyQuotaMetrics are metrics of Proxy.
70+
type ProxyQuotaMetrics struct {
71+
Hms HardwareMetrics
72+
Rms []RateMetric
73+
}

models/topology.go

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
package models
2+
3+
import (
4+
"encoding/json"
5+
"strconv"
6+
)
7+
8+
// in topology graph, the name of all nodes are consisted of role name and its' id
9+
// for example, Proxy1, DataCoord3
10+
11+
// ConstructComponentName returns a name according to the role name and its' id
12+
func ConstructComponentName(role string, id int64) string {
13+
return role + strconv.Itoa(int(id))
14+
}
15+
16+
// Topology defines the interface of topology graph between different components
17+
type Topology interface{}
18+
19+
// MarshalTopology returns the json string of Topology
20+
func MarshalTopology(topology Topology) (string, error) {
21+
binary, err := json.Marshal(topology)
22+
return string(binary), err
23+
}
24+
25+
// UnmarshalTopology constructs a Topology object using the json string
26+
func UnmarshalTopology(s string, topology Topology) error {
27+
return json.Unmarshal([]byte(s), topology)
28+
}
29+
30+
// QueryClusterTopology shows the topology between QueryCoord and QueryNodes
31+
type QueryClusterTopology struct {
32+
Self QueryCoordInfos `json:"self"`
33+
ConnectedNodes []QueryNodeInfos `json:"connected_nodes"`
34+
}
35+
36+
// ConnectionType is the type of connection between nodes
37+
type ConnectionType = string
38+
39+
// ConnectionType definitions
40+
const (
41+
CoordConnectToNode ConnectionType = "manage"
42+
Forward ConnectionType = "forward"
43+
)
44+
45+
// ConnectionTargetType is the type of connection target
46+
type ConnectionTargetType = string
47+
48+
// ConnectionInfo contains info of connection target
49+
type ConnectionInfo struct {
50+
TargetName string `json:"target_name"`
51+
TargetType ConnectionTargetType `json:"target_type"`
52+
}
53+
54+
// ConnTopology contains connection topology
55+
// TODO(dragondriver)
56+
// necessary to show all connection edge in topology graph?
57+
// for example, in system, Proxy connects to RootCoord and RootCoord also connects to Proxy,
58+
// if we do so, the connection relationship may be confusing.
59+
// ConnTopology shows how different components connect to each other.
60+
type ConnTopology struct {
61+
Name string `json:"name"`
62+
ConnectedComponents []ConnectionInfo `json:"connected_components"`
63+
}
64+
65+
// QueryCoordTopology shows the whole metrics of query cluster
66+
type QueryCoordTopology struct {
67+
Cluster QueryClusterTopology `json:"cluster"`
68+
Connections ConnTopology `json:"connections"`
69+
}
70+
71+
// IndexClusterTopology shows the topology between IndexCoord and IndexNodes
72+
type IndexClusterTopology struct {
73+
Self IndexCoordInfos `json:"self"`
74+
ConnectedNodes []IndexNodeInfos `json:"connected_nodes"`
75+
}
76+
77+
// IndexCoordTopology shows the whole metrics of index cluster
78+
type IndexCoordTopology struct {
79+
Cluster IndexClusterTopology `json:"cluster"`
80+
Connections ConnTopology `json:"connections"`
81+
}
82+
83+
// DataClusterTopology shows the topology between DataCoord and DataNodes
84+
type DataClusterTopology struct {
85+
Self DataCoordInfos `json:"self"`
86+
ConnectedDataNodes []DataNodeInfos `json:"connected_data_nodes"`
87+
ConnectedIndexNodes []IndexNodeInfos `json:"connected_index_nodes"`
88+
}
89+
90+
// DataCoordTopology shows the whole metrics of index cluster
91+
type DataCoordTopology struct {
92+
Cluster DataClusterTopology `json:"cluster"`
93+
Connections ConnTopology `json:"connections"`
94+
}
95+
96+
// RootCoordTopology shows the whole metrics of root coordinator
97+
type RootCoordTopology struct {
98+
Self RootCoordInfos `json:"self"`
99+
Connections ConnTopology `json:"connections"`
100+
}
101+
102+
// ConnectionEdge contains connection's id, type and target type
103+
type ConnectionEdge struct {
104+
ConnectedIdentifier int `json:"connected_identifier"`
105+
Type ConnectionType `json:"type"`
106+
TargetType ConnectionTargetType `json:"target_type"` // RootCoord, DataCoord ...
107+
}
108+
109+
// SystemTopologyNode is a node in system topology graph.
110+
type SystemTopologyNode struct {
111+
Identifier int `json:"identifier"` // unique in the SystemTopology graph
112+
Connected []ConnectionEdge `json:"connected"`
113+
Infos ComponentInfos `json:"infos"`
114+
}
115+
116+
// SystemTopology shows the system topology
117+
type SystemTopology struct {
118+
NodesInfo []SystemTopologyNode `json:"nodes_info"`
119+
}

states/etcd/commands.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ func RepairCommand(cli clientv3.KV, basePath string) *cobra.Command {
5757
repair.AddIndexParamsCommand(cli, basePath),
5858
// repair manual compaction
5959
repair.ManualCompactionCommand(cli, basePath),
60+
// check querynode collection leak
61+
repair.CheckQNCollectionLeak(cli, basePath),
6062
)
6163

6264
return repairCmd

0 commit comments

Comments
 (0)