Skip to content

Commit

Permalink
Add sql text counts stats to vtcombo,vtgate+vttablet (#15897)
Browse files Browse the repository at this point in the history
Signed-off-by: Tim Vaillancourt <tim@timvaillancourt.com>
Signed-off-by: Harshit Gangal <harshit@planetscale.com>
Co-authored-by: Harshit Gangal <harshit@planetscale.com>
Co-authored-by: Deepthi Sigireddi <deepthi.sigireddi@gmail.com>
  • Loading branch information
3 people authored Jun 4, 2024
1 parent 0bdcc46 commit e640384
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 33 deletions.
7 changes: 7 additions & 0 deletions changelog/20.0/20.0.0/summary.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
- **[Minor Changes](#minor-changes)**
- **[New Stats](#new-stats)**
- [VTTablet Query Cache Hits and Misses](#vttablet-query-cache-hits-and-misses)
- [VTGate and VTTablet Query Text Characters Processed](#vttablet-query-text-characters-processed)
- **[`SIGHUP` reload of gRPC client static auth creds](#sighup-reload-of-grpc-client-auth-creds)**
- **[VTAdmin](#vtadmin)**
- [Updated to node v20.12.2](#updated-node)
Expand Down Expand Up @@ -326,6 +327,12 @@ VTTablet exposes two new counter stats:
* `QueryCacheHits`: Query engine query cache hits
* `QueryCacheMisses`: Query engine query cache misses

### <a id="#vttablet-query-text-characters-processed"/>VTTablet Query Text Characters Processed

VTGate and VTTablet expose a new counter stat `QueryTextCharactersProcessed` to reflect the number of query text characters processed.

VTGate groups this metric by Operation, Keyspace and TabletType. On VTTablet it is grouped by Table, Plan and optionally Workload.

### <a id="sighup-reload-of-grpc-client-auth-creds"/>`SIGHUP` reload of gRPC client static auth creds

The internal gRPC client now caches the static auth credentials and supports reloading via the `SIGHUP` signal. Previous to v20 the credentials were not cached. They were re-loaded from disk on every use.
Expand Down
4 changes: 2 additions & 2 deletions go/vt/vtgate/plugin_mysql_server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ func TestKillMethods(t *testing.T) {
func TestGracefulShutdown(t *testing.T) {
executor, _, _, _, _ := createExecutorEnv(t)

vh := newVtgateHandler(&VTGate{executor: executor, timings: timings, rowsReturned: rowsReturned, rowsAffected: rowsAffected})
vh := newVtgateHandler(&VTGate{executor: executor, timings: timings, rowsReturned: rowsReturned, rowsAffected: rowsAffected, queryTextCharsProcessed: queryTextCharsProcessed})
th := &testHandler{}
listener, err := mysql.NewListener("tcp", "127.0.0.1:", mysql.NewAuthServerNone(), th, 0, 0, false, false, 0, 0)
require.NoError(t, err)
Expand Down Expand Up @@ -379,7 +379,7 @@ func TestGracefulShutdown(t *testing.T) {
func TestGracefulShutdownWithTransaction(t *testing.T) {
executor, _, _, _, _ := createExecutorEnv(t)

vh := newVtgateHandler(&VTGate{executor: executor, timings: timings, rowsReturned: rowsReturned, rowsAffected: rowsAffected})
vh := newVtgateHandler(&VTGate{executor: executor, timings: timings, rowsReturned: rowsReturned, rowsAffected: rowsAffected, queryTextCharsProcessed: queryTextCharsProcessed})
th := &testHandler{}
listener, err := mysql.NewListener("tcp", "127.0.0.1:", mysql.NewAuthServerNone(), th, 0, 0, false, false, 0, 0)
require.NoError(t, err)
Expand Down
30 changes: 19 additions & 11 deletions go/vt/vtgate/vtgate.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,11 @@ var (
"VtgateApiRowsAffected",
"Rows affected by a write (DML) operation through the VTgate API",
[]string{"Operation", "Keyspace", "DbType"})

queryTextCharsProcessed = stats.NewCountersWithMultiLabels(
"VtgateQueryTextCharactersProcessed",
"Query text characters processed through the VTGate API",
[]string{"Operation", "Keyspace", "DbType"})
)

// VTGate is the rpc interface to vtgate. Only one instance
Expand All @@ -225,9 +230,10 @@ type VTGate struct {
// stats objects.
// TODO(sougou): This needs to be cleaned up. There
// are global vars that depend on this member var.
timings *stats.MultiTimings
rowsReturned *stats.CountersWithMultiLabels
rowsAffected *stats.CountersWithMultiLabels
timings *stats.MultiTimings
rowsReturned *stats.CountersWithMultiLabels
rowsAffected *stats.CountersWithMultiLabels
queryTextCharsProcessed *stats.CountersWithMultiLabels

// the throttled loggers for all errors, one per API entry
logExecute *logutil.ThrottledLogger
Expand Down Expand Up @@ -459,6 +465,7 @@ func (vtg *VTGate) Execute(ctx context.Context, mysqlCtx vtgateservice.MySQLConn
if err == nil {
vtg.rowsReturned.Add(statsKey, int64(len(qr.Rows)))
vtg.rowsAffected.Add(statsKey, int64(qr.RowsAffected))
vtg.queryTextCharsProcessed.Add(statsKey, int64(len(sql)))
return session, qr, nil
}

Expand Down Expand Up @@ -669,14 +676,15 @@ func (vtg *VTGate) HandlePanic(err *error) {

func newVTGate(executor *Executor, resolver *Resolver, vsm *vstreamManager, tc *TxConn, gw *TabletGateway) *VTGate {
return &VTGate{
executor: executor,
resolver: resolver,
vsm: vsm,
txConn: tc,
gw: gw,
timings: timings,
rowsReturned: rowsReturned,
rowsAffected: rowsAffected,
executor: executor,
resolver: resolver,
vsm: vsm,
txConn: tc,
gw: gw,
timings: timings,
rowsReturned: rowsReturned,
rowsAffected: rowsAffected,
queryTextCharsProcessed: queryTextCharsProcessed,

logExecute: logutil.NewThrottledLogger("Execute", 5*time.Second),
logPrepare: logutil.NewThrottledLogger("Prepare", 5*time.Second),
Expand Down
18 changes: 11 additions & 7 deletions go/vt/vttablet/tabletserver/query_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,8 @@ type QueryEngine struct {

// stats
// Note: queryErrorCountsWithCode is similar to queryErrorCounts except it contains error code as an additional dimension
queryCounts, queryCountsWithTabletType, queryTimes, queryErrorCounts, queryErrorCountsWithCode, queryRowsAffected, queryRowsReturned *stats.CountersWithMultiLabels
queryCacheHits, queryCacheMisses *stats.CounterFunc
queryCounts, queryCountsWithTabletType, queryTimes, queryErrorCounts, queryErrorCountsWithCode, queryRowsAffected, queryRowsReturned, queryTextCharsProcessed *stats.CountersWithMultiLabels
queryCacheHits, queryCacheMisses *stats.CounterFunc

// stats flags
enablePerWorkloadTableMetrics bool
Expand Down Expand Up @@ -298,6 +298,7 @@ func NewQueryEngine(env tabletenv.Env, se *schema.Engine) *QueryEngine {
qe.queryTimes = env.Exporter().NewCountersWithMultiLabels("QueryTimesNs", "query times in ns", labels)
qe.queryRowsAffected = env.Exporter().NewCountersWithMultiLabels("QueryRowsAffected", "query rows affected", labels)
qe.queryRowsReturned = env.Exporter().NewCountersWithMultiLabels("QueryRowsReturned", "query rows returned", labels)
qe.queryTextCharsProcessed = env.Exporter().NewCountersWithMultiLabels("QueryTextCharactersProcessed", "query text characters processed", labels)
qe.queryErrorCounts = env.Exporter().NewCountersWithMultiLabels("QueryErrorCounts", "query error counts", labels)
qe.queryErrorCountsWithCode = env.Exporter().NewCountersWithMultiLabels("QueryErrorCountsWithCode", "query error counts with error code", []string{"Table", "Plan", "Code"})

Expand Down Expand Up @@ -559,30 +560,33 @@ func (qe *QueryEngine) QueryPlanCacheLen() (count int) {
}

// AddStats adds the given stats for the planName.tableName
func (qe *QueryEngine) AddStats(planType planbuilder.PlanType, tableName, workload string, tabletType topodata.TabletType, queryCount int64, duration, mysqlTime time.Duration, rowsAffected, rowsReturned, errorCount int64, errorCode string) {
func (qe *QueryEngine) AddStats(plan *TabletPlan, tableName, workload string, tabletType topodata.TabletType, queryCount int64, duration, mysqlTime time.Duration, rowsAffected, rowsReturned, errorCount int64, errorCode string) {
// table names can contain "." characters, replace them!
keys := []string{tableName, planType.String()}
keys := []string{tableName, plan.PlanID.String()}
// Only use the workload as a label if that's enabled in the configuration.
if qe.enablePerWorkloadTableMetrics {
keys = append(keys, workload)
}
qe.queryCounts.Add(keys, queryCount)
qe.queryTimes.Add(keys, int64(duration))
qe.queryErrorCounts.Add(keys, errorCount)
if plan.FullQuery != nil {
qe.queryTextCharsProcessed.Add(keys, int64(len(plan.FullQuery.Query)))
}

qe.queryCountsWithTabletType.Add([]string{tableName, planType.String(), tabletType.String()}, queryCount)
qe.queryCountsWithTabletType.Add([]string{tableName, plan.PlanID.String(), tabletType.String()}, queryCount)

// queryErrorCountsWithCode is similar to queryErrorCounts except we have an additional dimension
// of error code.
if errorCount > 0 {
errorKeys := []string{tableName, planType.String(), errorCode}
errorKeys := []string{tableName, plan.PlanID.String(), errorCode}
qe.queryErrorCountsWithCode.Add(errorKeys, errorCount)
}

// For certain plan types like select, we only want to add their metrics to rows returned
// But there are special cases like `SELECT ... INTO OUTFILE ''` which return positive rows affected
// So we check if it is positive and add that too.
switch planType {
switch plan.PlanID {
case planbuilder.PlanSelect, planbuilder.PlanSelectStream, planbuilder.PlanSelectImpossible, planbuilder.PlanShow, planbuilder.PlanOtherRead:
qe.queryRowsReturned.Add(keys, rowsReturned)
if rowsAffected > 0 {
Expand Down
45 changes: 34 additions & 11 deletions go/vt/vttablet/tabletserver/query_engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -635,9 +635,21 @@ func TestPlanCachePollution(t *testing.T) {
}

func TestAddQueryStats(t *testing.T) {
fakeSelectPlan := &TabletPlan{
Plan: &planbuilder.Plan{
PlanID: planbuilder.PlanSelect,
FullQuery: &sqlparser.ParsedQuery{Query: `select * from something where something=123`}, // 43 length
},
}
fakeInsertPlan := &TabletPlan{
Plan: &planbuilder.Plan{
PlanID: planbuilder.PlanInsert,
FullQuery: &sqlparser.ParsedQuery{Query: `insert into something (id, msg) values(123, 'hello world!')`}, // 59 length
},
}
testcases := []struct {
name string
planType planbuilder.PlanType
plan *TabletPlan
tableName string
tabletType topodata.TabletType
queryCount int64
Expand All @@ -654,12 +666,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes string
expectedQueryRowsAffected string
expectedQueryRowsReturned string
expectedQueryTextCharsProcessed string
expectedQueryErrorCounts string
expectedQueryErrorCountsWithCode string
}{
{
name: "select query",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -674,12 +687,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select": 10}`,
expectedQueryRowsAffected: `{}`,
expectedQueryRowsReturned: `{"A.Select": 15}`,
expectedQueryTextCharsProcessed: `{"A.Select": 43}`,
expectedQueryErrorCounts: `{"A.Select": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "select query against a replica",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_REPLICA,
queryCount: 1,
Expand All @@ -694,12 +708,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select": 10}`,
expectedQueryRowsAffected: `{}`,
expectedQueryRowsReturned: `{"A.Select": 15}`,
expectedQueryTextCharsProcessed: `{"A.Select": 43}`,
expectedQueryErrorCounts: `{"A.Select": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.REPLICA": 1}`,
}, {
name: "select into query",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -714,12 +729,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select": 10}`,
expectedQueryRowsAffected: `{"A.Select": 15}`,
expectedQueryRowsReturned: `{"A.Select": 0}`,
expectedQueryTextCharsProcessed: `{"A.Select": 43}`,
expectedQueryErrorCounts: `{"A.Select": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "error",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -734,12 +750,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select": 10}`,
expectedQueryRowsAffected: `{}`,
expectedQueryRowsReturned: `{"A.Select": 0}`,
expectedQueryTextCharsProcessed: `{"A.Select": 43}`,
expectedQueryErrorCounts: `{"A.Select": 1}`,
expectedQueryErrorCountsWithCode: `{"A.Select.RESOURCE_EXHAUSTED": 1}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "insert query",
planType: planbuilder.PlanInsert,
plan: fakeInsertPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -754,12 +771,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Insert": 10}`,
expectedQueryRowsAffected: `{"A.Insert": 15}`,
expectedQueryRowsReturned: `{}`,
expectedQueryTextCharsProcessed: `{"A.Insert": 59}`,
expectedQueryErrorCounts: `{"A.Insert": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Insert.PRIMARY": 1}`,
}, {
name: "select query with per workload metrics",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -774,12 +792,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select.some-workload": 10}`,
expectedQueryRowsAffected: `{}`,
expectedQueryRowsReturned: `{"A.Select.some-workload": 15}`,
expectedQueryTextCharsProcessed: `{"A.Select.some-workload": 43}`,
expectedQueryErrorCounts: `{"A.Select.some-workload": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "select into query with per workload metrics",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -794,12 +813,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select.some-workload": 10}`,
expectedQueryRowsAffected: `{"A.Select.some-workload": 15}`,
expectedQueryRowsReturned: `{"A.Select.some-workload": 0}`,
expectedQueryTextCharsProcessed: `{"A.Select.some-workload": 43}`,
expectedQueryErrorCounts: `{"A.Select.some-workload": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "error with per workload metrics",
planType: planbuilder.PlanSelect,
plan: fakeSelectPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -814,12 +834,13 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Select.some-workload": 10}`,
expectedQueryRowsAffected: `{}`,
expectedQueryRowsReturned: `{"A.Select.some-workload": 0}`,
expectedQueryTextCharsProcessed: `{"A.Select.some-workload": 43}`,
expectedQueryErrorCounts: `{"A.Select.some-workload": 1}`,
expectedQueryErrorCountsWithCode: `{"A.Select.RESOURCE_EXHAUSTED": 1}`,
expectedQueryCountsWithTableType: `{"A.Select.PRIMARY": 1}`,
}, {
name: "insert query with per workload metrics",
planType: planbuilder.PlanInsert,
plan: fakeInsertPlan,
tableName: "A",
tabletType: topodata.TabletType_PRIMARY,
queryCount: 1,
Expand All @@ -834,6 +855,7 @@ func TestAddQueryStats(t *testing.T) {
expectedQueryTimes: `{"A.Insert.some-workload": 10}`,
expectedQueryRowsAffected: `{"A.Insert.some-workload": 15}`,
expectedQueryRowsReturned: `{}`,
expectedQueryTextCharsProcessed: `{"A.Insert.some-workload": 59}`,
expectedQueryErrorCounts: `{"A.Insert.some-workload": 0}`,
expectedQueryErrorCountsWithCode: `{}`,
expectedQueryCountsWithTableType: `{"A.Insert.PRIMARY": 1}`,
Expand All @@ -849,12 +871,13 @@ func TestAddQueryStats(t *testing.T) {
env := tabletenv.NewEnv(vtenv.NewTestEnv(), cfg, "TestAddQueryStats_"+testcase.name)
se := schema.NewEngine(env)
qe := NewQueryEngine(env, se)
qe.AddStats(testcase.planType, testcase.tableName, testcase.workload, testcase.tabletType, testcase.queryCount, testcase.duration, testcase.mysqlTime, testcase.rowsAffected, testcase.rowsReturned, testcase.errorCount, testcase.errorCode)
qe.AddStats(testcase.plan, testcase.tableName, testcase.workload, testcase.tabletType, testcase.queryCount, testcase.duration, testcase.mysqlTime, testcase.rowsAffected, testcase.rowsReturned, testcase.errorCount, testcase.errorCode)
assert.Equal(t, testcase.expectedQueryCounts, qe.queryCounts.String())
assert.Equal(t, testcase.expectedQueryCountsWithTableType, qe.queryCountsWithTabletType.String())
assert.Equal(t, testcase.expectedQueryTimes, qe.queryTimes.String())
assert.Equal(t, testcase.expectedQueryRowsAffected, qe.queryRowsAffected.String())
assert.Equal(t, testcase.expectedQueryRowsReturned, qe.queryRowsReturned.String())
assert.Equal(t, testcase.expectedQueryTextCharsProcessed, qe.queryTextCharsProcessed.String())
assert.Equal(t, testcase.expectedQueryErrorCounts, qe.queryErrorCounts.String())
assert.Equal(t, testcase.expectedQueryErrorCountsWithCode, qe.queryErrorCountsWithCode.String())
})
Expand Down
4 changes: 2 additions & 2 deletions go/vt/vttablet/tabletserver/query_executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,12 @@ func (qre *QueryExecutor) Execute() (reply *sqltypes.Result, err error) {
errCode = vtErrorCode.String()

if reply == nil {
qre.tsv.qe.AddStats(qre.plan.PlanID, tableName, qre.options.GetWorkloadName(), qre.targetTabletType, 1, duration, mysqlTime, 0, 0, 1, errCode)
qre.tsv.qe.AddStats(qre.plan, tableName, qre.options.GetWorkloadName(), qre.targetTabletType, 1, duration, mysqlTime, 0, 0, 1, errCode)
qre.plan.AddStats(1, duration, mysqlTime, 0, 0, 1)
return
}

qre.tsv.qe.AddStats(qre.plan.PlanID, tableName, qre.options.GetWorkloadName(), qre.targetTabletType, 1, duration, mysqlTime, int64(reply.RowsAffected), int64(len(reply.Rows)), 0, errCode)
qre.tsv.qe.AddStats(qre.plan, tableName, qre.options.GetWorkloadName(), qre.targetTabletType, 1, duration, mysqlTime, int64(reply.RowsAffected), int64(len(reply.Rows)), 0, errCode)
qre.plan.AddStats(1, duration, mysqlTime, reply.RowsAffected, uint64(len(reply.Rows)), 0)
qre.logStats.RowsAffected = int(reply.RowsAffected)
qre.logStats.Rows = reply.Rows
Expand Down

0 comments on commit e640384

Please sign in to comment.