Skip to content

Commit

Permalink
RA: Fix legacy override utilization metrics (#7124)
Browse files Browse the repository at this point in the history
- Emit override utilization only when resource counts are under
threshold.
- Override utilization accounts for anticipated issuance.
- Correct the limit metric label for `CertificatesPerName` and
`CertificatesPerFQDNSet/Fast`.

Part of #5545
  • Loading branch information
beautifulentropy committed Nov 20, 2023
1 parent caf73f2 commit e1312ff
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 43 deletions.
77 changes: 48 additions & 29 deletions ra/ra.go
Original file line number Diff line number Diff line change
Expand Up @@ -394,16 +394,15 @@ func (ra *RegistrationAuthorityImpl) checkRegistrationIPLimit(ctx context.Contex
}

threshold, overrideKey := limit.GetThreshold(ip.String(), noRegistrationID)
if count.Count >= threshold {
return berrors.RegistrationsPerIPError(0, "too many registrations for this IP")
}
if overrideKey != "" {
// We do not support overrides for the NewRegistrationsPerIPRange limit.
utilization := float64(count.Count) / float64(threshold)
utilization := float64(count.Count+1) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.RegistrationsPerIP, overrideKey).Set(utilization)
}

if count.Count >= threshold {
return berrors.RegistrationsPerIPError(0, "too many registrations for this IP")
}

return nil
}

Expand Down Expand Up @@ -600,14 +599,14 @@ func (ra *RegistrationAuthorityImpl) checkPendingAuthorizationLimit(ctx context.
if err != nil {
return err
}
if overrideKey != "" {
utilization := float64(countPB.Count) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.PendingAuthorizationsPerAccount, overrideKey).Set(utilization)
}
if countPB.Count >= threshold {
ra.log.Infof("Rate limit exceeded, PendingAuthorizationsByRegID, regID: %d", regID)
return berrors.RateLimitError(0, "too many currently pending authorizations: %d", countPB.Count)
}
if overrideKey != "" {
utilization := float64(countPB.Count) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.PendingAuthorizationsPerAccount, overrideKey).Set(utilization)
}
return nil
}

Expand Down Expand Up @@ -653,14 +652,14 @@ func (ra *RegistrationAuthorityImpl) checkInvalidAuthorizationLimit(ctx context.
// here.
noKey := ""
threshold, overrideKey := limit.GetThreshold(noKey, regID)
if overrideKey != "" {
utilization := float64(count.Count) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.InvalidAuthorizationsPerAccount, overrideKey).Set(utilization)
}
if count.Count >= threshold {
ra.log.Infof("Rate limit exceeded, InvalidAuthorizationsByRegID, regID: %d", regID)
return berrors.FailedValidationError(0, "too many failed authorizations recently")
}
if overrideKey != "" {
utilization := float64(count.Count) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.InvalidAuthorizationsPerAccount, overrideKey).Set(utilization)
}
return nil
}

Expand All @@ -684,14 +683,13 @@ func (ra *RegistrationAuthorityImpl) checkNewOrdersPerAccountLimit(ctx context.C
// There is no meaningful override key to use for this rate limit
noKey := ""
threshold, overrideKey := limit.GetThreshold(noKey, acctID)
if overrideKey != "" {
utilization := float64(count.Count) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.NewOrdersPerAccount, overrideKey).Set(utilization)
}

if count.Count >= threshold {
return berrors.RateLimitError(0, "too many new orders recently")
}
if overrideKey != "" {
utilization := float64(count.Count+1) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.NewOrdersPerAccount, overrideKey).Set(utilization)
}
return nil
}

Expand Down Expand Up @@ -1423,18 +1421,34 @@ func (ra *RegistrationAuthorityImpl) enforceNameCounts(ctx context.Context, name
}

var badNames []string
var metricsData []struct {
overrideKey string
utilization float64
}

// Find the names that have counts at or over the threshold. Range
// over the names slice input to ensure the order of badNames will
// return the badNames in the same order they were input.
for _, name := range names {
threshold, overrideKey := limit.GetThreshold(name, regID)
if overrideKey != "" {
utilization := float64(response.Counts[name]) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerName, overrideKey).Set(utilization)
}
if response.Counts[name] >= threshold {
badNames = append(badNames, name)
}
if overrideKey != "" {
// Name is under threshold due to an override.
utilization := float64(response.Counts[name]+1) / float64(threshold)
metricsData = append(metricsData, struct {
overrideKey string
utilization float64
}{overrideKey, utilization})
}
}

if len(badNames) == 0 {
// All names were under the threshold, emit override utilization metrics.
for _, data := range metricsData {
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerName, data.overrideKey).Set(data.utilization)
}
}
return badNames, response.Earliest.AsTime(), nil
}
Expand Down Expand Up @@ -1498,18 +1512,19 @@ func (ra *RegistrationAuthorityImpl) checkCertificatesPerFQDNSetLimit(ctx contex
return fmt.Errorf("checking duplicate certificate limit for %q: %s", names, err)
}

if overrideKey != "" {
utilization := float64(len(prevIssuances.TimestampsNS)) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerFQDNSet, overrideKey).Set(utilization)
}

if int64(len(prevIssuances.TimestampsNS)) < threshold {
issuanceCount := int64(len(prevIssuances.TimestampsNS))
if issuanceCount < threshold {
// Issuance in window is below the threshold, no need to limit.
if overrideKey != "" {
utilization := float64(issuanceCount+1) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerFQDNSet, overrideKey).Set(utilization)
}
return nil
} else {
// Evaluate the rate limit using a leaky bucket algorithm. The bucket
// has a capacity of threshold and is refilled at a rate of 1 token per
// limit.Window/threshold from the time of each issuance timestamp.
// limit.Window/threshold from the time of each issuance timestamp. The
// timestamps start from the most recent issuance and go back in time.
now := ra.clk.Now()
nsPerToken := limit.Window.Nanoseconds() / threshold
for i, timestamp := range prevIssuances.TimestampsNS {
Expand All @@ -1518,6 +1533,10 @@ func (ra *RegistrationAuthorityImpl) checkCertificatesPerFQDNSetLimit(ctx contex
// We know `i+1` tokens were generated since `tokenGeneratedSince`,
// and only `i` certificates were issued, so there's room to allow
// for an additional issuance.
if overrideKey != "" {
utilization := float64(issuanceCount) / float64(threshold)
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerFQDNSet, overrideKey).Set(utilization)
}
return nil
}
}
Expand Down
25 changes: 14 additions & 11 deletions ra/ra_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -719,12 +719,13 @@ func TestRegistrationsPerIPOverrideUsage(t *testing.T) {
}

// No error expected, the count of existing registrations for "4.5.6.7"
// should be 1 below the threshold.
// should be 1 below the override threshold.
err := ra.checkRegistrationIPLimit(ctx, rlp, regIP, mockCounterAlwaysTwo)
test.AssertNotError(t, err, "Unexpected error checking RegistrationsPerIPRange limit")

// Expecting ~66% of the override for "4.5.6.7" to be utilized.
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.RegistrationsPerIP, "override_key": regIP.String()}, 0.6666666666666666)
// Accounting for the anticipated issuance, we expect "4.5.6.7" to be at
// 100% of their override threshold.
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.RegistrationsPerIP, "override_key": regIP.String()}, 1)

mockCounterAlwaysThree := func(context.Context, *sapb.CountRegistrationsByIPRequest, ...grpc.CallOption) (*sapb.Count, error) {
return &sapb.Count{Count: 3}, nil
Expand Down Expand Up @@ -1223,30 +1224,32 @@ func TestCheckCertificatesPerNameLimit(t *testing.T) {
// Two base domains, one above threshold but with an override.
mockSA.nameCounts.Counts["example.com"] = 0
mockSA.nameCounts.Counts["bigissuer.com"] = 50
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerName, "bigissuer.com").Set(.5)
err = ra.checkCertificatesPerNameLimit(ctx, []string{"www.example.com", "subdomain.bigissuer.com"}, rlp, 99)
test.AssertNotError(t, err, "incorrectly rate limited bigissuer")
// "bigissuer.com" has an override of 100 and they've issued 50. We do
// not count issuance that has yet to occur, so we expect to see 50%
// utilization.
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.CertificatesPerName, "override_key": "bigissuer.com"}, .5)
// "bigissuer.com" has an override of 100 and they've issued 50. Accounting
// for the anticipated issuance, we expect to see 51% utilization.
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.CertificatesPerName, "override_key": "bigissuer.com"}, .51)

// Two base domains, one above its override
mockSA.nameCounts.Counts["example.com"] = 10
mockSA.nameCounts.Counts["bigissuer.com"] = 100
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerName, "bigissuer.com").Set(1)
err = ra.checkCertificatesPerNameLimit(ctx, []string{"www.example.com", "subdomain.bigissuer.com"}, rlp, 99)
test.AssertError(t, err, "incorrectly failed to rate limit bigissuer")
test.AssertErrorIs(t, err, berrors.RateLimit)
// "bigissuer.com" has an override of 100 and they've issued 100. So we
// expect to see 100% utilization.
// "bigissuer.com" has an override of 100 and they've issued 100. They're
// already at 100% utilization, so we expect to see 100% utilization.
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.CertificatesPerName, "override_key": "bigissuer.com"}, 1)

// One base domain, above its override (which is below threshold)
mockSA.nameCounts.Counts["smallissuer.co.uk"] = 1
ra.rlOverrideUsageGauge.WithLabelValues(ratelimit.CertificatesPerName, "smallissuer.co.uk").Set(1)
err = ra.checkCertificatesPerNameLimit(ctx, []string{"www.smallissuer.co.uk"}, rlp, 99)
test.AssertError(t, err, "incorrectly failed to rate limit smallissuer")
test.AssertErrorIs(t, err, berrors.RateLimit)
// "smallissuer.co.uk" has an override of 1 and they've issued 1. So we
// expect to see 100% utilization.
// "smallissuer.co.uk" has an override of 1 and they've issued 1. They're
// already at 100% utilization, so we expect to see 100% utilization.
test.AssertMetricWithLabelsEquals(t, ra.rlOverrideUsageGauge, prometheus.Labels{"limit": ratelimit.CertificatesPerName, "override_key": "smallissuer.co.uk"}, 1)
}

Expand Down
6 changes: 3 additions & 3 deletions ratelimit/rate-limits.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
const (
// CertificatesPerName is the name of the CertificatesPerName rate limit
// when referenced in metric labels.
CertificatesPerName = "certificates_per_domain_per_account"
CertificatesPerName = "certificates_per_domain"

// RegistrationsPerIP is the name of the RegistrationsPerIP rate limit when
// referenced in metric labels.
Expand All @@ -33,11 +33,11 @@ const (

// CertificatesPerFQDNSet is the name of the CertificatesPerFQDNSet rate
// limit when referenced in metric labels.
CertificatesPerFQDNSet = "certificates_per_fqdn_set_per_account"
CertificatesPerFQDNSet = "certificates_per_fqdn_set"

// CertificatesPerFQDNSetFast is the name of the CertificatesPerFQDNSetFast
// rate limit when referenced in metric labels.
CertificatesPerFQDNSetFast = "certificates_per_fqdn_set_per_account_fast"
CertificatesPerFQDNSetFast = "certificates_per_fqdn_set_fast"

// NewOrdersPerAccount is the name of the NewOrdersPerAccount rate limit
// when referenced in metric labels.
Expand Down

0 comments on commit e1312ff

Please sign in to comment.