From 08a589d4f8f0000a2fc5b751323f4c669d3398d6 Mon Sep 17 00:00:00 2001 From: Hank Donnay Date: Thu, 10 Jul 2025 15:59:52 -0500 Subject: [PATCH] zlog: fix incorrect escaping logic The previous way of doing this was (purposefully! there was a test!) inserting invalid characters into values. This does the percent-encoding in one spot, instead of doing a second pass that can introduce disallowed characters. See-also: quay/claircore#1582 Signed-off-by: Hank Donnay --- context.go | 64 ++++++++++++++++++++++++++++------------------------ zlog_test.go | 4 +++- 2 files changed, 38 insertions(+), 30 deletions(-) diff --git a/context.go b/context.go index a62ab3d..1c1156c 100644 --- a/context.go +++ b/context.go @@ -2,48 +2,54 @@ package zlog import ( "context" - "fmt" "regexp" - "strconv" + "strings" + "unicode/utf8" "go.opentelemetry.io/otel/baggage" ) // NeedEscape matches a string that needs to be escaped either into an ASCII or a percent-encoded representation. -var needEscape = regexp.MustCompile(`%(?:$|([0-9a-fA-F]?[^0-9a-fA-F]))|[^\x21\x23-\x2B\x2D-\x3A\x3C-\x5B\x5D-\x7E]`) +var needEscape = regexp.MustCompile(`%(?:$|([0-9a-fA-F]?[^0-9a-fA-F]))|[^\x21\x23-\x2B\x2D-\x3A\x3C-\x5B\x5D-\x7E]|[\x80-\x{10FFFF}]`) // PctEncode matches a string that requires some characters to be percent-encoded. -var pctEncode = regexp.MustCompile(`%(?:$|([0-9a-fA-F][^0-9a-fA-F])|[^0-9a-fA-F])| |"|,|;|\\`) +var pctEncode = regexp.MustCompile(`%(?:$|([0-9a-fA-F][^0-9a-fA-F])|[^0-9a-fA-F])|[^\x21\x23-\x2B\x2D-\x3A\x3C-\x5B\x5D-\x7E]+|[\x80-\x{10FFFF}]+`) + +// EscapeOne is the set of 1-byte utf8 characters that should be percent encoded. +// +// This could be avoided if the [pctEncode] regexp was made robust enough to +// ignore correct hex escapes and only capture "lone" percent symbols. +var escapeOne = regexp.MustCompile(`[^\x21\x23-\x2B\x2D-\x3A\x3C-\x5B\x5D-\x7E]|%| |"|,|;|\\`) func escapeValue(v string) string { - v = pctEncode.ReplaceAllStringFunc(v, func(m string) (r string) { - for _, c := range m { - switch c { - case '%': - r += "%25" - case ' ': - r += "%20" - case '"': - r += "%22" - case ',': - r += "%2C" - case ';': - r += "%3B" - case '\\': - r += "%5C" - default: - // Just copy to the return value. - // This is (hopefully) just picking up the positions where percent-encoded nybbles would be. - r += string(c) + const hexchar = `0123456789ABCDEF` + var b strings.Builder + b.Grow(4 * 3) + return pctEncode.ReplaceAllStringFunc(v, func(v string) string { + b.Reset() + for _, c := range v { + n := utf8.RuneLen(c) + if n == 1 { + c := byte(c) + if escapeOne.Match([]byte{c}) { + b.WriteRune('%') + b.WriteByte(hexchar[c>>4]) + b.WriteByte(hexchar[c&15]) + } else { + b.WriteByte(c) + } + continue + } + p := make([]byte, n) + utf8.EncodeRune(p, c) + for _, c := range p { + b.WriteRune('%') + b.WriteByte(hexchar[c>>4]) + b.WriteByte(hexchar[c&15]) } } - if len(m) == len(r) { - panic(fmt.Sprintf("programmer error: pulled odd string %q", m)) - } - return r + return b.String() }) - v = strconv.QuoteToASCII(v) - return v[1 : len(v)-1] } // ContextWithValues is a helper for the go.opentelemetry.io/otel/baggage v1 diff --git a/zlog_test.go b/zlog_test.go index 810aaf5..57c055d 100644 --- a/zlog_test.go +++ b/zlog_test.go @@ -29,6 +29,7 @@ func TestEscape(t *testing.T) { {",", true}, {";", true}, {"\\", true}, + {"\n", true}, } for _, tc := range tt { if got, want := needEscape.MatchString(tc.In), tc.Want; got != want { @@ -41,13 +42,14 @@ func TestEscape(t *testing.T) { In string Want string }{ - {`"🆒"`, `%22\U0001f192%22`}, + {`"🆒"`, `%22%F0%9F%86%92%22`}, {`https://example.com?data=%70%62%73%73%77%6F%72%64&a=b`, `https://example.com?data=%70%62%73%73%77%6F%72%64&a=b`}, {`20% done`, `20%25%20done`}, {`%9Z`, `%259Z`}, {`\n`, `%5Cn`}, {`,`, `%2C`}, {`;`, `%3B`}, + {"\n", `%0A`}, } for _, tc := range tt { if got, want := escapeValue(tc.In), tc.Want; got != want {