diff --git a/tinyxml2.cpp b/tinyxml2.cpp index 56faecbd..dcfcb6ed 100644 --- a/tinyxml2.cpp +++ b/tinyxml2.cpp @@ -552,12 +552,15 @@ const char* XMLUtil::GetCharacterRef(const char* p, char* value, int* length) TIXMLASSERT(digit < radix); const unsigned int digitScaled = mult * digit; + // Reject before adding: if digitScaled alone exceeds MAX_CODE_POINT, + // or if adding it to ucs would exceed it (checked without overflow by + // testing ucs > MAX_CODE_POINT - digitScaled, safe since digitScaled + // <= MAX_CODE_POINT at this point). + if (digitScaled > MAX_CODE_POINT || ucs > MAX_CODE_POINT - digitScaled) { + return 0; + } ucs += digitScaled; - mult *= radix; - - // Security check: could a value exist that is out of range? - // Easily; limit to the MAX_CODE_POINT, which also allows for a - // bunch of leading zeroes. + mult *= radix; if (mult > MAX_CODE_POINT) { mult = MAX_CODE_POINT; } @@ -569,11 +572,11 @@ const char* XMLUtil::GetCharacterRef(const char* p, char* value, int* length) } // convert the UCS to UTF-8 ConvertUTF32ToUTF8(ucs, value, length); - if (length == 0) { - // If length is 0, there was an error. (Security? Bad input?) + if (*length == 0) { + // If *length is 0, ConvertUTF32ToUTF8 rejected the code point. // Fail safely. - return 0; - } + return 0; + } return p + delta + 1; } return p + 1; diff --git a/tinyxml2.h b/tinyxml2.h index c8011174..6dd15107 100644 --- a/tinyxml2.h +++ b/tinyxml2.h @@ -42,14 +42,6 @@ distribution. #endif #include -/* - gcc: - g++ -Wall -DTINYXML2_DEBUG tinyxml2.cpp xmltest.cpp -o gccxmltest.exe - - Formatting, Artistic Style: - AStyle.exe --style=1tbs --indent-switches --break-closing-brackets --indent-preprocessor tinyxml2.cpp tinyxml2.h -*/ - #if defined( _DEBUG ) || defined (__DEBUG__) # ifndef TINYXML2_DEBUG # define TINYXML2_DEBUG diff --git a/xmltest.cpp b/xmltest.cpp index d058707b..574adc03 100644 --- a/xmltest.cpp +++ b/xmltest.cpp @@ -2701,6 +2701,56 @@ int main( int argc, const char ** argv ) XMLTest("Test attribute encode with a Hex value", value5, "!"); // hex value in unicode value } + // ---------- Security: numeric character reference bounds ---------- + { + // Regression: U+10FFFF is the last valid Unicode code point and must + // parse correctly. Fix #2 must not reject the maximum valid value. + XMLDocument doc; + doc.Parse( "" ); + XMLTest( "Numeric ref U+10FFFF: no error", false, doc.Error() ); + const char* v = doc.FirstChildElement()->Attribute( "v" ); + // U+10FFFF encodes to the 4-byte UTF-8 sequence F4 8F BF BF. + const char expected[] = { + static_cast(0xF4), static_cast(0x8F), + static_cast(0xBF), static_cast(0xBF), 0 + }; + XMLTest( "Numeric ref U+10FFFF: correct UTF-8 output", expected, v ); + } + { + // Fix #2 boundary: U+110000 is one above the maximum code point. + // The in-loop overflow guard must catch this before ucs is written, + // leaving the entity as a literal (starting with '&'). + XMLDocument doc; + doc.Parse( "" ); + XMLTest( "Numeric ref U+110000: no parse error", false, doc.Error() ); + const char* v = doc.FirstChildElement()->Attribute( "v" ); + XMLTest( "Numeric ref U+110000: not resolved (left as literal)", true, + v != nullptr && v[0] == '&' ); + } + { + // Fix #2: a hex entity with enough digits to overflow uint32_t must + // be rejected by the in-loop guard before the accumulator wraps. + // Before the fix, ucs could wrap around and pass the post-loop range + // check, producing an attacker-chosen character in the parsed output. + // Build "&#x" + 300 'F' digits + ";" -- far beyond what fits in uint32_t. + const char prefix[] = ""; + static const int NDIGITS = 300; + char xml[sizeof(prefix) + NDIGITS + sizeof(suffix)]; + strcpy( xml, prefix ); + memset( xml + strlen(prefix), 'F', NDIGITS ); + strcpy( xml + strlen(prefix) + NDIGITS, suffix ); + + XMLDocument doc; + doc.Parse( xml ); + XMLTest( "Overflow hex entity: no parse error", false, doc.Error() ); + const char* v = doc.FirstChildElement()->Attribute( "v" ); + // GetCharacterRef returns 0 for rejected refs; the caller then copies + // the literal '&', so the attribute must start with '&', not a char. + XMLTest( "Overflow hex entity: not resolved to a character", true, + v != nullptr && v[0] == '&' ); + } + // ---------- XMLPrinter Apos Escaping ------ { const char* testText = "text containing a ' character";