From daa4db6608f47bbd24c5b28fc768249c3f98eaa5 Mon Sep 17 00:00:00 2001 From: Mike Date: Fri, 16 Aug 2024 07:35:34 +0100 Subject: [PATCH] Add string escaping to JSON formatter (#2875) JSON does not support multiline text so control characters must be escaped. This PR adds a generic `escapeControls` function to the `Formatter` classes, which is used by `Format::json::escape()`. --- Sming/Core/Data/Format/Formatter.cpp | 98 +++++++++++++++++++++++++++ Sming/Core/Data/Format/Formatter.h | 7 ++ Sming/Core/Data/Format/Json.cpp | 3 +- tests/HostTests/include/modules.h | 1 + tests/HostTests/modules/Formatter.cpp | 30 ++++++++ 5 files changed, 137 insertions(+), 2 deletions(-) create mode 100644 Sming/Core/Data/Format/Formatter.cpp create mode 100644 tests/HostTests/modules/Formatter.cpp diff --git a/Sming/Core/Data/Format/Formatter.cpp b/Sming/Core/Data/Format/Formatter.cpp new file mode 100644 index 0000000000..f519306f6f --- /dev/null +++ b/Sming/Core/Data/Format/Formatter.cpp @@ -0,0 +1,98 @@ +/**** + * Sming Framework Project - Open Source framework for high efficiency native ESP8266 development. + * Created 2015 by Skurydin Alexey + * http://github.com/SmingHub/Sming + * All files of the Sming Core are provided under the LGPL v3 license. 
+ * + * Formatter.cpp + * + * @author mikee47 Aug 2024 + * + ****/ + +#include "Formatter.h" + +namespace +{ +/** + * @brief Get the letter which follows the backslash in the standard C-style escape for a character + * @param c Character to be escaped + * @retval char Escape letter (e.g. `n` for newline, `0` for NUL; quotes, `?` and backslash map to themselves), or NUL if `c` has no standard escape + */ +char escapeChar(char c) +{ + switch(c) { + case '\0': + return '0'; + case '\'': + return '\''; + case '\"': + return '"'; + case '\?': + return '?'; + case '\\': + return '\\'; + case '\a': + return 'a'; + case '\b': + return 'b'; + case '\f': + return 'f'; + case '\n': + return 'n'; + case '\r': + return 'r'; + case '\t': + return 't'; + case '\v': + return 'v'; + default: + return '\0'; + } +} + +} // namespace + +namespace Format +{ +unsigned escapeControls(String& value) +{ + // First pass: count the extra characters needed so the String is resized just once + unsigned extra{0}; + for(auto& c : value) { + if(escapeChar(c)) { + extra += 1; // one extra for the leading "\" + } else if(uint8_t(c) < 0x20) { + extra += 3; // "\xnn" is four characters in place of one + } + } + if(extra == 0) { + return 0; + } + auto len = value.length(); + if(!value.setLength(len + extra)) { + return 0; + } + char* out = value.begin(); + const char* in = out; + memmove(out + extra, in, len); // move the original text to the end of the buffer so it can be expanded in place from the front + in += extra; + while(len--) { + uint8_t c = *in++; + auto esc = escapeChar(c); + if(esc) { + *out++ = '\\'; + *out++ = esc; + } else if(c < 0x20) { + *out++ = '\\'; + *out++ = 'x'; + *out++ = hexchar(uint8_t(c) >> 4); + *out++ = hexchar(uint8_t(c) & 0x0f); + } else { + *out++ = c; + } + } + return extra; +} + +} // namespace Format diff --git a/Sming/Core/Data/Format/Formatter.h b/Sming/Core/Data/Format/Formatter.h index 993e0dd61c..9b45322165 100644 --- a/Sming/Core/Data/Format/Formatter.h +++ b/Sming/Core/Data/Format/Formatter.h @@ -17,6 +17,13 @@ namespace Format { +/** + * @brief Escape control codes (below ASCII 0x20), quotes, `?` and backslash in-place using C-style sequences such as `\n` or `\x12` + * @param value String to be modified + * @retval unsigned Number of extra characters inserted (0 if nothing needed escaping or the String could not be resized) + */ +unsigned escapeControls(String& value); + /** * 
@brief Virtual class to perform format-specific String adjustments */ diff --git a/Sming/Core/Data/Format/Json.cpp b/Sming/Core/Data/Format/Json.cpp index d2f15f46ca..536bb017df 100644 --- a/Sming/Core/Data/Format/Json.cpp +++ b/Sming/Core/Data/Format/Json.cpp @@ -63,11 +63,10 @@ bool IsValidUtf8(const char* str, unsigned length) * * This can occur if filenames become corrupted, so here we just * substitute an underscore _ for anything which fails to match UTF8. - * - * TODO: Perform ANSI -> UTF8 conversion? */ void Json::escape(String& value) const { + escapeControls(value); /* NOTE(review): escapeControls() emits C-style sequences; \a, \v, \0, \? and \xnn are not string escapes defined by JSON (RFC 8259) - confirm consumers accept them */ if(!IsValidUtf8(value.c_str(), value.length())) { debug_w("Invalid UTF8: %s", value.c_str()); for(unsigned i = 0; i < value.length(); ++i) { diff --git a/tests/HostTests/include/modules.h b/tests/HostTests/include/modules.h index d29f54e112..037dedb112 100644 --- a/tests/HostTests/include/modules.h +++ b/tests/HostTests/include/modules.h @@ -29,6 +29,7 @@ XX(CStringArray) \ XX(Stream) \ XX(TemplateStream) \ + XX(Formatter) \ XX(Serial) \ XX(ObjectMap) \ XX_NET(Base64) \ diff --git a/tests/HostTests/modules/Formatter.cpp b/tests/HostTests/modules/Formatter.cpp new file mode 100644 index 0000000000..38fbd0bbca --- /dev/null +++ b/tests/HostTests/modules/Formatter.cpp @@ -0,0 +1,30 @@ +#include +#include /* NOTE(review): both include directives have no header name (angle-bracket text appears to have been stripped) - restore from the upstream commit */ + +class FormatterTest : public TestGroup /* Test group checking the output of Format::json.escape() against a fixed expected string */ +{ +public: + FormatterTest() : TestGroup(_F("Formatter")) + { + } + + void execute() override /* text1 holds raw newline, tab, 0x12 and NUL characters plus the UTF-8 bytes C2 A3; text1b is the expected result with the controls escaped and the UTF-8 bytes unchanged */ + { + DEFINE_FSTR_LOCAL(text1, "A JSON\ntest string\twith escapes\x12\0\n" + "Worth maybe \xc2\xa3" + "0.53.") + DEFINE_FSTR_LOCAL(text1b, "A JSON\\ntest string\\twith escapes\\x12\\0\\n" + "Worth maybe \xc2\xa3" + "0.53.") + + Serial << text1 << endl; + String s(text1); + Format::json.escape(s); + REQUIRE_EQ(s, text1b); + } +}; + +void REGISTER_TEST(Formatter) +{ + registerGroup(); /* NOTE(review): a template argument appears to be missing here (presumably <FormatterTest>) - verify against upstream */ +}