diff --git a/.gitignore b/.gitignore
index a7246a52..c6a63cd1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@
 /doc
 erl_crash.dump
 *.ez
+/.vscode
diff --git a/lib/plug/conn/utils.ex b/lib/plug/conn/utils.ex
index 5bb7e20a..e1d9c4e1 100644
--- a/lib/plug/conn/utils.ex
+++ b/lib/plug/conn/utils.ex
@@ -1,4 +1,6 @@
 defmodule Plug.Conn.Utils do
+  require Logger
+
   @moduledoc """
   Utilities for working with connection data
   """
@@ -11,6 +13,7 @@ defmodule Plug.Conn.Utils do
   @other [?., ?-, ?+]
   @space [?\s, ?\t]
   @specials ~c|()<>@,;:\\"/[]?={}|
+  @utf8_error_code Application.compile_env(:plug, :utf8_error_code, 500)
 
   @doc ~S"""
   Parses media types (with wildcards).
@@ -278,24 +281,89 @@ defmodule Plug.Conn.Utils do
   end
 
   @doc """
-  Validates the given binary is valid UTF-8.
+  Validates if the given binary data is valid UTF-8.
+
+  This function checks a binary string to determine if it is a valid UTF-8 encoded string.
+  The given `error_code` dictates the behavior when an invalid UTF-8 byte is encountered.
+
+  ## Parameters
+
+    - `binary`: The binary data to be checked.
+    - `exception`: The exception module to be used for raising errors in case of invalid UTF-8.
+    - `context`: A string providing context about the binary data being checked.
+    - `error_code`: An integer that determines the behavior when invalid UTF-8 is encountered.
+      Defaults to the compile-time `:utf8_error_code` setting of `:plug` (`500` when unset).
+      - `500` will raise the specified `exception`.
+      - `404` will return an error tuple.
+      - Other codes within `100..999` will log a warning and return `:ok`.
+      - Any other value raises `ArgumentError` once an invalid byte is found.
+
+  The position reported in the message is the zero-based byte offset of the offending byte.
+
+  ## Examples
+
+      iex> Plug.Conn.Utils.validate_utf8!(<<255, "invalid">>, RuntimeError, "test context")
+      ** (RuntimeError) invalid UTF-8 on test context, got byte 255 in position 0
+
+      iex> Plug.Conn.Utils.validate_utf8!("valid string", RuntimeError, "test context", 404)
+      :ok
+
+      iex> Plug.Conn.Utils.validate_utf8!(<<255, "invalid">>, RuntimeError, "test context", 404)
+      {:error, "invalid UTF-8 on test context, got byte 255 in position 0"}
+
+      # Example with logging for invalid UTF-8
+      iex> Plug.Conn.Utils.validate_utf8!(<<255, "invalid">>, RuntimeError, "test context", 200)
+      :ok
+      # Logs "invalid UTF-8 on test context, got byte 255 in position 0"
+
   """
-  @spec validate_utf8!(binary, module, binary) :: :ok | no_return
-  def validate_utf8!(binary, exception, context)
+  @spec validate_utf8!(binary, module, binary, integer()) ::
+          :ok | no_return | {:error, String.t()}
+  def validate_utf8!(binary, exception, context, error_code \\ @utf8_error_code)
+
+  def validate_utf8!(<<binary::binary>>, exception, context, error_code) do
+    do_validate_utf8!(binary, exception, context, error_code, 0)
+  end
 
-  def validate_utf8!(<<binary::binary>>, exception, context) do
-    do_validate_utf8!(binary, exception, context)
+  # Fast path: consume seven bytes at once when none has its high bit set (plain ASCII).
+  defp do_validate_utf8!(<<a::56, rest::bits>>, exception, context, error_code, pos)
+       when Bitwise.band(a, 0x80808080808080) == 0 do
+    do_validate_utf8!(rest, exception, context, error_code, pos + 7)
   end
 
-  defp do_validate_utf8!(<<_::utf8, rest::bits>>, exception, context) do
-    do_validate_utf8!(rest, exception, context)
+  # A code point occupies one to four bytes, so advance by its encoded size;
+  # advancing by one would turn `pos` into a code point count, not a byte offset.
+  defp do_validate_utf8!(<<char::utf8, rest::bits>>, exception, context, error_code, pos) do
+    do_validate_utf8!(rest, exception, context, error_code, pos + utf8_size(char))
   end
 
-  defp do_validate_utf8!(<<byte, _::bits>>, exception, context) do
-    raise exception, "invalid UTF-8 on #{context}, got byte #{byte}"
+  defp do_validate_utf8!(<<byte, _::bits>>, exception, context, error_code, pos) do
+    message = "invalid UTF-8 on #{context}, got byte #{byte} in position #{pos}"
+
+    case error_code do
+      500 ->
+        raise exception, message
+
+      404 ->
+        {:error, message}
+
+      code when code in 100..999 ->
+        Logger.warning(message, error: code, context: context, byte: byte)
+        :ok
+
+      other ->
+        raise ArgumentError,
+              "unsupported UTF-8 error code #{inspect(other)}, expected an integer in 100..999"
+    end
   end
 
-  defp do_validate_utf8!(<<>>, _exception, _context) do
+  defp do_validate_utf8!(<<>>, _exception, _context, _error_code, _pos) do
     :ok
   end
+
+  # Number of bytes the UTF-8 encoding of a code point occupies.
+  defp utf8_size(char) when char < 0x80, do: 1
+  defp utf8_size(char) when char < 0x800, do: 2
+  defp utf8_size(char) when char < 0x10000, do: 3
+  defp utf8_size(_char), do: 4
 
diff --git a/test/plug/conn/utils_test.exs b/test/plug/conn/utils_test.exs
index 09e83963..52234e64 100644
--- a/test/plug/conn/utils_test.exs
+++ b/test/plug/conn/utils_test.exs
@@ -1,6 +1,99 @@
 defmodule Plug.Conn.UtilsTest do
   use ExUnit.Case, async: true
+  import ExUnit.CaptureLog
   import Plug.Conn.Utils
+  alias Plug.Conn.Utils
 
   doctest Plug.Conn.Utils
+
+  @exception RuntimeError
+  @context "test context"
+  @valid_utf8 "utm_campaign=summer+sale&foo=bar&utm_medium=email&utm_source=sendgrid.com&utm_term=utm_term&utm_content=utm_content&utm_id=utm_id"
+  @invalid_utf8 <<"utm_campaign=summer+sale&foo=bar&utm_medium=email&utm_source=sen", 255>>
+
+  setup_all do
+    %{
+      exception: @exception,
+      context: @context,
+      valid_utf8: @valid_utf8,
+      invalid_utf8: @invalid_utf8
+    }
+  end
+
+  describe "validate_utf8! with error_code 500" do
+    setup context, do: Map.merge(context, %{error_code: 500})
+
+    test "raises an exception for invalid UTF-8 input", context do
+      assert_raise context.exception,
+                   "invalid UTF-8 on #{context.context}, got byte 255 in position #{byte_size(@invalid_utf8) - 1}",
+                   fn ->
+                     Utils.validate_utf8!(
+                       context.invalid_utf8,
+                       context.exception,
+                       context.context,
+                       context.error_code
+                     )
+                   end
+    end
+  end
+
+  describe "validate_utf8! with error_code 404" do
+    setup context, do: Map.merge(context, %{error_code: 404})
+
+    test "returns {:error, message} for invalid UTF-8 w/ error code 404",
+         context_map do
+      %{context: context} = context_map
+
+      error_tuple =
+        {:error,
+         "invalid UTF-8 on #{context}, got byte 255 in position #{byte_size(@invalid_utf8) - 1}"}
+
+      assert error_tuple ==
+               Utils.validate_utf8!(
+                 context_map.invalid_utf8,
+                 context_map.exception,
+                 context_map.context,
+                 context_map.error_code
+               )
+    end
+  end
+
+  describe "validate_utf8! with error_code 401" do
+    setup context, do: Map.merge(context, %{error_code: 401})
+
+    test "logs a detailed warning for invalid UTF-8 input in position #{byte_size(@invalid_utf8) - 1}",
+         context do
+      log =
+        capture_log(fn ->
+          assert :ok =
+                   Utils.validate_utf8!(
+                     context.invalid_utf8,
+                     context.exception,
+                     context.context,
+                     context.error_code
+                   )
+        end)
+
+      expected_log_regex =
+        ~r/^.*?invalid UTF-8 on test context, got byte 255 in position #{byte_size(@invalid_utf8) - 1}/i
+
+      assert String.match?(log, expected_log_regex)
+    end
+  end
+
+  describe "validate_utf8! byte position" do
+    test "is a byte offset even after multi-byte characters" do
+      # "é" is encoded as two bytes, so the invalid byte sits at offset 2.
+      assert {:error, "invalid UTF-8 on test context, got byte 255 in position 2"} ==
+               Utils.validate_utf8!("é" <> <<255>>, @exception, @context, 404)
+    end
+  end
+
+  describe "validate_utf8! with an unsupported error_code" do
+    test "raises ArgumentError when invalid UTF-8 is found" do
+      assert_raise ArgumentError, fn ->
+        Utils.validate_utf8!(@invalid_utf8, @exception, @context, 1000)
+      end
+    end
+  end
 end
diff --git a/test/plug/conn_test.exs b/test/plug/conn_test.exs
index 381a99c4..561f94cb 100644
--- a/test/plug/conn_test.exs
+++ b/test/plug/conn_test.exs
@@ -921,7 +921,7 @@ defmodule Plug.ConnTest do
     conn = conn(:get, "/foo?a=" <> <<139>>)
 
     assert_raise Plug.Conn.InvalidQueryError,
-                 "invalid UTF-8 on urlencoded params, got byte 139",
+                 "invalid UTF-8 on urlencoded params, got byte 139 in position 0",
                  fn ->
                    fetch_query_params(conn)
                  end
@@ -929,7 +929,7 @@ defmodule Plug.ConnTest do
     conn = conn(:get, "/foo?a=" <> URI.encode_www_form(<<139>>))
 
     assert_raise Plug.Conn.InvalidQueryError,
-                 "invalid UTF-8 on urlencoded params, got byte 139",
+                 "invalid UTF-8 on urlencoded params, got byte 139 in position 0",
                  fn ->
                    fetch_query_params(conn)
                  end
diff --git a/test/plug/parsers_test.exs b/test/plug/parsers_test.exs
index a6f84213..d824c776 100644
--- a/test/plug/parsers_test.exs
+++ b/test/plug/parsers_test.exs
@@ -96,7 +96,7 @@ defmodule Plug.ParsersTest do
     conn = conn(:post, "/?foo=#{<<139>>}")
 
     assert_raise Plug.Conn.InvalidQueryError,
-                 "invalid UTF-8 on urlencoded params, got byte 139",
+                 "invalid UTF-8 on urlencoded params, got byte 139 in position 0",
                  fn ->
                    parse(%{conn | body_params: %{"foo" => "baz"}, params: %{"foo" => "baz"}})
                  end
@@ -109,7 +109,7 @@ defmodule Plug.ParsersTest do
 
     assert_raise(
       Plug.Parsers.BadEncodingError,
-      "invalid UTF-8 on urlencoded params, got byte 139",
+      "invalid UTF-8 on urlencoded params, got byte 139 in position 0",
       fn ->
         parse(conn, validate_utf8: true)
       end
@@ -327,7 +327,7 @@ defmodule Plug.ParsersTest do
   end
 
   test "raises on invalid url encoded" do
-    message = "invalid UTF-8 on urlencoded params, got byte 139"
+    message = "invalid UTF-8 on urlencoded params, got byte 139 in position 0"
 
     assert_raise Plug.Parsers.BadEncodingError, message, fn ->
       conn(:post, "/foo", "a=" <> <<139>>)