Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to decide what to do on invalid UTF-8 urlencoded params #1200

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7793604
minor edits to documentation for clarity
bradhanks Sep 13, 2023
557df29
Merge branch 'elixir-plug:main' into main
bradhanks Nov 16, 2023
8e7570f
Merge branch 'elixir-plug:main' into main
bradhanks Nov 17, 2023
af345e3
Merge branch 'elixir-plug:main' into main
bradhanks Dec 23, 2023
2e1769d
added :utf8_error_code; modified the exception raised by binary strin…
bradhanks Dec 23, 2023
a5127dd
refactored code, passes all tests
bradhanks Dec 23, 2023
5b740cd
Merge branch 'elixir-plug:main' into utf8_error
bradhanks Dec 27, 2023
ea00b54
refactored validate_utf8\!/3 to validate_utf8\!/4; created tests
bradhanks Dec 27, 2023
ea3169b
added documentation
bradhanks Dec 27, 2023
9d7e331
testing
bradhanks Dec 27, 2023
264be80
changes to the other module tests to incorporate the addition of posi…
bradhanks Dec 27, 2023
8d50863
added byte position to the error message, added ASCII optimization fo…
bradhanks Dec 27, 2023
bfb4476
reverted accidentally adding .vscode file to PR
bradhanks Dec 27, 2023
8753900
gitignore .vscode
bradhanks Dec 27, 2023
15d7043
delete .vscode from commit attempt 2
bradhanks Dec 27, 2023
ced8d9a
test debugging from previous merge conflict
bradhanks Dec 27, 2023
4265e2e
remove require Logger from test
bradhanks Dec 27, 2023
b8f33f2
pass PR test
bradhanks Dec 27, 2023
b31ee07
pass PR test
bradhanks Dec 27, 2023
ad3847a
pass PR test
bradhanks Dec 27, 2023
d8f68dd
pass PR test
bradhanks Dec 27, 2023
1f46f7a
pass PR test
bradhanks Dec 27, 2023
5bc1b3a
pass PR test
bradhanks Dec 27, 2023
00e6429
pass PR test
bradhanks Dec 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
/doc
erl_crash.dump
*.ez
/.vscode
77 changes: 67 additions & 10 deletions lib/plug/conn/utils.ex
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
defmodule Plug.Conn.Utils do
require Logger

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@moduledoc """
Utilities for working with connection data
"""
Expand All @@ -11,6 +13,7 @@ defmodule Plug.Conn.Utils do
@other [?., ?-, ?+]
@space [?\s, ?\t]
@specials ~c|()<>@,;:\\"/[]?={}|
@utf8_error_code Application.compile_env(:plug, :utf8_error_code, 500)

@doc ~S"""
Parses media types (with wildcards).
Expand Down Expand Up @@ -278,24 +281,78 @@ defmodule Plug.Conn.Utils do
end

@doc """
Validates the given binary is valid UTF-8.
Validates if the given binary data is valid UTF-8.

This function checks a binary string to determine if it is a valid UTF-8 encoded string.
It operates based on the given `error_code`, which dictates the behavior when encountering invalid UTF-8 bytes.

## Parameters

- `binary`: The binary data to be checked.
- `exception`: The exception module to be used for raising errors in case of invalid UTF-8.
- `context`: A string providing context about the binary data being checked.
- `error_code`: An integer that determines the behavior when invalid UTF-8 is encountered.
- `500` will raise the specified `exception` (default).
- `404` will return an error tuple.
- Codes within `100..999` will log a warning.

## Examples

iex> Plug.Conn.Utils.validate_utf8!(<<255, "invalid">>, RuntimeError, "test context")
** (RuntimeError) invalid UTF-8 on test context, got byte 255 in position 0

iex> Plug.Conn.Utils.validate_utf8!("valid string", RuntimeError, "test context", 404)
:ok

iex> Plug.Conn.Utils.validate_utf8!(<<255, "invalid">>, RuntimeError, "test context", 404)
{:error, "invalid UTF-8 on test context, got byte 255 in position 0"}



# Example with logging for invalid UTF-8
iex> Plug.Conn.Utils.validate_utf8!(<<255, "invalid">>, RuntimeError, "test context", 200)
:ok
# Logs "invalid UTF-8 on test context, got byte 255 in position 0"

"""
@spec validate_utf8!(binary, module, binary) :: :ok | no_return
def validate_utf8!(binary, exception, context)
@spec validate_utf8!(binary, module, binary, integer()) ::
:ok | no_return | {:error, String.t()}
def validate_utf8!(binary, exception, context, error_code \\ @utf8_error_code)

def validate_utf8!(<<binary::binary>>, exception, context, error_code) do
do_validate_utf8!(binary, exception, context, error_code, 0)
end

def validate_utf8!(<<binary::binary>>, exception, context) do
do_validate_utf8!(binary, exception, context)
defp do_validate_utf8!(<<a::56, rest::bits>>, exception, context, error_code, byte_position)
when Bitwise.band(a, 0x80808080808080) == 0 do
do_validate_utf8!(rest, exception, context, error_code, byte_position + 7)
end

defp do_validate_utf8!(<<_::utf8, rest::bits>>, exception, context) do
do_validate_utf8!(rest, exception, context)
defp do_validate_utf8!(<<_::utf8, rest::bits>>, exception, context, error_code, byte_position) do
do_validate_utf8!(rest, exception, context, error_code, byte_position + 1)
end

defp do_validate_utf8!(<<byte, _::bits>>, exception, context) do
raise exception, "invalid UTF-8 on #{context}, got byte #{byte}"
defp do_validate_utf8!(<<byte, _::bits>>, exception, context, error_code, byte_position) do
case error_code do
500 ->
raise exception,
"invalid UTF-8 on #{context}, got byte #{byte} in position #{byte_position}"

404 ->
{:error, "invalid UTF-8 on #{context}, got byte #{byte} in position #{byte_position}"}

error_code when error_code in 100..999 ->
:ok =
Logger.warning(
"invalid UTF-8 on #{context}, got byte #{byte} in position #{byte_position}",
error: @utf8_error_code,
context: context,
byte: byte
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should always raise, and not behave differently based on the format.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What information should be in the exception? I guess I'm not exactly clear on what exactly should change with different error codes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you want a way to not raise, then we should create a validate_utf8 that returns {:error, Exception} without raising. But we should not use the status code to control that. It is unclear what is the ultimate gain here.

end
end

defp do_validate_utf8!(<<>>, _exception, _context) do
defp do_validate_utf8!(<<>>, _exception, _context, _error_code, _byte_position) do
:ok
end

Expand Down
77 changes: 77 additions & 0 deletions test/plug/conn/utils_test.exs
Original file line number Diff line number Diff line change
@@ -1,6 +1,83 @@
defmodule Plug.Conn.UtilsTest do
use ExUnit.Case, async: true
import ExUnit.CaptureLog

import Plug.Conn.Utils
alias Plug.Conn.Utils, as: Utils
doctest Plug.Conn.Utils

Check failure on line 7 in test/plug/conn/utils_test.exs

View workflow job for this annotation

GitHub Actions / test (1.10.4, 21.3, false)

doctest Plug.Conn.Utils.validate_utf8!/4 (42) (Plug.Conn.UtilsTest)

@exception RuntimeError
@context "test context"
@valid_utf8 "utm_campaign=summer+sale&foo=bar&utm_medium=email&utm_source=sendgrid.com&utm_term=utm_term&utm_content=utm_content&utm_id=utm_id"
@invalid_utf8 <<"utm_campaign=summer+sale&foo=bar&utm_medium=email&utm_source=sen", 255>>

setup_all do
%{
exception: @exception,
context: @context,
valid_utf8: @valid_utf8,
invalid_utf8: @invalid_utf8
}
end

describe "validate_utf8! with error_code 500" do
setup context, do: Map.merge(context, %{error_code: 500})

test "raises an exception for invalid UTF-8 input", context do
assert_raise context.exception,
"invalid UTF-8 on #{context.context}, got byte 255 in position #{byte_size(@invalid_utf8) - 1}",
fn ->
Utils.validate_utf8!(
context.invalid_utf8,
context.exception,
context.context,
context.error_code
)
end
end
end

describe "validate_utf8! with error_code 404" do
setup context, do: Map.merge(context, %{error_code: 404})

test "returns {:error, message} for invalid UTF-8 w/ error code 404",
context_map do
%{context: context} = context_map

error_tuple =
{:error,
"invalid UTF-8 on #{context}, got byte 255 in position #{byte_size(@invalid_utf8) - 1}"}

assert error_tuple ==
Utils.validate_utf8!(
context_map.invalid_utf8,
context_map.exception,
context_map.context,
context_map.error_code
)
end
end

describe "validate_utf8! with error_code 401" do
setup context, do: Map.merge(context, %{error_code: 401})

test "logs a detailed warning for invalid UTF-8 input in position #{byte_size(@invalid_utf8) - 1}",

Check failure on line 64 in test/plug/conn/utils_test.exs

View workflow job for this annotation

GitHub Actions / test (1.10.4, 21.3, false)

test validate_utf8! with error_code 401 logs a detailed warning for invalid UTF-8 input in position 64 (Plug.Conn.UtilsTest)
context do
log =
capture_log(fn ->
assert :ok =
Utils.validate_utf8!(
context.invalid_utf8,
context.exception,
context.context,
context.error_code
)
end)

expected_log_regex =
~r/^.*?invalid UTF-8 on test context, got byte 255 in position #{byte_size(@invalid_utf8) - 1}/i

assert String.match?(log, expected_log_regex)
end
end
end
4 changes: 2 additions & 2 deletions test/plug/conn_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -921,15 +921,15 @@ defmodule Plug.ConnTest do
conn = conn(:get, "/foo?a=" <> <<139>>)

assert_raise Plug.Conn.InvalidQueryError,
"invalid UTF-8 on urlencoded params, got byte 139",
"invalid UTF-8 on urlencoded params, got byte 139 in position 0",
fn ->
fetch_query_params(conn)
end

conn = conn(:get, "/foo?a=" <> URI.encode_www_form(<<139>>))

assert_raise Plug.Conn.InvalidQueryError,
"invalid UTF-8 on urlencoded params, got byte 139",
"invalid UTF-8 on urlencoded params, got byte 139 in position 0",
fn ->
fetch_query_params(conn)
end
Expand Down
6 changes: 3 additions & 3 deletions test/plug/parsers_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ defmodule Plug.ParsersTest do
conn = conn(:post, "/?foo=#{<<139>>}")

assert_raise Plug.Conn.InvalidQueryError,
"invalid UTF-8 on urlencoded params, got byte 139",
"invalid UTF-8 on urlencoded params, got byte 139 in position 0",
fn ->
parse(%{conn | body_params: %{"foo" => "baz"}, params: %{"foo" => "baz"}})
end
Expand All @@ -109,7 +109,7 @@ defmodule Plug.ParsersTest do

assert_raise(
Plug.Parsers.BadEncodingError,
"invalid UTF-8 on urlencoded params, got byte 139",
"invalid UTF-8 on urlencoded params, got byte 139 in position 0",
fn ->
parse(conn, validate_utf8: true)
end
Expand Down Expand Up @@ -327,7 +327,7 @@ defmodule Plug.ParsersTest do
end

test "raises on invalid url encoded" do
message = "invalid UTF-8 on urlencoded params, got byte 139"
message = "invalid UTF-8 on urlencoded params, got byte 139 in position 0"

assert_raise Plug.Parsers.BadEncodingError, message, fn ->
conn(:post, "/foo", "a=" <> <<139>>)
Expand Down
Loading