From 0b29f7297987a93fb2ca971aac6607a1b8df22b6 Mon Sep 17 00:00:00 2001 From: Samuel Henrique Date: Wed, 31 Dec 2025 22:16:39 -0800 Subject: [PATCH] Don't percent-decode colons ":", and percent-encode them when found Windows has this fun thing called NTFS Streams, which can lead to the file contents being stored in an unusual place if the target contains colons (unusual place = NTFS file stream). This could also result in the file being stored at the root of some other mount point (e.g.: C:filename). Users can still store the file contents in a stream or in another mount point by explicitly choosing the output file name. --- tests/tests.sh | 21 +++++++++++++++++++++ wcurl | 6 ++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/tests/tests.sh b/tests/tests.sh index ccfc23b..c2c531f 100755 --- a/tests/tests.sh +++ b/tests/tests.sh @@ -207,6 +207,27 @@ testUrlDecodingBackslashes() assertContains "Verify whether 'wcurl' successfully uses the default filename when the URL ends with a slash" "${ret}" '--output filename%5Cwith%2Fbackslashes%5c%2f' } +testUrlDecodingColon() +{ + url='example.com/filename%3Awith%3Acolons%3a' + ret=$(${WCURL_CMD} ${url} 2>&1 | tr '\n' ' ') + assertContains "Verify whether 'wcurl' successfully uses the default filename when the URL ends with a slash" "${ret}" '--output filename%3Awith%3Acolons%3a' +} + +testUrlEncodeColon() +{ + url='example.com/filename:with:colons:' + ret=$(${WCURL_CMD} ${url} 2>&1 | tr '\n' ' ') + assertContains "Verify whether 'wcurl' successfully uses the default filename when the URL ends with a slash" "${ret}" '--output filename%3Awith%3Acolons%3A' +} + +testUrlAllowColonWhenOutput() +{ + url='example.com/filename:with:colons:' + ret=$(${WCURL_CMD} ${url} -o "i:want:colons:here" 2>&1 | tr '\n' ' ') + assertContains "Verify whether 'wcurl' successfully uses the default filename when the URL ends with a slash" "${ret}" '--output i:want:colons:here' +} + # Test decoding a bunch of different 
languages (that do not use the latin # alphabet), we could split each language on its own test, but for now it # does not make a difference. diff --git a/wcurl b/wcurl index c5dd6d1..45fd5ca 100755 --- a/wcurl +++ b/wcurl @@ -118,7 +118,8 @@ readonly PER_URL_PARAMETERS="\ # characters. # 2F = / # 5C = \ -readonly UNSAFE_PERCENT_ENCODE="%2F %5C" +# 3A = : +readonly UNSAFE_PERCENT_ENCODE="%2F %5C %3A" # Whether to invoke curl or not. DRY_RUN="false" @@ -193,7 +194,8 @@ get_url_filename() # If what remains contains a slash, there is a path; return it percent-decoded. case "${hostname_and_path}" in # sed to remove everything preceding the last '/', e.g.: "example/something" becomes "something" - */*) percent_decode "$(printf %s "${hostname_and_path}" | sed -e 's,^.*/,,')" ;; + # sed to also replace ':' with the percent_encoded %3A + */*) percent_decode "$(printf %s "${hostname_and_path}" | sed -e 's,^.*/,,' -e 's,:,%3A,g')" ;; esac # No slash means there was just a hostname and no path; return empty string. }