From a181f00f96e8138473fd97135843f990554fd142 Mon Sep 17 00:00:00 2001
From: TEC
Date: Sun, 11 Aug 2024 12:54:00 +0800
Subject: [PATCH] Add filesystem func to transform a path to a URI

In a few places across Base and the stdlib, we emit paths that we would
like people to be able to click on in their terminal and editor. Up to
this point, we have relied on auto-filepath detection, but this does not
allow for alternative link text, such as contracted paths. Doing so (via
OSC 8 terminal links for example) requires filepath URI encoding.

This functionality was previously part of a PR modifying stacktrace
printing, but after that became held up for unrelated reasons and another
PR appeared that would benefit from this utility, I've split out this
functionality so it can be used before the stacktrace printing PR is
resolved.
---
 base/path.jl | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 test/path.jl | 13 ++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/base/path.jl b/base/path.jl
index 3b8124f34f174a..4f03419e31bb08 100644
--- a/base/path.jl
+++ b/base/path.jl
@@ -613,3 +613,59 @@ relpath(path::AbstractString, startpath::AbstractString) =
 for f in (:isdirpath, :splitdir, :splitdrive, :splitext, :normpath, :abspath)
     @eval $f(path::AbstractString) = $f(String(path))
 end
+
+"""
+    uripath(path::AbstractString)
+
+Encode `path` as a URI as per [RFC8089: The "file" URI
+Scheme](https://www.rfc-editor.org/rfc/rfc8089), [RFC3986: Uniform Resource
+Identifier (URI): Generic Syntax](https://www.rfc-editor.org/rfc/rfc3986), and
+the [Freedesktop File URI spec](https://www.freedesktop.org/wiki/Specifications/file-uri-spec/).
+
+## Examples
+
+```julia-repl
+julia> uripath("/home/user/example file.jl") # On a unix machine
+"file://<hostname>/home/user/example%20file.jl"
+
+julia> uripath("C:\\Users\\user\\example file.jl") # On a windows machine
+"file:///C:/Users/user/example%20file.jl"
+```
+"""
+function uripath end
+
+@static if Sys.iswindows()
+    function uripath(path::String)
+        percent_escape(s) = # RFC3986 Section 2.1: each octet becomes %HH, always two hex digits
+            '%' * join(map(b -> uppercase(string(b, base=16, pad=2)), codeunits(s)), '%')
+        encode_uri_component(s) = # RFC3986 Section 2.3 (unreserved characters, plus '/' as separator)
+            replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape)
+        path = abspath(path)
+        if startswith(path, "\\\\") # UNC path, RFC8089 Appendix E.3
+            unixpath = join(eachsplit(path, path_separator_re, keepempty=false), '/')
+            string("file://", encode_uri_component(unixpath)) # RFC8089 Section 2
+        else
+            drive, localpath = splitdrive(path) # Assuming that non-UNC absolute paths on Windows always have a drive component
+            unixpath = join(eachsplit(localpath, path_separator_re, keepempty=false), '/')
+            encdrive = replace(encode_uri_component(drive), "%3A" => ':', "%7C" => '|') # RFC8089 Appendices D.2, E.2.1, and E.2.2
+            string("file:///", encdrive, '/', encode_uri_component(unixpath)) # RFC8089 Section 2
+        end
+    end
+else
+    function uripath(path::String)
+        percent_escape(s) = # RFC3986 Section 2.1: each octet becomes %HH, always two hex digits
+            '%' * join(map(b -> uppercase(string(b, base=16, pad=2)), codeunits(s)), '%')
+        encode_uri_component(s) = # RFC3986 Section 2.3 (unreserved characters, plus '/' as separator)
+            replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape)
+        localpath = join(eachsplit(abspath(path), path_separator_re, keepempty=false), '/')
+        host = if ispath("/proc/sys/fs/binfmt_misc/WSLInterop") # WSL sigil
+            distro = get(ENV, "WSL_DISTRO_NAME", "")
+            "wsl\$/$distro" # Windows reaches WSL filesystems through the \\wsl$\<distro> share
+        else
+            gethostname() # Freedesktop File URI Spec, Hostnames section
+        end
+        string("file://", encode_uri_component(host), '/', encode_uri_component(localpath)) # RFC8089 Section 2
+    end
+end
+
+uripath(path::AbstractString) = uripath(String(path))
diff --git a/test/path.jl b/test/path.jl
index 2f4f2d0983a58e..4c2c7034577d51 100644
--- a/test/path.jl
+++ b/test/path.jl
@@ -311,6 +311,19 @@
         test_relpath()
     end
 
+    @testset "uripath" begin
+        host = if Sys.iswindows() "" elseif ispath("/proc/sys/fs/binfmt_misc/WSLInterop") "wsl%24/" * get(ENV, "WSL_DISTRO_NAME", "") else gethostname() end
+        sysdrive, uridrive = if Sys.iswindows() "C:\\", "C:/" else "/", "" end
+        @test Base.Filesystem.uripath("$(sysdrive)some$(sep)file.txt") == "file://$host/$(uridrive)some/file.txt"
+        @test Base.Filesystem.uripath("$(sysdrive)another$(sep)$(sep)folder$(sep)file.md") == "file://$host/$(uridrive)another/folder/file.md"
+        @test Base.Filesystem.uripath("$(sysdrive)some file with ^odd% chars") == "file://$host/$(uridrive)some%20file%20with%20%5Eodd%25%20chars"
+        @test Base.Filesystem.uripath("$(sysdrive)weird chars like @#&()[]{}") == "file://$host/$(uridrive)weird%20chars%20like%20%40%23%26%28%29%5B%5D%7B%7D"
+        @test Base.Filesystem.uripath("$sysdrive") == "file://$host/$uridrive"
+        @test Base.Filesystem.uripath(".") == Base.Filesystem.uripath(pwd())
+        @test Base.Filesystem.uripath("$(sysdrive)unicode$(sep)Δεδομένα") == "file://$host/$(uridrive)unicode/%CE%94%CE%B5%CE%B4%CE%BF%CE%BC%CE%AD%CE%BD%CE%B1"
+        @test Base.Filesystem.uripath("$(sysdrive)unicode$(sep)🧮🐛🔨") == "file://$host/$(uridrive)unicode/%F0%9F%A7%AE%F0%9F%90%9B%F0%9F%94%A8"
+    end
+
     if Sys.iswindows()
         @testset "issue #23646" begin
             @test lowercase(relpath("E:\\a\\b", "C:\\c")) == "e:\\a\\b"