Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

url: refactor pathToFileURL to native #55476

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
85 changes: 9 additions & 76 deletions lib/internal/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ const {
ObjectSetPrototypeOf,
ReflectGetOwnPropertyDescriptor,
ReflectOwnKeys,
RegExpPrototypeSymbolReplace,
SafeMap,
SafeSet,
StringPrototypeCharAt,
Expand Down Expand Up @@ -777,6 +776,8 @@ function isURL(self) {
* for invalid URL inputs.
*/
const kParseURLSymbol = Symbol('kParseURL');
// Internal parse symbols. When the URL constructor receives one of these as
// its parse symbol it builds the href with `bindingUrl.pathToFileURL()`
// instead of `bindingUrl.parse()`. The Windows variant additionally makes the
// constructor pass `true` as the "interpret as Windows path" flag.
const kCreateURLFromPosixPathSymbol = Symbol('kCreateURLFromPosixPath');
const kCreateURLFromWindowsPathSymbol = Symbol('kCreateURLFromWindowsPath');

class URL {
#context = new URLContext();
Expand Down Expand Up @@ -811,7 +812,11 @@ class URL {
}

const raiseException = parseSymbol !== kParseURLSymbol;
const href = bindingUrl.parse(input, base, raiseException);
const interpretAsWindowsPath = parseSymbol === kCreateURLFromWindowsPathSymbol;
const pathToFileURL = interpretAsWindowsPath || (parseSymbol === kCreateURLFromPosixPathSymbol);
const href = pathToFileURL ?
bindingUrl.pathToFileURL(input, interpretAsWindowsPath, base) :
bindingUrl.parse(input, base, raiseException);
if (href) {
this.#updateContext(href);
}
Expand Down Expand Up @@ -1498,76 +1503,9 @@ function fileURLToPath(path, options = kEmptyObject) {
return (windows ?? isWindows) ? getPathFromURLWin32(path) : getPathFromURLPosix(path);
}

// Patterns for the characters that `encodePathChars()` percent-encodes when a
// file path is embedded in a `file:` URL.
// RFC1738 defines the following chars as "unsafe" for URLs
// @see https://www.ietf.org/rfc/rfc1738.txt 2.2. URL Character Encoding Issues
// Every pattern carries the global (`g`) flag so that a single replace call
// rewrites all occurrences of its character.
const percentRegEx = /%/g;
const newlineRegEx = /\n/g;
const carriageReturnRegEx = /\r/g;
const tabRegEx = /\t/g;
const quoteRegEx = /"/g;
const hashRegex = /#/g;
const spaceRegEx = / /g;
const questionMarkRegex = /\?/g;
const openSquareBracketRegEx = /\[/g;
// Also used by `pathToFileURL()` to turn back-slashes into forward slashes.
const backslashRegEx = /\\/g;
const closeSquareBracketRegEx = /]/g;
const caretRegEx = /\^/g;
const verticalBarRegEx = /\|/g;
const tildeRegEx = /~/g;

/**
 * Percent-encodes the characters of a file path that need escaping before the
 * path is placed inside a `file:` URL.
 * @param {string} filepath The path to encode.
 * @param {{ windows?: boolean }} [options] When `options.windows` is truthy,
 *   back-slashes are left as-is (they act as path separators there);
 *   otherwise they are encoded as `%5C`.
 * @returns {string} The path with the affected characters replaced by their
 *   `%XX` escapes.
 */
function encodePathChars(filepath, options = kEmptyObject) {
  // Replaces every `char` in `str` with `escaped`. The cheap `includes` probe
  // avoids running the regular expression when there is nothing to replace.
  const escapeChar = (str, char, pattern, escaped) => (
    StringPrototypeIncludes(str, char) ?
      RegExpPrototypeSymbolReplace(pattern, str, escaped) :
      str
  );

  // '%' must be handled first: every later step introduces new '%' characters
  // that must not be encoded a second time.
  let encoded = escapeChar(filepath, '%', percentRegEx, '%25');

  encoded = escapeChar(encoded, '\t', tabRegEx, '%09');
  encoded = escapeChar(encoded, '\n', newlineRegEx, '%0A');
  encoded = escapeChar(encoded, '\r', carriageReturnRegEx, '%0D');
  encoded = escapeChar(encoded, ' ', spaceRegEx, '%20');
  encoded = escapeChar(encoded, '"', quoteRegEx, '%22');
  encoded = escapeChar(encoded, '#', hashRegex, '%23');
  encoded = escapeChar(encoded, '?', questionMarkRegex, '%3F');
  encoded = escapeChar(encoded, '[', openSquareBracketRegEx, '%5B');
  // Back-slashes must be special-cased on Windows, where they are treated as
  // path separator.
  if (!options.windows) {
    encoded = escapeChar(encoded, '\\', backslashRegEx, '%5C');
  }
  encoded = escapeChar(encoded, ']', closeSquareBracketRegEx, '%5D');
  encoded = escapeChar(encoded, '^', caretRegEx, '%5E');
  encoded = escapeChar(encoded, '|', verticalBarRegEx, '%7C');
  encoded = escapeChar(encoded, '~', tildeRegEx, '%7E');

  return encoded;
}

function pathToFileURL(filepath, options = kEmptyObject) {
const windows = options?.windows ?? isWindows;
if (windows && StringPrototypeStartsWith(filepath, '\\\\')) {
const outURL = new URL('file://');
// UNC path format: \\server\share\resource
// Handle extended UNC path and standard UNC path
// "\\?\UNC\" path prefix should be ignored.
Expand All @@ -1590,16 +1528,11 @@ function pathToFileURL(filepath, options = kEmptyObject) {
);
}
const hostname = StringPrototypeSlice(filepath, prefixLength, hostnameEndIndex);
outURL.hostname = domainToASCII(hostname);
outURL.pathname = encodePathChars(
RegExpPrototypeSymbolReplace(backslashRegEx, StringPrototypeSlice(filepath, hostnameEndIndex), '/'),
{ windows },
);
return outURL;
return new URL(StringPrototypeSlice(filepath, hostnameEndIndex), hostname, kCreateURLFromWindowsPathSymbol);
}
const resolved = windows ? path.win32.resolve(filepath) : path.posix.resolve(filepath);

return new URL(`file://${encodePathChars(resolved, { windows })}`);
return new URL(resolved, undefined, windows ? kCreateURLFromWindowsPathSymbol : kCreateURLFromPosixPathSymbol);
}

function toPathIfFileURL(fileURLOrPath) {
Expand Down
105 changes: 105 additions & 0 deletions src/node_url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,109 @@ void BindingData::Deserialize(v8::Local<v8::Context> context,
CHECK_NOT_NULL(binding);
}

// Highest character code covered by `lookup_table`. Characters above it are
// not looked up; EncodePathChars() copies them through unchanged.
#ifndef LARGEST_ASCII_CHAR_CODE_TO_ENCODE
#define LARGEST_ASCII_CHAR_CODE_TO_ENCODE '~'
#endif

// Maps every character code in [0, LARGEST_ASCII_CHAR_CODE_TO_ENCODE] to the
// text that represents it inside a file URL: a "%XX" escape for the characters
// listed below, and the character itself for everything else.
//
// RFC1738 defines the following chars as "unsafe" for URLs
// @see https://www.ietf.org/rfc/rfc1738.txt 2.2. URL Character Encoding Issues
//
// The table is built once during static initialization and is only ever read
// afterwards, hence `const`.
const std::array<std::string, LARGEST_ASCII_CHAR_CODE_TO_ENCODE + 1>
    lookup_table = []() {
      std::array<std::string, LARGEST_ASCII_CHAR_CODE_TO_ENCODE + 1> result{};

      for (uint8_t i = 0; i <= LARGEST_ASCII_CHAR_CODE_TO_ENCODE; i++) {
        switch (i) {
          case '%':
            result[i] = "%25";
            break;
          case '\t':
            result[i] = "%09";
            break;
          case '\n':
            result[i] = "%0A";
            break;
          case '\r':
            result[i] = "%0D";
            break;
          case ' ':
            result[i] = "%20";
            break;
          case '"':
            result[i] = "%22";
            break;
          case '#':
            result[i] = "%23";
            break;
          case '?':
            result[i] = "%3F";
            break;
          case '[':
            result[i] = "%5B";
            break;
          case '\\':
            result[i] = "%5C";
            break;
          case ']':
            result[i] = "%5D";
            break;
          case '^':
            result[i] = "%5E";
            break;
          case '|':
            result[i] = "%7C";
            break;
          case '~':
            result[i] = "%7E";
            break;
          default:
            // Not an escaped character: the entry is the character itself.
            result[i] = std::string(1, static_cast<char>(i));
            break;
        }
      }

      return result;
    }();

// Path convention a file path should be interpreted under when it is
// converted to a file URL.
enum class OS {
  WINDOWS,
  POSIX,
};

// Builds the textual "file://" URL for `input_str`.
//
// Each byte of the path is handled as follows:
//   - bytes above LARGEST_ASCII_CHAR_CODE_TO_ENCODE are copied unchanged;
//   - for OS::WINDOWS, a back-slash becomes a forward slash;
//   - everything else is replaced by its `lookup_table` entry (either a "%XX"
//     escape or the character itself).
//
// Returns the string "file://" followed by the encoded path.
std::string EncodePathChars(std::string_view input_str, OS operating_system) {
  constexpr std::string_view kPrefix = "file://";
  const bool backslash_is_separator = operating_system == OS::WINDOWS;

  std::string result;
  // Room for the prefix plus the path; escapes may still grow the string.
  result.reserve(kPrefix.size() + input_str.size());
  result.append(kPrefix);

  for (const char ch : input_str) {
    // Work with the unsigned code so bytes >= 0x80 compare above the limit
    // regardless of whether plain `char` is signed.
    const auto code = static_cast<unsigned char>(ch);
    if (code > LARGEST_ASCII_CHAR_CODE_TO_ENCODE) [[unlikely]] {
      result.push_back(ch);
    } else if (backslash_is_separator && ch == '\\') {
      result.push_back('/');
    } else {
      result.append(lookup_table[code]);
    }
  }

  return result;
}

void BindingData::PathToFileURL(const FunctionCallbackInfo<Value>& args) {
CHECK_GE(args.Length(), 2); // input
CHECK(args[0]->IsString());
CHECK(args[1]->IsBoolean());

Realm* realm = Realm::GetCurrent(args);
BindingData* binding_data = realm->GetBindingData<BindingData>();
Isolate* isolate = realm->isolate();
OS os = args[1]->IsTrue() ? OS::WINDOWS : OS::POSIX;

Utf8Value input(isolate, args[0]);
auto input_str = input.ToStringView();
CHECK(!input_str.empty());

auto out =
ada::parse<ada::url_aggregator>(EncodePathChars(input_str, os), nullptr);

if (!out) {
return ThrowInvalidURL(realm->env(), input.ToStringView(), nullptr);
}

if (os == OS::WINDOWS && args.Length() > 2 && !args[2]->IsUndefined())
[[unlikely]] {
CHECK(args[2]->IsString());
Utf8Value hostname(isolate, args[2]);
CHECK(out->set_hostname(hostname.ToStringView()));
}

binding_data->UpdateComponents(out->get_components(), out->type);

args.GetReturnValue().Set(
ToV8Value(realm->context(), out->get_href(), isolate).ToLocalChecked());
}

void BindingData::DomainToASCII(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
CHECK_GE(args.Length(), 1); // input
Expand Down Expand Up @@ -371,6 +474,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
SetMethodNoSideEffect(isolate, target, "format", Format);
SetMethodNoSideEffect(isolate, target, "getOrigin", GetOrigin);
SetMethod(isolate, target, "parse", Parse);
SetMethod(isolate, target, "pathToFileURL", PathToFileURL);
aduh95 marked this conversation as resolved.
Show resolved Hide resolved
SetMethod(isolate, target, "update", Update);
SetFastMethodNoSideEffect(
isolate, target, "canParse", CanParse, {fast_can_parse_methods_, 2});
Expand All @@ -391,6 +495,7 @@ void BindingData::RegisterExternalReferences(
registry->Register(Format);
registry->Register(GetOrigin);
registry->Register(Parse);
registry->Register(PathToFileURL);
registry->Register(Update);
registry->Register(CanParse);
registry->Register(FastCanParse);
Expand Down
1 change: 1 addition & 0 deletions src/node_url.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class BindingData : public SnapshotableObject {
static void Format(const v8::FunctionCallbackInfo<v8::Value>& args);
static void GetOrigin(const v8::FunctionCallbackInfo<v8::Value>& args);
static void Parse(const v8::FunctionCallbackInfo<v8::Value>& args);
static void PathToFileURL(const v8::FunctionCallbackInfo<v8::Value>& args);
static void Update(const v8::FunctionCallbackInfo<v8::Value>& args);

static void CreatePerIsolateProperties(IsolateData* isolate_data,
Expand Down
Loading