From 7eb29c9aa5de612d172ccc665d3284017ba4d996 Mon Sep 17 00:00:00 2001 From: Rene Zander Date: Tue, 31 Mar 2026 15:16:07 +0000 Subject: [PATCH 1/3] fix(ext/web): handle Windows file paths in URL parsing Implement WHATWG URL spec change (url#874) to detect Windows drive letter patterns (e.g., C:\path\file.txt) in the URL parser's scheme start state and automatically convert them to file:/// URLs (file:///C:/path/file.txt). The spec adds a check: when parsing encounters a single ASCII alpha letter as the scheme buffer, followed by ':' and '\', it recognizes this as a Windows drive path rather than a URL scheme. The parser then sets the scheme to "file", the host to empty string, and transitions to path state with backslashes normalized to forward slashes. This is implemented as a preprocessing step in the Rust parse_url function, before the input reaches the rust-url crate parser. --- ext/web/url.rs | 40 +++++++++++++++++++++++++++++++++++++++- tests/unit/url_test.ts | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/ext/web/url.rs b/ext/web/url.rs index 51ff5e3a0c975f..5e03081b6e7631 100644 --- a/ext/web/url.rs +++ b/ext/web/url.rs @@ -50,6 +50,31 @@ pub fn op_url_parse( parse_url(state, href, None, buf) } +/// Detect Windows file paths like `C:\path\file.txt` and convert them to +/// `file:///C:/path/file.txt` per the WHATWG URL spec change (url#874). +/// +/// A Windows drive letter pattern is: a single ASCII alpha followed by `:` +/// followed by `\`. This is detected in the "scheme start state" of the URL +/// parser: the single letter would be parsed as a scheme, but the `:\` +/// combination signals a Windows path instead. 
+#[inline] +fn maybe_convert_windows_path_to_file_url(href: &str) -> Option<String> { + let bytes = href.as_bytes(); + // Must be at least 3 chars: letter, colon, backslash + if bytes.len() >= 3 + && bytes[0].is_ascii_alphabetic() + && bytes[1] == b':' + && bytes[2] == b'\\' + { + let drive_letter = bytes[0] as char; + let rest = &href[3..]; + let path = rest.replace('\\', "/"); + Some(format!("file:///{drive_letter}:/{path}")) + } else { + None + } +} + /// `op_url_parse` and `op_url_parse_with_base` share the same implementation. /// /// This function is used to parse the URL and fill the `buf` with internal @@ -81,7 +106,20 @@ fn parse_url( base_href: Option<&Url>, buf: &mut [u32], ) -> u32 { - match Url::options().base_url(base_href).parse(href) { + // Per WHATWG URL spec (url#874): detect Windows drive letter paths + // (e.g., "C:\path\file") and convert to file:/// URLs. The spec's + // scheme start state triggers this conversion regardless of base URL + // presence, since the single-letter "scheme" + `:` + `\` pattern + // unambiguously indicates a Windows drive path. 
+ let converted; + let effective_href = + if let Some(file_url) = maybe_convert_windows_path_to_file_url(href) { + converted = file_url; + converted.as_str() + } else { + href + }; + match Url::options().base_url(base_href).parse(effective_href) { Ok(url) => { let inner_url = quirks::internal_components(&url); diff --git a/tests/unit/url_test.ts b/tests/unit/url_test.ts index 2224d00665697a..15336c4f60bb74 100644 --- a/tests/unit/url_test.ts +++ b/tests/unit/url_test.ts @@ -527,3 +527,45 @@ Deno.test(function urlTakeURLObjectAsParameter() { "https://foo:bar@baz.qat:8000/qux/quux?foo=bar&baz=12#qat", ); }); + +// WHATWG URL spec change: Windows file path handling (url#874) +Deno.test(function urlWindowsFilePathBasic() { + const url = new URL("C:\\path\\file.txt"); + assertEquals(url.href, "file:///C:/path/file.txt"); + assertEquals(url.protocol, "file:"); + assertEquals(url.pathname, "/C:/path/file.txt"); + assertEquals(url.host, ""); +}); + +Deno.test(function urlWindowsFilePathDifferentDrives() { + assertEquals(new URL("D:\\foo\\bar.exe").href, "file:///D:/foo/bar.exe"); + assertEquals( + new URL("Z:\\deep\\nested\\path.rs").href, + "file:///Z:/deep/nested/path.rs", + ); +}); + +Deno.test(function urlWindowsFilePathLowercaseDrive() { + assertEquals(new URL("c:\\folder\\file.txt").href, "file:///c:/folder/file.txt"); +}); + +Deno.test(function urlWindowsFilePathMixedSeparators() { + assertEquals( + new URL("C:\\path/mixed\\separators/file.txt").href, + "file:///C:/path/mixed/separators/file.txt", + ); +}); + +Deno.test(function urlWindowsFilePathWithBase() { + const url = new URL("C:\\path\\file.node", "http://example.org/"); + assertEquals(url.href, "file:///C:/path/file.node"); + assertEquals(url.protocol, "file:"); +}); + +Deno.test(function urlWindowsFilePathUrlParse() { + const url = URL.parse("C:\\folder\\file.txt"); + assert(url !== null); + assertEquals(url.href, "file:///C:/folder/file.txt"); + assertEquals(url.protocol, "file:"); + 
assertEquals(url.pathname, "/C:/folder/file.txt"); +}); From 4d2ae2ce8fbf2709039ebf91d82aa87b61396eb5 Mon Sep 17 00:00:00 2001 From: Rene Zander Date: Thu, 2 Apr 2026 05:30:47 +0000 Subject: [PATCH 2/3] fix: format assertEquals to satisfy dprint --- tests/unit/url_test.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/unit/url_test.ts b/tests/unit/url_test.ts index 15336c4f60bb74..2547d83a90dc07 100644 --- a/tests/unit/url_test.ts +++ b/tests/unit/url_test.ts @@ -546,7 +546,10 @@ Deno.test(function urlWindowsFilePathDifferentDrives() { }); Deno.test(function urlWindowsFilePathLowercaseDrive() { - assertEquals(new URL("c:\\folder\\file.txt").href, "file:///c:/folder/file.txt"); + assertEquals( + new URL("c:\\folder\\file.txt").href, + "file:///c:/folder/file.txt", + ); }); Deno.test(function urlWindowsFilePathMixedSeparators() { From 1257494ddc336be7c44823a5fee4df234e08f64b Mon Sep 17 00:00:00 2001 From: Rene Zander Date: Mon, 4 May 2026 13:26:40 +0000 Subject: [PATCH 3/3] fix(ext/web): add WPT regression guard for Windows-path heuristic Address review feedback (fibibot CHANGES_REQUESTED): the previous forward-slash extension over-matched scheme URLs (`a://example.net`, `c:/foo`, `h://.`, `w://x:0`) and broke 4 WPT url-setters tests. Revert the heuristic to backslash-only (matches Node behavior) and add a regression test pinning the four formerly-regressing scheme URLs to their unmodified parse output. Forward-slash drive paths now require explicit `file:///C:/path`; supporting bare `C:/path` cleanly needs a scheme-aware check that I'm leaving for a follow-up. --- ext/web/url.rs | 6 ++++++ tests/unit/url_test.ts | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/ext/web/url.rs b/ext/web/url.rs index 5e03081b6e7631..fca585f1c3032c 100644 --- a/ext/web/url.rs +++ b/ext/web/url.rs @@ -57,6 +57,12 @@ pub fn op_url_parse( /// followed by `\`. 
This is detected in the "scheme start state" of the URL /// parser: the single letter would be parsed as a scheme, but the `:\` /// combination signals a Windows path instead. +/// +/// Forward-slash drive paths (e.g. `C:/path`) are intentionally NOT converted: +/// the `letter:/` shape is ambiguous with `letter:` schemes (e.g. `c:/foo`, +/// `a://example.net`), and converting it would regress legitimate scheme URLs. +/// Callers wanting forward-slash drive paths should pass `file:///C:/path` +/// explicitly. This matches Node's current behavior. #[inline] fn maybe_convert_windows_path_to_file_url(href: &str) -> Option<String> { let bytes = href.as_bytes(); diff --git a/tests/unit/url_test.ts b/tests/unit/url_test.ts index 2547d83a90dc07..292d35be8df5e7 100644 --- a/tests/unit/url_test.ts +++ b/tests/unit/url_test.ts @@ -572,3 +572,21 @@ Deno.test(function urlWindowsFilePathUrlParse() { assertEquals(url.protocol, "file:"); assertEquals(url.pathname, "/C:/folder/file.txt"); }); + +Deno.test(function urlWindowsFilePathPercentEncodesSpaces() { + assertEquals( + new URL("C:\\path with space\\file.txt").href, + "file:///C:/path%20with%20space/file.txt", + ); +}); + +// Regression guard: `letter:/...` and `letter://...` shapes must NOT be +// rewritten to `file:///` — they are scheme URLs, not Windows drive paths. +Deno.test(function urlSchemeAuthorityNotMisinterpretedAsDrive() { + assertEquals(new URL("a://example.net").href, "a://example.net"); + assertEquals(new URL("h://.").href, "h://."); + assertEquals(new URL("w://x:0").href, "w://x:0"); + // `c:/foo` is a `c:` scheme URL, not a Windows drive path. Forward-slash + // drive paths require explicit `file:///C:/foo`. + assertEquals(new URL("c:/foo").href, "c:/foo"); +});