/// Rewrites a backslash-style Windows drive path (e.g. `C:\path\file.txt`)
/// into the equivalent `file:///C:/path/file.txt` URL string, following the
/// WHATWG URL spec change (url#874).
///
/// The input is treated as a drive path only when it starts with exactly:
/// one ASCII letter, then `:`, then `\`. In the URL parser's "scheme start
/// state" the single letter would otherwise be read as a scheme; the `:\`
/// that follows marks it as a Windows path instead.
///
/// Forward-slash drive paths (e.g. `C:/path`) are intentionally NOT converted:
/// the `letter:/` shape is ambiguous with `letter:` schemes (e.g. `c:/foo`,
/// `a://example.net`), and converting it would regress legitimate scheme URLs.
/// Callers wanting forward-slash drive paths should pass `file:///C:/path`
/// explicitly. This matches Node's current behavior.
///
/// Returns `Some(url_string)` with every `\` after the drive prefix replaced
/// by `/`, or `None` when `href` does not start with a drive prefix. No
/// percent-encoding happens here; the returned string is meant to be handed
/// to the URL parser.
#[inline]
fn maybe_convert_windows_path_to_file_url(href: &str) -> Option<String> {
  match href.as_bytes() {
    // The slice pattern checks length and prefix in one step, so inputs
    // shorter than three bytes fall through without any indexing.
    [drive, b':', b'\\', ..] if drive.is_ascii_alphabetic() => {
      // The three matched bytes are ASCII, so byte offset 3 is guaranteed to
      // be a char boundary and this slice cannot panic.
      let rest = &href[3..];
      // Output is "file:///" + drive + ":/" + rest, i.e. exactly eight bytes
      // longer than the input (`\` and `/` are both one byte).
      let mut url = String::with_capacity("file:///".len() + href.len());
      url.push_str("file:///");
      url.push(char::from(*drive));
      url.push_str(":/");
      url.extend(rest.chars().map(|c| if c == '\\' { '/' } else { c }));
      Some(url)
    }
    _ => None,
  }
}
// WHATWG URL spec change (url#874): a backslash Windows drive path passed as
// the URL input is parsed as a `file:` URL.
Deno.test(function urlWindowsFilePathBasic() {
  const { href, protocol, pathname, host } = new URL("C:\\path\\file.txt");
  assertEquals(href, "file:///C:/path/file.txt");
  assertEquals(protocol, "file:");
  assertEquals(pathname, "/C:/path/file.txt");
  assertEquals(host, "");
});

// Any drive letter is accepted, not just `C`.
Deno.test(function urlWindowsFilePathDifferentDrives() {
  const cases: [string, string][] = [
    ["D:\\foo\\bar.exe", "file:///D:/foo/bar.exe"],
    ["Z:\\deep\\nested\\path.rs", "file:///Z:/deep/nested/path.rs"],
  ];
  for (const [input, expected] of cases) {
    assertEquals(new URL(input).href, expected);
  }
});

// The drive letter's case is preserved in the resulting URL.
Deno.test(function urlWindowsFilePathLowercaseDrive() {
  const input = "c:\\folder\\file.txt";
  const expected = "file:///c:/folder/file.txt";
  assertEquals(new URL(input).href, expected);
});

// Backslashes and forward slashes may be mixed after the drive prefix.
Deno.test(function urlWindowsFilePathMixedSeparators() {
  const input = "C:\\path/mixed\\separators/file.txt";
  const expected = "file:///C:/path/mixed/separators/file.txt";
  assertEquals(new URL(input).href, expected);
});

// A base URL does not change the outcome: the drive path still wins.
Deno.test(function urlWindowsFilePathWithBase() {
  const { href, protocol } = new URL(
    "C:\\path\\file.node",
    "http://example.org/",
  );
  assertEquals(href, "file:///C:/path/file.node");
  assertEquals(protocol, "file:");
});

// `URL.parse` goes through the same conversion as the constructor.
Deno.test(function urlWindowsFilePathUrlParse() {
  const parsed = URL.parse("C:\\folder\\file.txt");
  assert(parsed !== null);
  const { href, protocol, pathname } = parsed;
  assertEquals(href, "file:///C:/folder/file.txt");
  assertEquals(protocol, "file:");
  assertEquals(pathname, "/C:/folder/file.txt");
});

// Spaces in the path come out percent-encoded in the serialized URL.
Deno.test(function urlWindowsFilePathPercentEncodesSpaces() {
  const input = "C:\\path with space\\file.txt";
  const expected = "file:///C:/path%20with%20space/file.txt";
  assertEquals(new URL(input).href, expected);
});

// Regression guard: `letter:/...` and `letter://...` shapes must NOT be
// rewritten to `file:///` — they are scheme URLs, not Windows drive paths.
Deno.test(function urlSchemeAuthorityNotMisinterpretedAsDrive() {
  const unchanged = [
    "a://example.net",
    "h://.",
    "w://x:0",
    // `c:/foo` is a `c:` scheme URL, not a Windows drive path. Forward-slash
    // drive paths require explicit `file:///C:/foo`.
    "c:/foo",
  ];
  for (const input of unchanged) {
    assertEquals(new URL(input).href, input);
  }
});