Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions url/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,17 @@ impl Parser<'_> {
/// https://url.spec.whatwg.org/#concept-basic-url-parser
pub fn parse_url(mut self, input: &str) -> ParseResult<Url> {
let input = Input::new_trim_c0_control_and_space(input, self.violation_fn);

// WHATWG URL spec change (whatwg/url#874): in scheme start state,
// when the parser sees `<ASCII alpha> : \`, it sets scheme to "file",
// host to empty, and transitions to path state. The single-letter
// "scheme" + `:\` is unambiguously a Windows drive path, not a URL.
// Forward-slash drive paths (`c:/foo`) are NOT covered by the spec
// change — those remain valid scheme URLs (e.g. `c:` scheme).
if starts_with_windows_drive_letter_path(&input) {
return self.parse_windows_drive_letter_path(input);
}

if let Ok(remaining) = self.parse_scheme(input.clone()) {
return self.parse_with_scheme(remaining);
}
Expand All @@ -401,6 +412,38 @@ impl Parser<'_> {
}
}

/// Builds a `file` URL from a top-level input of the shape
/// `<ASCII alpha> : \` (WHATWG URL spec change whatwg/url#874).
///
/// The serialization is seeded with `file://`, so the host span is empty,
/// and the whole input — drive letter included — is handed to the path
/// parser under the `file` scheme type. Whatever the path parser leaves
/// unconsumed is then parsed as query and fragment.
///
/// Expects `self.serialization` to be empty on entry (checked in debug
/// builds only).
fn parse_windows_drive_letter_path(mut self, input: Input<'_>) -> ParseResult<Url> {
    const SCHEME: &str = "file";
    const PREFIX: &str = "file://";

    debug_assert!(self.serialization.is_empty());
    self.serialization.push_str(PREFIX);

    let scheme_end = SCHEME.len() as u32;
    // No userinfo, host or port is written, so the username end, host
    // start, host end and path start all sit right after `file://`.
    let authority_end = PREFIX.len() as u32;

    let mut has_host = false;
    let after_path = self.parse_path(
        SchemeType::File,
        &mut has_host,
        authority_end as usize,
        input,
    );

    let (query_start, fragment_start) =
        self.parse_query_and_fragment(SchemeType::File, scheme_end, after_path)?;

    Ok(Url {
        serialization: self.serialization,
        scheme_end,
        username_end: authority_end,
        host_start: authority_end,
        host_end: authority_end,
        host: HostInternal::None,
        port: None,
        path_start: authority_end,
        query_start,
        fragment_start,
    })
}

pub fn parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result<Input<'i>, ()> {
// starts_with will also fail for empty strings so we can skip that comparison for perf
if !input.starts_with(ascii_alpha) {
Expand Down Expand Up @@ -1795,6 +1838,19 @@ fn starts_with_windows_drive_letter(s: &str) -> bool {
&& (s.len() == 2 || matches!(s.as_bytes()[2], b'/' | b'\\' | b'?' | b'#'))
}

/// Detect the Windows drive letter file-path shape `<alpha> : \` at the start
/// of a top-level URL parser input. Per WHATWG URL spec change (whatwg/url#874),
/// this pattern is treated as a Windows drive path (file URL) rather than a
/// single-letter scheme. Forward-slash drive paths are intentionally not
/// matched: `c:/foo` is a valid `c:` scheme URL and must not be rewritten.
fn starts_with_windows_drive_letter_path(input: &Input<'_>) -> bool {
let mut iter = input.clone();
matches!(
(iter.next(), iter.next(), iter.next()),
(Some(a), Some(':'), Some('\\')) if ascii_alpha(a)
)
}

/// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
fn starts_with_windows_drive_letter_segment(input: &Input<'_>) -> bool {
let mut input = input.clone();
Expand Down
95 changes: 95 additions & 0 deletions url/tests/unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1392,3 +1392,98 @@ fn test_parse_url_with_single_byte_control_host() {
let url2 = Url::parse(url1.as_str()).unwrap();
assert_eq!(url2, url1);
}

// WHATWG URL spec change (whatwg/url#874): Windows drive letter file paths.
// `<alpha>:\` at the start of input is treated as a Windows drive path and
// parsed into a `file:///<drive>:/...` URL.

#[test]
fn windows_drive_path_basic() {
    // A backslash drive path given as top-level input is expected to parse
    // into a host-less `file` URL whose path starts with `/C:`.
    let input = r"C:\path\file.txt";
    let parsed = Url::parse(input).unwrap();

    assert_eq!(parsed.as_str(), "file:///C:/path/file.txt");
    assert_eq!(parsed.scheme(), "file");
    assert_eq!(parsed.host(), None);
    assert_eq!(parsed.path(), "/C:/path/file.txt");
}

#[test]
fn windows_drive_path_different_drives() {
    // (input, expected serialization) pairs for drive letters other than `C`.
    let cases = [
        (r"D:\foo\bar.exe", "file:///D:/foo/bar.exe"),
        (r"Z:\deep\nested\path.rs", "file:///Z:/deep/nested/path.rs"),
    ];

    for &(input, expected) in &cases {
        let parsed = Url::parse(input).unwrap();
        assert_eq!(parsed.as_str(), expected);
    }
}

#[test]
fn windows_drive_path_preserves_drive_case() {
    // Per spec the drive letter keeps its case: the lowercase and uppercase
    // inputs must serialize with the letter exactly as typed.
    let cases = [
        (r"c:\folder\file.txt", "file:///c:/folder/file.txt"),
        (r"C:\folder\file.txt", "file:///C:/folder/file.txt"),
    ];

    for &(input, expected) in &cases {
        assert_eq!(Url::parse(input).unwrap().as_str(), expected);
    }
}

#[test]
fn windows_drive_path_mixed_separators() {
    // After the drive prefix, `/` and `\` are both expected to come out as
    // `/` in the serialized path.
    let parsed = Url::parse(r"C:\path/mixed\separators/file.txt").unwrap();
    assert_eq!(parsed.as_str(), "file:///C:/path/mixed/separators/file.txt");
}

#[test]
fn windows_drive_path_percent_encodes_spaces() {
    // Spaces inside path segments must be serialized as `%20`.
    let parsed = Url::parse(r"C:\path with space\file.txt").unwrap();
    assert_eq!(parsed.as_str(), "file:///C:/path%20with%20space/file.txt");
}

#[test]
fn windows_drive_path_drops_base() {
    // A configured base URL must not influence the result: the drive-path
    // input is already absolute (file scheme, empty host).
    let base = Url::parse("http://example.org/").unwrap();
    let options = Url::options().base_url(Some(&base));
    let resolved = options.parse(r"C:\path\file.node").unwrap();

    assert_eq!(resolved.as_str(), "file:///C:/path/file.node");
    assert_eq!(resolved.scheme(), "file");
}

#[test]
fn windows_drive_path_with_query_and_fragment() {
    // `?` and `#` end the drive path; the query and fragment that follow are
    // expected to be exposed through the usual accessors.
    let parsed = Url::parse(r"C:\path\file.txt?q=1#frag").unwrap();

    assert_eq!(parsed.as_str(), "file:///C:/path/file.txt?q=1#frag");
    assert_eq!(parsed.query(), Some("q=1"));
    assert_eq!(parsed.fragment(), Some("frag"));
}

// Regression guard: `letter:/...` and `letter://...` shapes must NOT be
// rewritten to `file:///` — these are scheme URLs (single-letter schemes are
// valid per RFC 3986), not Windows drive paths. The spec change is scoped to
// the `<alpha> : \` (backslash) shape only.
#[test]
fn windows_drive_path_does_not_rewrite_scheme_urls() {
    // Each input must serialize back to itself, i.e. stay a single-letter
    // scheme URL. The last entry, `c:/foo`, is a `c:` scheme URL and not a
    // Windows drive path: forward-slash drive paths remain untouched and
    // require an explicit `file:///C:/foo`.
    let untouched = ["a://example.net", "h://.", "w://x:0", "c:/foo"];

    for &input in &untouched {
        let parsed = Url::parse(input).unwrap();
        assert_eq!(parsed.as_str(), input);
    }
}