Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions absl/strings/internal/str_format/arg.cc
Original file line number Diff line number Diff line change
Expand Up @@ -317,11 +317,21 @@ inline bool ConvertStringArg(const wchar_t *v,
strings_internal::ShiftState s;
size_t chars_written = 0;
for (size_t i = 0; i < len; ++i) {
// A high surrogate must be immediately followed by a low surrogate. If it
// isn't, the UTF-16 input is malformed and WideToUtf8() would otherwise
// leave a partial sequence in the buffer. The single wchar_t path already
// rejects an unpaired surrogate, so reject it here too.
if (s.saw_high_surrogate) {
const uint32_t cu = static_cast<uint32_t>(v[i]);
if (cu < 0xDC00 || cu > 0xDFFF) return false;
}
const size_t chars =
strings_internal::WideToUtf8(v[i], &mb[chars_written], s);
if (chars == static_cast<size_t>(-1)) { return false; }
chars_written += chars;
}
// A trailing high surrogate has no low surrogate to complete it.
if (s.saw_high_surrogate) return false;
return ConvertStringArg(string_view(mb.data(), chars_written), conv, sink);
}

Expand Down
38 changes: 38 additions & 0 deletions absl/strings/internal/str_format/convert_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,44 @@ TEST_F(FormatConvertTest, StringPrecision) {
EXPECT_EQ("ABC", FormatPack(wformat2, {FormatArgImpl(wp)}));
}

TEST_F(FormatConvertTest, WideStringUnpairedSurrogate) {
  // "%ls" is expected to agree with "%lc": an unpaired UTF-16 surrogate is a
  // conversion failure, not a partially written UTF-8 sequence. A failed
  // conversion shows up here as an empty formatted string.
  const wchar_t kHighSurrogate = static_cast<wchar_t>(0xD800);
  const wchar_t kLowSurrogate = static_cast<wchar_t>(0xDC00);

  // Formats `input` through "%ls" and returns whatever was produced.
  auto render = [](const std::wstring& input) {
    UntypedFormatSpecImpl spec("%ls");
    return FormatPack(spec, {FormatArgImpl(input)});
  };

  // Sanity check: a complete high+low pair encodes U+10000 as four bytes.
  const std::wstring well_formed{kHighSurrogate, kLowSurrogate};
  EXPECT_EQ("\xF0\x90\x80\x80", render(well_formed));

  // The input ends right after a high surrogate.
  const std::wstring dangling_high(1, kHighSurrogate);
  EXPECT_EQ("", render(dangling_high));

  // A high surrogate is followed by an ordinary (non-surrogate) character.
  const std::wstring high_before_ascii{kHighSurrogate, L'A'};
  EXPECT_EQ("", render(high_before_ascii));

  // A high surrogate is followed by a second high surrogate.
  const std::wstring doubled_high{kHighSurrogate, kHighSurrogate};
  EXPECT_EQ("", render(doubled_high));

  // A low surrogate appears with no preceding high surrogate.
  const std::wstring orphan_low(1, kLowSurrogate);
  EXPECT_EQ("", render(orphan_low));
}

// Pointer formatting is implementation defined. This checks that the argument
// can be matched to `ptr`.
MATCHER_P(MatchesPointerString, ptr, "") {
Expand Down