diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc
index a51f7d7a30b..34d48449658 100644
--- a/absl/strings/internal/str_format/arg.cc
+++ b/absl/strings/internal/str_format/arg.cc
@@ -317,11 +317,21 @@ inline bool ConvertStringArg(const wchar_t *v,
   strings_internal::ShiftState s;
   size_t chars_written = 0;
   for (size_t i = 0; i < len; ++i) {
+    // A high surrogate must be immediately followed by a low surrogate. If it
+    // isn't, the UTF-16 input is malformed and WideToUtf8() would otherwise
+    // leave a partial sequence in the buffer. The single wchar_t path already
+    // rejects an unpaired surrogate, so reject it here too.
+    if (s.saw_high_surrogate) {
+      const uint32_t cu = static_cast<uint32_t>(v[i]);
+      if (cu < 0xDC00 || cu > 0xDFFF) return false;
+    }
     const size_t chars =
         strings_internal::WideToUtf8(v[i], &mb[chars_written], s);
     if (chars == static_cast<size_t>(-1)) { return false; }
     chars_written += chars;
   }
+  // A trailing high surrogate has no low surrogate to complete it.
+  if (s.saw_high_surrogate) return false;
   return ConvertStringArg(string_view(mb.data(), chars_written), conv, sink);
 }
 
diff --git a/absl/strings/internal/str_format/convert_test.cc b/absl/strings/internal/str_format/convert_test.cc
index 1c3d1a30152..5e860163910 100644
--- a/absl/strings/internal/str_format/convert_test.cc
+++ b/absl/strings/internal/str_format/convert_test.cc
@@ -357,6 +357,44 @@ TEST_F(FormatConvertTest, StringPrecision) {
   EXPECT_EQ("ABC", FormatPack(wformat2, {FormatArgImpl(wp)}));
 }
 
+TEST_F(FormatConvertTest, WideStringUnpairedSurrogate) {
+  // The single wchar_t ("%lc") path rejects an unpaired surrogate. The wide
+  // string ("%ls") path should reject it too rather than emitting a partial
+  // UTF-8 sequence. A failed conversion yields an empty result.
+  auto format_ls = [](const std::wstring& ws) {
+    UntypedFormatSpecImpl format("%ls");
+    return FormatPack(format, {FormatArgImpl(ws)});
+  };
+
+  // A well-formed surrogate pair (U+10000) still converts.
+  std::wstring pair;
+  pair.push_back(static_cast<wchar_t>(0xD800));
+  pair.push_back(static_cast<wchar_t>(0xDC00));
+  EXPECT_EQ("\xF0\x90\x80\x80", format_ls(pair));
+
+  // Trailing high surrogate with no low surrogate to complete it.
+  std::wstring trailing_high;
+  trailing_high.push_back(static_cast<wchar_t>(0xD800));
+  EXPECT_EQ("", format_ls(trailing_high));
+
+  // High surrogate followed by a non-surrogate.
+  std::wstring high_then_ascii;
+  high_then_ascii.push_back(static_cast<wchar_t>(0xD800));
+  high_then_ascii.push_back(L'A');
+  EXPECT_EQ("", format_ls(high_then_ascii));
+
+  // High surrogate followed by another high surrogate.
+  std::wstring high_then_high;
+  high_then_high.push_back(static_cast<wchar_t>(0xD800));
+  high_then_high.push_back(static_cast<wchar_t>(0xD800));
+  EXPECT_EQ("", format_ls(high_then_high));
+
+  // Isolated low surrogate.
+  std::wstring lone_low;
+  lone_low.push_back(static_cast<wchar_t>(0xDC00));
+  EXPECT_EQ("", format_ls(lone_low));
+}
+
 // Pointer formatting is implementation defined. This checks that the argument
 // can be matched to `ptr`.
 MATCHER_P(MatchesPointerString, ptr, "") {