diff --git a/lib/reline/unicode.rb b/lib/reline/unicode.rb index 21e4ea240e..26e7246a84 100644 --- a/lib/reline/unicode.rb +++ b/lib/reline/unicode.rb @@ -40,6 +40,8 @@ class Reline::Unicode CSI_REGEXP = /\e\[[\d;]*[ABCDEFGHJKSTfminsuhl]/ OSC_REGEXP = /\e\]\d+(?:;[^;\a\e]+)*(?:\a|\e\\)/ WIDTH_SCANNER = /\G(?:(#{NON_PRINTING_START})|(#{NON_PRINTING_END})|(#{CSI_REGEXP})|(#{OSC_REGEXP})|(\X))/o + HALFWIDTH_DAKUTEN = 0xFF9E + HALFWIDTH_HANDAKUTEN = 0xFF9F def self.escape_for_print(str) str.chars.map! { |gr| @@ -76,20 +78,24 @@ def self.get_mbchar_width(mbchar) ord = mbchar.ord if ord <= 0x1F # in EscapedPairs return 2 - elsif ord <= 0x7E # printable ASCII chars + elsif mbchar.length <= 1 && ord <= 0x7E # printable ASCII chars + # ~~~~~~~~~~~~~~~~~~ guard against the following grapheme combination character (e.g., dakuten/handakuten) return 1 end + utf8_mbchar = mbchar.encode(Encoding::UTF_8) ord = utf8_mbchar.ord + chunk_index = EastAsianWidth::CHUNK_LAST.bsearch_index { |o| ord <= o } size = EastAsianWidth::CHUNK_WIDTH[chunk_index] if size == -1 Reline.ambiguous_width - elsif size == 1 && utf8_mbchar.size >= 2 - second_char_ord = utf8_mbchar[1].ord - # Halfwidth Dakuten Handakuten - # Only these two character has Letter Modifier category and can be combined in a single grapheme cluster - (second_char_ord == 0xFF9E || second_char_ord == 0xFF9F) ? 2 : 1 + elsif halfwidth_dakuten_or_handakuten_character?(utf8_mbchar[-1]) + if utf8_mbchar.length >= 2 # Whether this is a dakuten or handakuten combination character + utf8_mbchar.each_char.sum { |char| get_mbchar_width(char) } + else + 1 + end else size end @@ -412,4 +418,11 @@ def self.word_character?(s) def self.space_character?(s) s.match?(/\s/) if s end + + def self.halfwidth_dakuten_or_handakuten_character?(s) + return false if s.encoding != Encoding::UTF_8 || !s.valid_encoding? + + ord = s.ord + ord == HALFWIDTH_DAKUTEN || ord == HALFWIDTH_HANDAKUTEN + end end diff --git a/test/reline/test_unicode.rb b/test/reline/test_unicode.rb index 0778306c32..9cfc53b57b 100644 --- a/test/reline/test_unicode.rb +++ b/test/reline/test_unicode.rb @@ -283,4 +283,16 @@ def test_character_type refute(Reline::Unicode.space_character?('-')) refute(Reline::Unicode.space_character?(nil)) end + + def test_halfwidth_dakuten_handakuten_combinations + assert_equal 1, Reline::Unicode.get_mbchar_width("\uFF9E") + assert_equal 1, Reline::Unicode.get_mbchar_width("\uFF9F") + assert_equal 2, Reline::Unicode.get_mbchar_width("ガ") + assert_equal 2, Reline::Unicode.get_mbchar_width("パ") + assert_equal 2, Reline::Unicode.get_mbchar_width("ザ") + assert_equal 2, Reline::Unicode.get_mbchar_width("a゙") + assert_equal 2, Reline::Unicode.get_mbchar_width("1゚") + assert_equal 3, Reline::Unicode.get_mbchar_width("あ゙") + assert_equal 3, Reline::Unicode.get_mbchar_width("紅゙") + end end