Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions lib/reline/unicode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ class Reline::Unicode
# Matches a CSI escape sequence: ESC, "[", any run of digits/semicolons,
# then one final letter from the listed set.
CSI_REGEXP = /\e\[[\d;]*[ABCDEFGHJKSTfminsuhl]/
# Matches an OSC escape sequence: ESC, "]", a number, zero or more
# ";"-prefixed fields (no ";", BEL or ESC inside), terminated by BEL or ESC "\".
OSC_REGEXP = /\e\]\d+(?:;[^;\a\e]+)*(?:\a|\e\\)/
# Scanner pattern anchored with \G; an alternation of five capture groups, in
# order: non-printing start marker, non-printing end marker, CSI sequence,
# OSC sequence, or one extended grapheme cluster (\X). The /o flag makes the
# interpolations happen only once.
WIDTH_SCANNER = /\G(?:(#{NON_PRINTING_START})|(#{NON_PRINTING_END})|(#{CSI_REGEXP})|(#{OSC_REGEXP})|(\X))/o
# Code point U+FF9E (halfwidth katakana voiced sound mark).
HALFWIDTH_DAKUTEN = 0xFF9E
# Code point U+FF9F (halfwidth katakana semi-voiced sound mark).
HALFWIDTH_HANDAKUTEN = 0xFF9F

def self.escape_for_print(str)
str.chars.map! { |gr|
Expand Down Expand Up @@ -76,20 +78,24 @@ def self.get_mbchar_width(mbchar)
ord = mbchar.ord
if ord <= 0x1F # in EscapedPairs
return 2
elsif ord <= 0x7E # printable ASCII chars
elsif mbchar.length <= 1 && ord <= 0x7E # printable ASCII chars
# ~~~~~~~~~~~~~~~~~~ guard against the following grapheme combination character (e.g., dakuten/handakuten)
return 1
end

utf8_mbchar = mbchar.encode(Encoding::UTF_8)
ord = utf8_mbchar.ord

chunk_index = EastAsianWidth::CHUNK_LAST.bsearch_index { |o| ord <= o }
size = EastAsianWidth::CHUNK_WIDTH[chunk_index]
if size == -1
Reline.ambiguous_width
elsif size == 1 && utf8_mbchar.size >= 2
second_char_ord = utf8_mbchar[1].ord
# Halfwidth dakuten and handakuten
# Only these two characters have the Letter Modifier category and can be combined into a single grapheme cluster
(second_char_ord == 0xFF9E || second_char_ord == 0xFF9F) ? 2 : 1
elsif halfwidth_dakuten_or_handakuten_character?(utf8_mbchar[-1])
if utf8_mbchar.length >= 2 # Whether this is a dakuten or handakuten combination character
utf8_mbchar.each_char.sum { |char| get_mbchar_width(char) }
else
1
end
else
size
end
Expand Down Expand Up @@ -412,4 +418,11 @@ def self.word_character?(s)
# Reports whether +s+ contains at least one whitespace character.
# Yields nil (rather than false) when +s+ is nil or false.
def self.space_character?(s)
  return unless s

  s.match?(/\s/)
end

# Reports whether the first character of +s+ is the halfwidth dakuten
# (HALFWIDTH_DAKUTEN) or the halfwidth handakuten (HALFWIDTH_HANDAKUTEN).
#
# s - a String (callers pass a single character) or nil.
#
# Returns false for nil, empty, non-UTF-8 or invalidly encoded input, so the
# predicate never raises when there is no character to inspect.
def self.halfwidth_dakuten_or_handakuten_character?(s)
  # nil has no #encoding and String#ord raises ArgumentError on "", so both
  # are rejected up front.
  return false if s.nil? || s.empty?
  # Code point comparison below is only meaningful for valid UTF-8.
  return false unless s.encoding == Encoding::UTF_8 && s.valid_encoding?

  [HALFWIDTH_DAKUTEN, HALFWIDTH_HANDAKUTEN].include?(s.ord)
end
end
12 changes: 12 additions & 0 deletions test/reline/test_unicode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -283,4 +283,16 @@ def test_character_type
refute(Reline::Unicode.space_character?('-'))
refute(Reline::Unicode.space_character?(nil))
end

# Checks the display width of the halfwidth dakuten (U+FF9E) and handakuten
# (U+FF9F), both alone and appended to a base character.
#
# The halfwidth code points are written as \u escapes so the literals keep
# their halfwidth form even if this file passes through Unicode
# normalization (NFKC folds them into fullwidth or combining characters,
# which no longer match the expected widths below).
def test_halfwidth_dakuten_handakuten_combinations
  assert_equal 1, Reline::Unicode.get_mbchar_width("\uFF9E")
  assert_equal 1, Reline::Unicode.get_mbchar_width("\uFF9F")
  assert_equal 2, Reline::Unicode.get_mbchar_width("\uFF76\uFF9E") # halfwidth KA + dakuten
  assert_equal 2, Reline::Unicode.get_mbchar_width("\uFF8A\uFF9F") # halfwidth HA + handakuten
  assert_equal 2, Reline::Unicode.get_mbchar_width("\uFF7B\uFF9E") # halfwidth SA + dakuten
  assert_equal 2, Reline::Unicode.get_mbchar_width("a\uFF9E")
  assert_equal 2, Reline::Unicode.get_mbchar_width("1\uFF9F")
  assert_equal 3, Reline::Unicode.get_mbchar_width("あ\uFF9E")
  assert_equal 3, Reline::Unicode.get_mbchar_width("紅\uFF9E")
end
end