Skip to content

Commit

Permalink
Fix Reline crash with invalid encoding history
Browse files Browse the repository at this point in the history
  • Loading branch information
tompng committed Oct 1, 2024
1 parent 5353924 commit a24596d
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 4 deletions.
6 changes: 3 additions & 3 deletions lib/reline/history.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def [](index)

def []=(index, val)
index = check_index(index)
super(index, String.new(val, encoding: Reline.encoding_system_needs))
super(index, Reline::Unicode.safe_encode(val, Reline.encoding_system_needs))
end

def concat(*val)
Expand All @@ -45,7 +45,7 @@ def push(*val)
end
end
super(*(val.map{ |v|
String.new(v, encoding: Reline.encoding_system_needs)
Reline::Unicode.safe_encode(v, Reline.encoding_system_needs)
}))
end

Expand All @@ -56,7 +56,7 @@ def <<(val)
if @config.history_size.positive?
shift if size + 1 > @config.history_size
end
super(String.new(val, encoding: Reline.encoding_system_needs))
super(Reline::Unicode.safe_encode(val, Reline.encoding_system_needs))
end

private def check_index(index)
Expand Down
2 changes: 1 addition & 1 deletion lib/reline/line_editor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1325,7 +1325,7 @@ def insert_multiline_text(text)
save_old_buffer
pre = @buffer_of_lines[@line_index].byteslice(0, @byte_pointer)
post = @buffer_of_lines[@line_index].byteslice(@byte_pointer..)
lines = (pre + text.gsub(/\r\n?/, "\n") + post).split("\n", -1)
lines = (pre + Reline::Unicode.safe_encode(text, @encoding).gsub(/\r\n?/, "\n") + post).split("\n", -1)
lines << '' if lines.empty?
@buffer_of_lines[@line_index, 1] = lines
@line_index += lines.size - 1
Expand Down
16 changes: 16 additions & 0 deletions lib/reline/unicode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,22 @@ def self.escape_for_print(str)
}.join
end

# Converts +str+ into +encoding+ without raising on bad input.
#
# Invalid byte sequences and characters that have no mapping in the
# target encoding are substituted with the encoding's replacement
# character. Reline only supports strings that are convertible to UTF-8,
# so when neither side is UTF-8 any character of the result that cannot
# be expressed in UTF-8 is additionally replaced with '?'.
#
# str      - the String to convert.
# encoding - the target encoding (an Encoding or an encoding name).
#
# Returns the converted String.
def self.safe_encode(str, encoding)
  utf8 = Encoding::UTF_8
  encoded = str.encode(encoding, invalid: :replace, undef: :replace)

  # Nothing more to check when UTF-8 is on either side of the conversion,
  # or when only ASCII characters remain.
  return encoded if [str.encoding, encoded.encoding].include?(utf8)
  return encoded if encoded.ascii_only?

  # Essentially the same as
  # `str.encode(utf8, **replace_options).encode(encoding, **replace_options)`
  # but each character is only *probed* against UTF-8 and then kept as is,
  # which avoids an unnecessary irreversible round-trip conversion.
  encoded.gsub(/\X/) do |cluster|
    begin
      cluster.encode(utf8)
      cluster
    rescue Encoding::UndefinedConversionError
      '?'
    end
  end
end

require 'reline/unicode/east_asian_width'

def self.get_mbchar_width(mbchar)
Expand Down
9 changes: 9 additions & 0 deletions test/reline/test_history.rb
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,15 @@ def test_history_size_negative_unlimited
assert_equal 5, history.size
end

# Strings pushed into the history must come back equal to the result of
# encoding them to Reline.encoding_system_needs with invalid and undefined
# characters replaced.
def test_history_encoding_conversion
  # One UTF-8 and one Shift_JIS string, each carrying a raw \xFF byte.
  utf8_text = String.new("a\u{65535}b\xFFc", encoding: Encoding::UTF_8)
  sjis_text = String.new("d\xFFe", encoding: Encoding::Shift_JIS)
  sources = [utf8_text, sjis_text]

  history = history_new
  # Push copies so the originals stay available for computing the expectation.
  history.push(*sources.map(&:dup))

  expected = sources.map do |text|
    text.encode(Reline.encoding_system_needs, invalid: :replace, undef: :replace)
  end
  assert_equal(expected, history.to_a)
end

private

def history_new(history_size: 10)
Expand Down
28 changes: 28 additions & 0 deletions test/reline/test_unicode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,32 @@ def test_take_mbchar_range
assert_equal ["\e[31mc\1ABC\2d\e[0mef", 2, 4], Reline::Unicode.take_mbchar_range("\e[31mabc\1ABC\2d\e[0mefghi", 2, 4)
assert_equal ["\e[41m \e[42mい\e[43m ", 1, 4], Reline::Unicode.take_mbchar_range("\e[41mあ\e[42mい\e[43mう", 1, 4, padding: true)
end

# Checks Reline::Unicode.safe_encode against UTF-8 and Shift_JIS targets,
# passing the target both as an encoding name and as an Encoding object.
def test_encoding_conversion
  # Inputs: a UTF-8 and a Shift_JIS string containing a raw \xFF byte,
  # followed by three Shift_JIS strings labelled by how their embedded
  # character is expected to behave when converted.
  texts = [
    String.new("invalid\xFFutf8", encoding: 'utf-8'),
    String.new("invalid\xFFsjis", encoding: 'sjis'),
    "utf8#{33111.chr('sjis')}convertible",
    "utf8#{33222.chr('sjis')}inconvertible",
    "sjis->utf8->sjis#{60777.chr('sjis')}irreversible"
  ]
  # Expected results when the target is UTF-8.
  utf8_texts = [
    'invalid�utf8',
    'invalid�sjis',
    'utf8仝convertible',
    'utf8�inconvertible',
    'sjis->utf8->sjis劦irreversible'
  ]
  # Expected results when the target is Shift_JIS.
  sjis_texts = [
    'invalid?utf8',
    'invalid?sjis',
    "utf8#{33111.chr('sjis')}convertible",
    'utf8?inconvertible',
    "sjis->utf8->sjis#{60777.chr('sjis')}irreversible"
  ]
  cases = [
    ['utf-8', utf8_texts],
    [Encoding::UTF_8, utf8_texts],
    ['sjis', sjis_texts],
    [Encoding::Windows_31J, sjis_texts]
  ]
  cases.each do |target, expected|
    actual = texts.map { |s| Reline::Unicode.safe_encode(s, target) }
    assert_equal(expected, actual)
  end
end
end

0 comments on commit a24596d

Please sign in to comment.