Skip to content

Commit a23a070

Browse files
authored
Merge pull request #28 from Earlopain/invalid-encoding-stuff
Handle invalid encoded strings
2 parents b00c5bf + bc47d28 commit a23a070

File tree

2 files changed

+19
-2
lines changed

2 files changed

+19
-2
lines changed

lib/unicode/display_width.rb

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,14 @@ class DisplayWidth
4747

4848
# Returns monospace display width of string
4949
def self.of(string, ambiguous = nil, overwrite = nil, old_options = {}, **options)
50-
string = string.encode(Encoding::UTF_8) unless string.encoding == Encoding::UTF_8
50+
# Binary strings don't make much sense when calculating display width.
51+
# Tentatively reinterpret the bytes as UTF-8 and keep that only if they are valid UTF-8
52+
if string.encoding == Encoding::BINARY && !string.force_encoding(Encoding::UTF_8).valid_encoding?
53+
# Didn't work out, go back to binary
54+
string.force_encoding(Encoding::BINARY)
55+
end
56+
57+
string = string.encode(Encoding::UTF_8, invalid: :replace, undef: :replace) unless string.encoding == Encoding::UTF_8
5158
options = normalize_options(string, ambiguous, overwrite, old_options, **options)
5259

5360
width = 0
@@ -236,4 +243,3 @@ def of(string, **kwargs)
236243
end
237244
end
238245
end
239-

spec/display_width_spec.rb

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,17 @@
183183
it 'works with non-utf8 Unicode encodings' do
184184
expect( 'À'.encode("UTF-16LE").display_width ).to eq 1
185185
end
186+
187+
it 'works with a string that is invalid in its encoding' do
188+
s = "\x81\x39".dup.force_encoding(Encoding::SHIFT_JIS)
189+
190+
# Would print as �9 on the terminal
191+
expect( s.display_width ).to eq 2
192+
end
193+
194+
it 'works with a binary encoded string that is valid in UTF-8' do
195+
expect( '€'.b.display_width ).to eq 1
196+
end
186197
end
187198

188199
describe '[emoji]' do

0 commit comments

Comments
 (0)