From 183916453ed13a3ee1cebec1901096c92ace5747 Mon Sep 17 00:00:00 2001 From: Tim Morgan Date: Sat, 22 Jun 2024 07:38:54 -0500 Subject: [PATCH] Use '?' for replacement char sometimes --- spec/core/string/encode_spec.rb | 8 +++----- spec/core/string/shared/encode.rb | 6 ++---- src/encoding_object.cpp | 10 ++++++++-- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/spec/core/string/encode_spec.rb b/spec/core/string/encode_spec.rb index 8ba7116ff..9c2412106 100644 --- a/spec/core/string/encode_spec.rb +++ b/spec/core/string/encode_spec.rb @@ -104,11 +104,9 @@ end it "replaces undefined encoding in destination with default replacement" do - NATFIXME 'undef option' do - encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace) - encoded.should == "B?".encode(Encoding::US_ASCII) - encoded.encode("UTF-8").should == "B?" - end + encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace) + encoded.should == "B?".encode(Encoding::US_ASCII) + encoded.encode("UTF-8").should == "B?" end it "replaces undefined encoding in destination with a specified replacement" do diff --git a/spec/core/string/shared/encode.rb b/spec/core/string/shared/encode.rb index b099beec9..5d435f93c 100644 --- a/spec/core/string/shared/encode.rb +++ b/spec/core/string/shared/encode.rb @@ -158,10 +158,8 @@ end it "replaces invalid characters in the destination encoding" do - NATFIXME 'encode options' do - xFF = [0xFF].pack('C').force_encoding('utf-8') - "ab#{xFF}c".send(@method, Encoding::ISO_8859_1, invalid: :replace).should == "ab?c" - end + xFF = [0xFF].pack('C').force_encoding('utf-8') + "ab#{xFF}c".send(@method, Encoding::ISO_8859_1, invalid: :replace).should == "ab?c" end it "calls #to_hash to convert the options object" do diff --git a/src/encoding_object.cpp b/src/encoding_object.cpp index 8c038ab25..9c2439649 100644 --- a/src/encoding_object.cpp +++ b/src/encoding_object.cpp @@ -78,7 +78,10 @@ Value EncodingObject::encode(Env *env, EncodingObject *orig_encoding, StringObje temp_string.append(options.replace_option); continue; } - unicode_codepoint = 0xFFFD; + if (is_single_byte_encoding()) + unicode_codepoint = '?'; + else + unicode_codepoint = 0xFFFD; } } else { unicode_codepoint = orig_encoding->to_unicode_codepoint(source_codepoint); @@ -134,7 +137,10 @@ Value EncodingObject::encode(Env *env, EncodingObject *orig_encoding, StringObje temp_string.append(options.replace_option); continue; } - destination_codepoint = 0xFFFD; + if (is_single_byte_encoding()) + destination_codepoint = '?'; + else + destination_codepoint = 0xFFFD; } }