Skip to content

Commit

Permalink
[GR-19220] Support index/length args for string passed to String#byte…
Browse files Browse the repository at this point in the history
…splice (#3674)

PullRequest: truffleruby/4367
  • Loading branch information
andrykonchin committed Sep 26, 2024
2 parents a23be72 + 74b3a42 commit 36351cd
Show file tree
Hide file tree
Showing 4 changed files with 233 additions and 15 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Compatibility:
* Fix `Env#update` and accept multiple hashes (@andrykonchin).
* Add `MAJOR`, `MINOR`, `TEENY`, `PATCHLEVEL`, `RUBY_API_VERSION`, and `RUBY_PROGRAM_VERSION` to `RbConfig::CONFIG` (#3396, @rwstauner).
* Set `RbConfig::CONFIG['archincludedir']` (#3396, @andrykonchin).
* Support the index/length arguments for the string argument to `String#bytesplice` added in 3.3 (#3656, @rwstauner).

Performance:

Expand Down
164 changes: 164 additions & 0 deletions spec/ruby/core/string/bytesplice_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,79 @@
-> { s.bytesplice(2, 1, "xxx") }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"")
end
end

# Specs for the extra trailing arguments of String#bytesplice that select a
# portion of the replacement string (called "str" below): either an integer
# str index plus str length, or a single str range. The whole group is guarded
# by ruby_version_is "3.3".
ruby_version_is "3.3" do
# --- integer str index / str length form: bytesplice(index, length, str, str_index, str_length) ---
it "raises IndexError when str_index is less than -bytesize" do
-> { "hello".bytesplice(2, 1, "HELLO", -6, 0) }.should raise_error(IndexError, "index -6 out of string")
end

it "raises IndexError when str_index is greater than bytesize" do
-> { "hello".bytesplice(2, 1, "HELLO", 6, 0) }.should raise_error(IndexError, "index 6 out of string")
end

it "raises IndexError for negative str length" do
-> { "abc".bytesplice(0, 1, "", 0, -2) }.should raise_error(IndexError, "negative length -2")
end

it "replaces with integer str indices" do
# A negative str index counts back from the end of str (-5 behaves like 0 for "HELLO").
"hello".bytesplice(1, 2, "HELLO", -5, 0).should == "hlo"
"hello".bytesplice(1, 2, "HELLO", 0, 0).should == "hlo"
"hello".bytesplice(1, 2, "HELLO", 0, 1).should == "hHlo"
"hello".bytesplice(1, 2, "HELLO", 0, 5).should == "hHELLOlo"
# A str length running past the end of str is accepted and gives the same result as 0, 5.
"hello".bytesplice(1, 2, "HELLO", 0, 6).should == "hHELLOlo"
end

# --- str range form: bytesplice(range, str, str_range) ---
it "raises RangeError when str range left boundary is less than -bytesize" do
-> { "hello".bytesplice(0..1, "HELLO", -6...-6) }.should raise_error(RangeError, "-6...-6 out of range")
end

it "replaces with str ranges" do
"hello".bytesplice(1..2, "HELLO", -5...-5).should == "hlo"
"hello".bytesplice(1..2, "HELLO", 0...0).should == "hlo"
"hello".bytesplice(1..2, "HELLO", 0..0).should == "hHlo"
"hello".bytesplice(1..2, "HELLO", 0...1).should == "hHlo"
"hello".bytesplice(1..2, "HELLO", 0..1).should == "hHElo"
# 0..-1 selects the whole of str; ends beyond str's bytesize give the same result.
"hello".bytesplice(1..2, "HELLO", 0..-1).should == "hHELLOlo"
"hello".bytesplice(1..2, "HELLO", 0...5).should == "hHELLOlo"
"hello".bytesplice(1..2, "HELLO", 0...6).should == "hHELLOlo"
end

# An integer str index must be accompanied by a str length: a 4-argument call is rejected.
it "raises ArgumentError when integer str index is provided without str length argument" do
-> { "hello".bytesplice(0, 1, "xxx", 0) }.should raise_error(ArgumentError, "wrong number of arguments (given 4, expected 2, 3, or 5)")
end

it "replaces on an empty string with str index/length" do
"".bytesplice(0, 0, "", 0, 0).should == ""
"".bytesplice(0, 0, "xxx", 0, 1).should == "x"
end

# The receiver itself is returned (identity checked with equal?) and is modified in place.
it "mutates self with substring and str index/length" do
s = "hello"
s.bytesplice(2, 1, "xxx", 1, 2).should.equal?(s)
s.should.eql?("hexxlo")
end

it "raises when string is frozen and str index/length" do
s = "hello".freeze
-> { s.bytesplice(2, 1, "xxx", 0, 1) }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"")
end

# The same empty-receiver, mutation and frozen-receiver checks, repeated for the str range form.
it "replaces on an empty string with str range" do
"".bytesplice(0..0, "", 0..0).should == ""
"".bytesplice(0..0, "xyz", 0..1).should == "xy"
end

it "mutates self with substring and str range" do
s = "hello"
s.bytesplice(2..2, "xyz", 1..2).should.equal?(s)
s.should.eql?("heyzlo")
end

it "raises when string is frozen and str range" do
s = "hello".freeze
-> { s.bytesplice(2..2, "yzx", 0..1) }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"")
end
end
end

describe "String#bytesplice with multibyte characters" do
Expand Down Expand Up @@ -131,4 +204,95 @@
result.encoding.should == Encoding::UTF_8
end
end

# Specs for the str index/length and str range arguments of String#bytesplice
# when the strings contain multibyte characters. The examples use "こんにちは",
# for which byte offsets -15 and 15 are accepted and -16 is rejected, and check
# that offsets falling inside a character raise IndexError. The whole group is
# guarded by ruby_version_is "3.3".
ruby_version_is "3.3" do
it "raises IndexError when str_index is out of byte size boundary" do
-> { "こんにちは".bytesplice(3, 3, "こんにちは", -16, 0) }.should raise_error(IndexError, "index -16 out of string")
end

it "raises IndexError when str_index is not on a codepoint boundary" do
-> { "こんにちは".bytesplice(3, 3, "こんにちは", 1, 0) }.should raise_error(IndexError, "offset 1 does not land on character boundary")
end

# Here the str index (0) is valid; the reported offset is where the selected slice would end.
it "raises IndexError when str_length is not matching the codepoint boundary" do
-> { "こんにちは".bytesplice(3, 3, "こんにちは", 0, 1) }.should raise_error(IndexError, "offset 1 does not land on character boundary")
-> { "こんにちは".bytesplice(3, 3, "こんにちは", 0, 2) }.should raise_error(IndexError, "offset 2 does not land on character boundary")
end

it "replaces with integer str indices" do
"こんにちは".bytesplice(3, 3, "こんにちは", -15, 0).should == "こにちは"
"こんにちは".bytesplice(3, 3, "こんにちは", 0, 0).should == "こにちは"
"こんにちは".bytesplice(3, 3, "こんにちは", 0, 3).should == "ここにちは"
"こんにちは".bytesplice(3, 3, "はは", 3, 3).should == "こはにちは"
# A str index equal to str's bytesize is allowed when the str length is 0.
"こんにちは".bytesplice(3, 3, "こんにちは", 15, 0).should == "こにちは"
end

it "replaces with str range" do
"こんにちは".bytesplice(0..2, "こんにちは", -15...-16).should == "んにちは"
"こんにちは".bytesplice(0..2, "こんにちは", 0...0).should == "んにちは"
"こんにちは".bytesplice(0..2, "こんにちは", 3..5).should == "んんにちは"
"こんにちは".bytesplice(0..2, "こんにちは", 3...6).should == "んんにちは"
"こんにちは".bytesplice(0..2, "こんにちは", 3..8).should == "んにんにちは"
# 0..-1 selects the whole of str; ends beyond str's bytesize give the same result.
"こんにちは".bytesplice(0..2, "こんにちは", 0..-1).should == "こんにちはんにちは"
"こんにちは".bytesplice(0..2, "こんにちは", 0...15).should == "こんにちはんにちは"
"こんにちは".bytesplice(0..2, "こんにちは", 0...18).should == "こんにちはんにちは"
end

# A str range whose end lies far before its start inserts nothing rather than raising.
it "treats negative length for str range as 0" do
"こんにちは".bytesplice(0..2, "こんにちは", 0...-100).should == "んにちは"
"こんにちは".bytesplice(0..2, "こんにちは", 3...-100).should == "んにちは"
"こんにちは".bytesplice(0..2, "こんにちは", -15...-100).should == "んにちは"
end

it "raises when ranges not match codepoint boundaries in str" do
-> { "こんにちは".bytesplice(3...3, "こ", 0..0) }.should raise_error(IndexError, "offset 1 does not land on character boundary")
-> { "こんにちは".bytesplice(3...3, "こ", 0..1) }.should raise_error(IndexError, "offset 2 does not land on character boundary")
# Begin is incorrect: the reported offsets (11 and 10) are where the range starts
-> { "こんにちは".bytesplice(3...3, "こんにちは", -4..-1) }.should raise_error(IndexError, "offset 11 does not land on character boundary")
-> { "こんにちは".bytesplice(3...3, "こんにちは", -5..-1) }.should raise_error(IndexError, "offset 10 does not land on character boundary")
# End is incorrect: the reported offsets (14 and 13) are one past the inclusive range end
-> { "こんにちは".bytesplice(3...3, "こんにちは", -3..-2) }.should raise_error(IndexError, "offset 14 does not land on character boundary")
-> { "こんにちは".bytesplice(3...3, "こんにちは", -3..-3) }.should raise_error(IndexError, "offset 13 does not land on character boundary")
end

it "deals with a different encoded argument with str index/length" do
# UTF-8 receiver, US-ASCII replacement: the result stays UTF-8.
s = "こんにちは"
s.encoding.should == Encoding::UTF_8
sub = "goodbye"
sub.force_encoding(Encoding::US_ASCII)

result = s.bytesplice(3, 3, sub, 0, 3)
result.should == "こgooにちは"
result.encoding.should == Encoding::UTF_8

# US-ASCII receiver, UTF-8 replacement: the result is UTF-8.
s = "hello"
s.force_encoding(Encoding::US_ASCII)
sub = "こんにちは"
sub.encoding.should == Encoding::UTF_8

result = s.bytesplice(1, 2, sub, 3, 3)
result.should == "hんlo"
result.encoding.should == Encoding::UTF_8
end

it "deals with a different encoded argument with str range" do
# UTF-8 receiver, US-ASCII replacement: the result stays UTF-8.
s = "こんにちは"
s.encoding.should == Encoding::UTF_8
sub = "goodbye"
sub.force_encoding(Encoding::US_ASCII)

result = s.bytesplice(3..5, sub, 0..2)
result.should == "こgooにちは"
result.encoding.should == Encoding::UTF_8

# US-ASCII receiver, UTF-8 replacement: the result is UTF-8.
s = "hello"
s.force_encoding(Encoding::US_ASCII)
sub = "こんにちは"
sub.encoding.should == Encoding::UTF_8

result = s.bytesplice(1..2, sub, 3..5)
result.should == "hんlo"
result.encoding.should == Encoding::UTF_8
end
end
end
59 changes: 44 additions & 15 deletions src/main/ruby/truffleruby/core/string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,27 @@ def byteslice(index_or_range, length = undefined)
byteslice index, length
end

def bytesplice(index_or_range, length = undefined, str)
def bytesplice(index_or_range, *args)
is_range = Primitive.is_a?(index_or_range, Range)

length = undefined
str_index_or_range = undefined
str_length = undefined
case args.size
when 1
str = args[0]
when 2
if is_range
str, str_index_or_range = args
else
length, str = args
end
when 4
length, str, str_index_or_range, str_length = args
else
raise ArgumentError, "wrong number of arguments (given #{args.size + 1}, expected 2, 3, or 5)"
end

if Primitive.undefined?(length)
raise TypeError, "wrong argument type #{Primitive.class(index_or_range)} (expected Range)" unless is_range

Expand All @@ -86,28 +104,39 @@ def bytesplice(index_or_range, length = undefined, str)

str = StringValue(str)

if len < 0
raise IndexError, "negative length #{len}"
end
if !Primitive.undefined?(str_index_or_range)
if Primitive.undefined?(str_length)
if !Primitive.is_a?(str_index_or_range, Range)
raise TypeError, "wrong argument type #{Primitive.class(str_index_or_range)} (expected Range)"
end

if bytesize < start || start < 0
if is_range
raise RangeError, "#{index_or_range} out of range"
str_start, str_len = Primitive.range_normalized_start_length(str_index_or_range, str.bytesize)
str_len = Primitive.max(0, str_len)
str_arg_is_range = true
else
raise IndexError, "index #{index_or_range} out of string"
str_start = Primitive.rb_to_int(str_index_or_range)
str_start += str.bytesize if str_start < 0
str_len = Primitive.rb_to_int(str_length)
str_arg_is_range = false
end
end

len = Primitive.min(bytesize - start, len)
finish = start + len
if str_len < 0
raise IndexError, "negative length #{str_length}"
end

if start < bytesize && !Primitive.string_is_character_head?(encoding, self, start)
raise IndexError, "offset #{start} does not land on character boundary"
str_len = Primitive.min(str.bytesize - str_start, str_len)
Truffle::StringOperations.validate_bytesplice_bounds(str, str_start, str_len, str_index_or_range, str_arg_is_range)

str = str.byteslice(str_start, str_len)
end
if finish < bytesize && !Primitive.string_is_character_head?(encoding, self, finish)
raise IndexError, "offset #{finish} does not land on character boundary"

if len < 0
raise IndexError, "negative length #{len}"
end

len = Primitive.min(bytesize - start, len)
Truffle::StringOperations.validate_bytesplice_bounds(self, start, len, index_or_range, is_range)

Primitive.check_mutable_string(self)
enc = Primitive.encoding_ensure_compatible_str(self, str)
Primitive.string_splice(self, str, start, len, enc)
Expand Down
24 changes: 24 additions & 0 deletions src/main/ruby/truffleruby/core/truffle/string_operations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,30 @@ def self.to_sub_replacement(string, result, match)
end
end

# Checks that the byte span [start, start + len) is usable for a bytesplice
# on +str+.
#
# str            - the String whose bounds are being validated.
# start          - byte offset where the span begins.
# len            - byte length of the span.
# index_or_range - the caller's original index/range argument; only used to
#                  build the out-of-bounds error message.
# is_range       - true when the caller passed a Range; selects RangeError
#                  instead of IndexError for an out-of-bounds start.
#
# Raises RangeError or IndexError when +start+ is negative or greater than
# the byte size of +str+, and IndexError when either end of the span lies
# inside +str+ but not on a character head.
def self.validate_bytesplice_bounds(str, start, len, index_or_range, is_range)
  total_bytes = str.bytesize

  if total_bytes < start || start < 0
    raise RangeError, "#{index_or_range} out of range" if is_range

    raise IndexError, "index #{index_or_range} out of string"
  end

  enc = str.encoding

  # Offsets at (or past) the end of the string are not inspected: there is no
  # byte there to test for being a character head.
  unless start >= total_bytes || Primitive.string_is_character_head?(enc, str, start)
    raise IndexError, "offset #{start} does not land on character boundary"
  end

  span_end = start + len

  unless span_end >= total_bytes || Primitive.string_is_character_head?(enc, str, span_end)
    raise IndexError, "offset #{span_end} does not land on character boundary"
  end
end

def self.validate_case_mapping_options(options, downcasing)
if options.size > 2
raise ArgumentError, 'too many options'
Expand Down

0 comments on commit 36351cd

Please sign in to comment.