Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support index/length args for string passed to String#bytesplice #3674

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Compatibility:
* Fix `Env#update` and accept multiple hashes (@andrykonchin).
* Add `MAJOR`, `MINOR`, `TEENY`, `PATCHLEVEL`, `RUBY_API_VERSION`, and `RUBY_PROGRAM_VERSION` to `RbConfig::CONFIG` (#3396, @rwstauner).
* Set `RbConfig::CONFIG['archincludedir']` (#3396, @andrykonchin).
* Support the index/length arguments for the string argument to `String#bytesplice` added in 3.3 (#3656, @rwstauner).

Performance:

Expand Down
164 changes: 164 additions & 0 deletions spec/ruby/core/string/bytesplice_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,79 @@
-> { s.bytesplice(2, 1, "xxx") }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"")
end
end

# Guarded to Ruby 3.3+: specs for the extra arguments that pick which bytes of the
# replacement string are spliced in, either as (str_index, str_length) integers or as a
# Range. All strings in this group are single-byte, so only bounds handling is exercised.
ruby_version_is "3.3" do
# "HELLO" is 5 bytes, so -6 is one position before its first byte.
it "raises IndexError when str_index is less than -bytesize" do
-> { "hello".bytesplice(2, 1, "HELLO", -6, 0) }.should raise_error(IndexError, "index -6 out of string")
end

# An index equal to bytesize is accepted (see the multibyte specs); one past it is not.
it "raises IndexError when str_index is greater than bytesize" do
-> { "hello".bytesplice(2, 1, "HELLO", 6, 0) }.should raise_error(IndexError, "index 6 out of string")
end

it "raises IndexError for negative str length" do
-> { "abc".bytesplice(0, 1, "", 0, -2) }.should raise_error(IndexError, "negative length -2")
end
rwstauner marked this conversation as resolved.
Show resolved Hide resolved

# Bytes 1..2 of the receiver ("el") are replaced by the selected slice of "HELLO".
# The last case shows a str_length running past the end (6 > 5) giving the same result
# as the exact length.
it "replaces with integer str indices" do
"hello".bytesplice(1, 2, "HELLO", -5, 0).should == "hlo"
"hello".bytesplice(1, 2, "HELLO", 0, 0).should == "hlo"
"hello".bytesplice(1, 2, "HELLO", 0, 1).should == "hHlo"
"hello".bytesplice(1, 2, "HELLO", 0, 5).should == "hHELLOlo"
"hello".bytesplice(1, 2, "HELLO", 0, 6).should == "hHELLOlo"
end

# The Range form reports an out-of-bounds start as RangeError, not IndexError.
it "raises RangeError when str range left boundary is less than -bytesize" do
-> { "hello".bytesplice(0..1, "HELLO", -6...-6) }.should raise_error(RangeError, "-6...-6 out of range")
end

# Same replacements as the integer form, expressed with inclusive (..) and exclusive (...)
# ranges; a range end beyond the replacement's bytesize (0...6) behaves like 0...5.
it "replaces with str ranges" do
"hello".bytesplice(1..2, "HELLO", -5...-5).should == "hlo"
"hello".bytesplice(1..2, "HELLO", 0...0).should == "hlo"
"hello".bytesplice(1..2, "HELLO", 0..0).should == "hHlo"
"hello".bytesplice(1..2, "HELLO", 0...1).should == "hHlo"
"hello".bytesplice(1..2, "HELLO", 0..1).should == "hHElo"
"hello".bytesplice(1..2, "HELLO", 0..-1).should == "hHELLOlo"
"hello".bytesplice(1..2, "HELLO", 0...5).should == "hHELLOlo"
"hello".bytesplice(1..2, "HELLO", 0...6).should == "hHELLOlo"
end

# Four positional arguments are rejected: an integer str_index must come with a str_length.
it "raises ArgumentError when integer str index is provided without str length argument" do
-> { "hello".bytesplice(0, 1, "xxx", 0) }.should raise_error(ArgumentError, "wrong number of arguments (given 4, expected 2, 3, or 5)")
end

it "replaces on an empty string with str index/length" do
"".bytesplice(0, 0, "", 0, 0).should == ""
"".bytesplice(0, 0, "xxx", 0, 1).should == "x"
end

# equal? checks object identity: the receiver itself is returned, modified in place.
it "mutates self with substring and str index/length" do
s = "hello"
s.bytesplice(2, 1, "xxx", 1, 2).should.equal?(s)
s.should.eql?("hexxlo")
end

it "raises when string is frozen and str index/length" do
s = "hello".freeze
-> { s.bytesplice(2, 1, "xxx", 0, 1) }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"")
end

it "replaces on an empty string with str range" do
"".bytesplice(0..0, "", 0..0).should == ""
"".bytesplice(0..0, "xyz", 0..1).should == "xy"
end

# equal? checks object identity: the receiver itself is returned, modified in place.
it "mutates self with substring and str range" do
s = "hello"
s.bytesplice(2..2, "xyz", 1..2).should.equal?(s)
s.should.eql?("heyzlo")
end

it "raises when string is frozen and str range" do
s = "hello".freeze
-> { s.bytesplice(2..2, "yzx", 0..1) }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"")
end
end
rwstauner marked this conversation as resolved.
Show resolved Hide resolved
end

describe "String#bytesplice with multibyte characters" do
Expand Down Expand Up @@ -131,4 +204,95 @@
result.encoding.should == Encoding::UTF_8
end
end

# Guarded to Ruby 3.3+: multibyte variants of the str_index/str_length and str-range specs.
# "こんにちは" is 5 characters of 3 bytes each in UTF-8 (15 bytes), so the only offsets that
# land on a character boundary are multiples of 3.
ruby_version_is "3.3" do
# -16 is one position before the first of the replacement's 15 bytes.
it "raises IndexError when str_index is out of byte size boundary" do
-> { "こんにちは".bytesplice(3, 3, "こんにちは", -16, 0) }.should raise_error(IndexError, "index -16 out of string")
end

it "raises IndexError when str_index is not on a codepoint boundary" do
-> { "こんにちは".bytesplice(3, 3, "こんにちは", 1, 0) }.should raise_error(IndexError, "offset 1 does not land on character boundary")
end

# The reported offset is the end of the slice (str_index + str_length), which here falls
# inside the first character.
it "raises IndexError when str_length is not matching the codepoint boundary" do
-> { "こんにちは".bytesplice(3, 3, "こんにちは", 0, 1) }.should raise_error(IndexError, "offset 1 does not land on character boundary")
-> { "こんにちは".bytesplice(3, 3, "こんにちは", 0, 2) }.should raise_error(IndexError, "offset 2 does not land on character boundary")
end

# Bytes 3..5 of the receiver ("ん") are replaced. The last case shows that a str_index
# equal to the replacement's bytesize (15) is accepted with a zero length.
it "replaces with integer str indices" do
"こんにちは".bytesplice(3, 3, "こんにちは", -15, 0).should == "こにちは"
"こんにちは".bytesplice(3, 3, "こんにちは", 0, 0).should == "こにちは"
"こんにちは".bytesplice(3, 3, "こんにちは", 0, 3).should == "ここにちは"
"こんにちは".bytesplice(3, 3, "はは", 3, 3).should == "こはにちは"
"こんにちは".bytesplice(3, 3, "こんにちは", 15, 0).should == "こにちは"
end

# Bytes 0..2 of the receiver ("こ") are replaced. A backwards range (-15...-16) selects
# nothing, and a range end past the replacement's bytesize (0...18) behaves like 0...15.
it "replaces with str range" do
"こんにちは".bytesplice(0..2, "こんにちは", -15...-16).should == "んにちは"
"こんにちは".bytesplice(0..2, "こんにちは", 0...0).should == "んにちは"
"こんにちは".bytesplice(0..2, "こんにちは", 3..5).should == "んんにちは"
"こんにちは".bytesplice(0..2, "こんにちは", 3...6).should == "んんにちは"
"こんにちは".bytesplice(0..2, "こんにちは", 3..8).should == "んにんにちは"
"こんにちは".bytesplice(0..2, "こんにちは", 0..-1).should == "こんにちはんにちは"
"こんにちは".bytesplice(0..2, "こんにちは", 0...15).should == "こんにちはんにちは"
"こんにちは".bytesplice(0..2, "こんにちは", 0...18).should == "こんにちはんにちは"
end

# A range whose end lies far before its start yields an empty slice instead of an error,
# unlike an explicit negative str_length in the integer form.
it "treats negative length for str range as 0" do
"こんにちは".bytesplice(0..2, "こんにちは", 0...-100).should == "んにちは"
"こんにちは".bytesplice(0..2, "こんにちは", 3...-100).should == "んにちは"
"こんにちは".bytesplice(0..2, "こんにちは", -15...-100).should == "んにちは"
end

# Inclusive ranges end one byte after their last index, so 0..0 ends at offset 1.
# Negative bounds are counted from the 15-byte end: -4 is byte 11, -5 is byte 10.
it "raises when ranges not match codepoint boundaries in str" do
-> { "こんにちは".bytesplice(3...3, "こ", 0..0) }.should raise_error(IndexError, "offset 1 does not land on character boundary")
-> { "こんにちは".bytesplice(3...3, "こ", 0..1) }.should raise_error(IndexError, "offset 2 does not land on character boundary")
# Begin is incorrect
-> { "こんにちは".bytesplice(3...3, "こんにちは", -4..-1) }.should raise_error(IndexError, "offset 11 does not land on character boundary")
-> { "こんにちは".bytesplice(3...3, "こんにちは", -5..-1) }.should raise_error(IndexError, "offset 10 does not land on character boundary")
# End is incorrect
-> { "こんにちは".bytesplice(3...3, "こんにちは", -3..-2) }.should raise_error(IndexError, "offset 14 does not land on character boundary")
-> { "こんにちは".bytesplice(3...3, "こんにちは", -3..-3) }.should raise_error(IndexError, "offset 13 does not land on character boundary")
end

# Covers both directions: a US-ASCII slice spliced into a UTF-8 receiver, and a UTF-8
# slice spliced into a US-ASCII receiver. The result is expected to be UTF-8 in both.
it "deals with a different encoded argument with str index/length" do
s = "こんにちは"
s.encoding.should == Encoding::UTF_8
sub = "goodbye"
sub.force_encoding(Encoding::US_ASCII)

result = s.bytesplice(3, 3, sub, 0, 3)
result.should == "こgooにちは"
result.encoding.should == Encoding::UTF_8

s = "hello"
s.force_encoding(Encoding::US_ASCII)
sub = "こんにちは"
sub.encoding.should == Encoding::UTF_8

result = s.bytesplice(1, 2, sub, 3, 3)
result.should == "hんlo"
result.encoding.should == Encoding::UTF_8
end

# Range-form counterpart of the previous spec, with the same mixed-encoding pairs.
it "deals with a different encoded argument with str range" do
s = "こんにちは"
s.encoding.should == Encoding::UTF_8
sub = "goodbye"
sub.force_encoding(Encoding::US_ASCII)

result = s.bytesplice(3..5, sub, 0..2)
result.should == "こgooにちは"
result.encoding.should == Encoding::UTF_8

s = "hello"
s.force_encoding(Encoding::US_ASCII)
sub = "こんにちは"
sub.encoding.should == Encoding::UTF_8

result = s.bytesplice(1..2, sub, 3..5)
result.should == "hんlo"
result.encoding.should == Encoding::UTF_8
end
end
end
59 changes: 44 additions & 15 deletions src/main/ruby/truffleruby/core/string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,27 @@ def byteslice(index_or_range, length = undefined)
byteslice index, length
end

def bytesplice(index_or_range, length = undefined, str)
def bytesplice(index_or_range, *args)
is_range = Primitive.is_a?(index_or_range, Range)

length = undefined
str_index_or_range = undefined
str_length = undefined
case args.size
when 1
str = args[0]
when 2
if is_range
str, str_index_or_range = args
else
length, str = args
end
when 4
length, str, str_index_or_range, str_length = args
else
raise ArgumentError, "wrong number of arguments (given #{args.size + 1}, expected 2, 3, or 5)"
andrykonchin marked this conversation as resolved.
Show resolved Hide resolved
end

if Primitive.undefined?(length)
raise TypeError, "wrong argument type #{Primitive.class(index_or_range)} (expected Range)" unless is_range

Expand All @@ -86,28 +104,39 @@ def bytesplice(index_or_range, length = undefined, str)

str = StringValue(str)

if len < 0
raise IndexError, "negative length #{len}"
end
if !Primitive.undefined?(str_index_or_range)
if Primitive.undefined?(str_length)
if !Primitive.is_a?(str_index_or_range, Range)
raise TypeError, "wrong argument type #{Primitive.class(str_index_or_range)} (expected Range)"
end

if bytesize < start || start < 0
if is_range
raise RangeError, "#{index_or_range} out of range"
str_start, str_len = Primitive.range_normalized_start_length(str_index_or_range, str.bytesize)
str_len = Primitive.max(0, str_len)
str_arg_is_range = true
else
raise IndexError, "index #{index_or_range} out of string"
str_start = Primitive.rb_to_int(str_index_or_range)
str_start += str.bytesize if str_start < 0
str_len = Primitive.rb_to_int(str_length)
str_arg_is_range = false
end
end

len = Primitive.min(bytesize - start, len)
finish = start + len
if str_len < 0
raise IndexError, "negative length #{str_length}"
end

if start < bytesize && !Primitive.string_is_character_head?(encoding, self, start)
raise IndexError, "offset #{start} does not land on character boundary"
str_len = Primitive.min(str.bytesize - str_start, str_len)
Truffle::StringOperations.validate_bytesplice_bounds(str, str_start, str_len, str_index_or_range, str_arg_is_range)

str = str.byteslice(str_start, str_len)
end
if finish < bytesize && !Primitive.string_is_character_head?(encoding, self, finish)
raise IndexError, "offset #{finish} does not land on character boundary"

if len < 0
raise IndexError, "negative length #{len}"
end

len = Primitive.min(bytesize - start, len)
Truffle::StringOperations.validate_bytesplice_bounds(self, start, len, index_or_range, is_range)

Primitive.check_mutable_string(self)
enc = Primitive.encoding_ensure_compatible_str(self, str)
Primitive.string_splice(self, str, start, len, enc)
Expand Down
24 changes: 24 additions & 0 deletions src/main/ruby/truffleruby/core/truffle/string_operations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,30 @@ def self.to_sub_replacement(string, result, match)
end
end

# Checks that the byte window starting at +start+ and spanning +len+ bytes is usable on
# +str+ for String#bytesplice.
#
# str            - the String whose bytes are addressed
# start          - byte offset where the window begins; negative values are rejected
# len            - byte length of the window
# index_or_range - the caller's original index or Range, used only in error messages
# is_range       - truthy selects the RangeError wording, otherwise the IndexError wording
#
# Raises RangeError or IndexError when start is negative or greater than str.bytesize.
# Raises IndexError when either edge of the window lies inside str and is not the first
# byte of a character.
def self.validate_bytesplice_bounds(str, start, len, index_or_range, is_range)
  total = str.bytesize

  if start < 0 || start > total
    raise RangeError, "#{index_or_range} out of range" if is_range
    raise IndexError, "index #{index_or_range} out of string"
  end

  enc = str.encoding

  # An edge at or beyond the end of the string is never tested for a character head.
  unless start >= total || Primitive.string_is_character_head?(enc, str, start)
    raise IndexError, "offset #{start} does not land on character boundary"
  end

  stop = start + len

  unless stop >= total || Primitive.string_is_character_head?(enc, str, stop)
    raise IndexError, "offset #{stop} does not land on character boundary"
  end
end

def self.validate_case_mapping_options(options, downcasing)
if options.size > 2
raise ArgumentError, 'too many options'
Expand Down
Loading