Skip to content

Commit

Permalink
Arity-split String#encode!
Browse files Browse the repository at this point in the history
This led to a few other things getting split, plus a lambda form
that avoids the temporary string carrier array.
  • Loading branch information
headius committed Dec 21, 2023
1 parent 1fd1f73 commit 02a557a
Show file tree
Hide file tree
Showing 2 changed files with 194 additions and 95 deletions.
57 changes: 35 additions & 22 deletions core/src/main/java/org/jruby/RubyString.java
Original file line number Diff line number Diff line change
Expand Up @@ -6596,39 +6596,43 @@ public IRubyObject encoding(ThreadContext context) {
return context.runtime.getEncodingService().getEncoding(value.getEncoding());
}

// TODO: re-split this
public IRubyObject encode_bang(ThreadContext context, IRubyObject arg0) {
return encode_bang(context, new IRubyObject[]{arg0});
}
@JRubyMethod(name = "encode!")
public IRubyObject encode_bang(ThreadContext context) {
modify19();

public IRubyObject encode_bang(ThreadContext context, IRubyObject arg0, IRubyObject arg1) {
return encode_bang(context, new IRubyObject[]{arg0,arg1});
return EncodingUtils.strTranscode(context, this, RubyString::updateFromTranscode);
}

public IRubyObject encode_bang(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) {
return encode_bang(context, new IRubyObject[]{arg0,arg1,arg2});
/**
 * One-argument form of {@code String#encode!}.
 *
 * <p>Calls {@code modify19()} on this string and then hands {@code arg0} to
 * {@link EncodingUtils#strTranscode}, with {@code updateFromTranscode} as the callback that
 * applies the transcoding outcome to this string.
 *
 * @param context the current thread context
 * @param arg0 the single argument given to {@code encode!}, forwarded unchanged
 * @return whatever the transcode callback returns
 */
@JRubyMethod(name = "encode!")
public IRubyObject encode_bang(ThreadContext context, IRubyObject arg0) {
    modify19();

    final EncodingUtils.TranscodeResult inPlace = RubyString::updateFromTranscode;
    return EncodingUtils.strTranscode(context, arg0, this, inPlace);
}

@JRubyMethod(name = "encode!", optional = 3, checkArity = false)
public IRubyObject encode_bang(ThreadContext context, IRubyObject[] args) {
Arity.checkArgumentCount(context, args, 0, 3);
@JRubyMethod(name = "encode!")
public IRubyObject encode_bang(ThreadContext context, IRubyObject arg0, IRubyObject arg1) {
modify19();

IRubyObject[] newstr_p;
Encoding encindex;
return EncodingUtils.strTranscode(context, arg0, arg1, this, RubyString::updateFromTranscode);
}

@JRubyMethod(name = "encode!")
public IRubyObject encode_bang(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) {
modify19();

newstr_p = new IRubyObject[]{this};
encindex = EncodingUtils.strTranscode(context, args, newstr_p);
return EncodingUtils.strTranscode(context, arg0, arg1, arg2, this, RubyString::updateFromTranscode);
}

if (encindex == null) return this;
if (newstr_p[0] == this) {
setEncoding(encindex);
return this;
private static RubyString updateFromTranscode(ThreadContext context, RubyString self, Encoding encindex, RubyString newstr) {
if (encindex == null) return self;
if (newstr == self) {
self.setEncoding(encindex);
return self;
}
replace(newstr_p[0]);
setEncoding(encindex);
return this;
self.replace(newstr);
self.setEncoding(encindex);
return self;
}

@JRubyMethod
Expand Down Expand Up @@ -7244,4 +7248,13 @@ public RubyArray unpack(IRubyObject obj) {
return Pack.unpack(getRuntime(), this.value, stringValue(obj).value);
}

/**
 * Array-based entry point for {@code String#encode!}, kept for callers that still use the
 * pre-split signature.
 *
 * <p>Accepts zero to three arguments and forwards to the fixed-arity {@code encode_bang}
 * overload of the same length. Each of those overloads calls {@code modify19()} itself, so it
 * is not repeated here.
 *
 * @param context the current thread context
 * @param args the arguments given to {@code encode!}; at most three
 * @return the result of the matching fixed-arity overload
 * @deprecated use the fixed-arity {@code encode_bang} overloads instead
 */
@Deprecated
public IRubyObject encode_bang(ThreadContext context, IRubyObject[] args) {
    // A three-argument encode! overload exists, so the compatibility shim must accept three
    // arguments as well; limiting it to two rejected calls the split methods handle.
    Arity.checkArgumentCount(context, args, 0, 3);

    switch (args.length) {
        case 0:
            return encode_bang(context);
        case 1:
            return encode_bang(context, args[0]);
        case 2:
            return encode_bang(context, args[0], args[1]);
        default:
            // Only reachable with exactly three arguments after the arity check above.
            return encode_bang(context, args[0], args[1], args[2]);
    }
}

}
232 changes: 159 additions & 73 deletions core/src/main/java/org/jruby/util/io/EncodingUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -870,26 +870,26 @@ public static Encoding toEncodingIndex(ThreadContext context, IRubyObject enc) {
}

// encoded_dup
public static IRubyObject encodedDup(ThreadContext context, IRubyObject newstr, IRubyObject str, Encoding encindex) {
if (encindex == null) return str.dup();
public static RubyString encodedDup(ThreadContext context, RubyString str, Encoding encindex, RubyString newstr) {
if (encindex == null) return (RubyString) str.dup();
if (newstr == str) {
newstr = str.dup();
newstr = (RubyString) str.dup();
} else {
// set to same superclass
((RubyBasicObject)newstr).setMetaClass(str.getMetaClass());
newstr.setMetaClass(str.getMetaClass());
}
((RubyString)newstr).modify19();
return strEncodeAssociate(context, newstr, encindex);
newstr.modify19();
return strEncodeAssociate(newstr, encindex);
}

// str_encode_associate
public static IRubyObject strEncodeAssociate(ThreadContext context, IRubyObject str, Encoding encidx) {
public static RubyString strEncodeAssociate(RubyString str, Encoding encidx) {
encAssociateIndex(str, encidx);

if (encAsciicompat(encidx)) {
((RubyString)str).scanForCodeRange();
str.scanForCodeRange();
} else {
((RubyString)str).setCodeRange(StringSupport.CR_VALID);
str.setCodeRange(StringSupport.CR_VALID);
}

return str;
Expand All @@ -911,21 +911,25 @@ public static IRubyObject encAssociateIndex(IRubyObject obj, Encoding encidx) {
}

// str_encode
public static IRubyObject strEncode(ThreadContext context, IRubyObject str, IRubyObject... args) {
IRubyObject[] newstr_p = {str};
/**
 * Transcodes {@code str} with no explicit arguments, using {@code encodedDup} as the callback
 * that builds the returned string.
 *
 * @param context the current thread context
 * @param str the string to encode
 * @return the value produced by {@code strTranscode}
 */
public static IRubyObject strEncode(ThreadContext context, RubyString str) {
    final TranscodeResult dupWithEncoding = EncodingUtils::encodedDup;
    return strTranscode(context, str, dupWithEncoding);
}

/**
 * Transcodes {@code str} with one argument, using {@code encodedDup} as the callback that
 * builds the returned string.
 *
 * @param context the current thread context
 * @param str the string to encode
 * @param arg0 the single argument, forwarded unchanged to {@code strTranscode}
 * @return the value produced by {@code strTranscode}
 */
public static IRubyObject strEncode(ThreadContext context, RubyString str, IRubyObject arg0) {
    final TranscodeResult dupWithEncoding = EncodingUtils::encodedDup;
    return strTranscode(context, arg0, str, dupWithEncoding);
}

Encoding dencindex = strTranscode(context, args, newstr_p);
/**
 * Transcodes {@code str} with two arguments, using {@code encodedDup} as the callback that
 * builds the returned string.
 *
 * @param context the current thread context
 * @param str the string to encode
 * @param arg0 first argument, forwarded unchanged to {@code strTranscode}
 * @param arg1 second argument, forwarded unchanged to {@code strTranscode}
 * @return the value produced by {@code strTranscode}
 */
public static IRubyObject strEncode(ThreadContext context, RubyString str, IRubyObject arg0, IRubyObject arg1) {
    final TranscodeResult dupWithEncoding = EncodingUtils::encodedDup;
    return strTranscode(context, arg0, arg1, str, dupWithEncoding);
}

return encodedDup(context, newstr_p[0], str, dencindex);
/**
 * Transcodes {@code str} with three arguments, using {@code encodedDup} as the callback that
 * builds the returned string.
 *
 * @param context the current thread context
 * @param str the string to encode
 * @param arg0 first argument, forwarded unchanged to {@code strTranscode}
 * @param arg1 second argument, forwarded unchanged to {@code strTranscode}
 * @param arg2 third argument, forwarded unchanged to {@code strTranscode}
 * @return the value produced by {@code strTranscode}
 */
public static IRubyObject strEncode(ThreadContext context, RubyString str, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) {
    final TranscodeResult dupWithEncoding = EncodingUtils::encodedDup;
    return strTranscode(context, arg0, arg1, arg2, str, dupWithEncoding);
}

// rb_str_encode
public static IRubyObject rbStrEncode(ThreadContext context, IRubyObject str, IRubyObject to, int ecflags, IRubyObject ecopt) {
IRubyObject[] newstr_p = {str};

Encoding dencindex = strTranscode0(context, 1, new IRubyObject[]{to}, newstr_p, ecflags, ecopt);

return encodedDup(context, newstr_p[0], str, dencindex);
return strTranscode1(context, to, (RubyString) str, ecflags, ecopt, EncodingUtils::encodedDup);
}

// rb_str_encode
Expand Down Expand Up @@ -973,76 +977,116 @@ protected static boolean noDecorators(int ecflags) {
}

// str_transcode
public static Encoding strTranscode(ThreadContext context, IRubyObject[] args, IRubyObject[] self_p) {
int ecflags = 0;
int argc = args.length;
IRubyObject[] ecopts_p = {context.nil};
/**
 * Array-based dispatcher onto the fixed-arity {@code strTranscode} overloads.
 *
 * <p>Zero to three arguments are accepted. The three-argument case is forwarded to the
 * three-argument overload like the others, so array-based callers are not rejected for a
 * call shape the fixed-arity path supports.
 *
 * @param context the current thread context
 * @param args the transcode arguments; at most three
 * @param str the string being transcoded
 * @param result callback that receives the transcoding outcome and produces the return value
 * @return the value produced by the selected overload
 * @throws RaiseException an ArgumentError (built with {@code newArgumentError(args.length, 2)})
 *         when more than three arguments are given
 */
public static IRubyObject strTranscode(ThreadContext context, IRubyObject[] args, RubyString str, TranscodeResult result) {
    switch (args.length) {
        case 0:
            return strTranscode(context, str, result);
        case 1:
            return strTranscode(context, args[0], str, result);
        case 2:
            return strTranscode(context, args[0], args[1], str, result);
        case 3:
            // Previously fell through to the ArgumentError below even though the
            // three-argument overload exists.
            return strTranscode(context, args[0], args[1], args[2], str, result);
        default:
            throw context.runtime.newArgumentError(args.length, 2);
    }
}

if (args.length >= 1) {
IRubyObject tmp = TypeConverter.checkHashType(context.runtime, args[args.length - 1]);
if (!tmp.isNil()) {
argc--;
ecflags = econvPrepareOpts(context, tmp, ecopts_p);
}
/**
 * Callback that turns the outcome of a transcode into the value returned to the caller.
 *
 * <p>It is supplied as a lambda or method reference (for example {@code encodedDup}), which
 * lets the transcode routines hand back both an encoding and a string without a temporary
 * carrier array.
 */
@FunctionalInterface
public interface TranscodeResult {
    /**
     * @param context the current thread context
     * @param str the string the transcode was requested on
     * @param enc the resulting encoding; may be {@code null}, which the implementations in
     *            this codebase treat as "leave the encoding alone"
     * @param newStr the string holding the transcoded content; may be the same object as
     *               {@code str}
     * @return the string to hand back to the caller
     */
    RubyString apply(ThreadContext context, RubyString str, Encoding enc, RubyString newStr);
}

/**
 * Zero-argument transcode: starts with no conversion flags and nil options.
 *
 * @param context the current thread context
 * @param str the string being transcoded
 * @param result callback that receives the transcoding outcome
 * @return the value produced by {@code strTranscode0}
 */
public static IRubyObject strTranscode(ThreadContext context, RubyString str, TranscodeResult result) {
    final int noFlags = 0;
    final IRubyObject noOptions = context.nil;
    return strTranscode0(context, str, noFlags, noOptions, result);
}

/**
 * One-argument transcode: starts with no conversion flags and nil options.
 *
 * @param context the current thread context
 * @param arg0 the single argument, forwarded unchanged to {@code strTranscode1}
 * @param str the string being transcoded
 * @param result callback that receives the transcoding outcome
 * @return the value produced by {@code strTranscode1}
 */
public static IRubyObject strTranscode(ThreadContext context, IRubyObject arg0, RubyString str, TranscodeResult result) {
    final int noFlags = 0;
    final IRubyObject noOptions = context.nil;
    return strTranscode1(context, arg0, str, noFlags, noOptions, result);
}

/**
 * Two-argument transcode: starts with no conversion flags and nil options.
 *
 * @param context the current thread context
 * @param arg0 first argument, forwarded unchanged to {@code strTranscode2}
 * @param arg1 second argument, forwarded unchanged to {@code strTranscode2}
 * @param str the string being transcoded
 * @param result callback that receives the transcoding outcome
 * @return the value produced by {@code strTranscode2}
 */
public static IRubyObject strTranscode(ThreadContext context, IRubyObject arg0, IRubyObject arg1, RubyString str, TranscodeResult result) {
    final int noFlags = 0;
    final IRubyObject noOptions = context.nil;
    return strTranscode2(context, arg0, arg1, str, noFlags, noOptions, result);
}

/**
 * Three-argument transcode: starts with no conversion flags and nil options.
 *
 * @param context the current thread context
 * @param arg0 first argument, forwarded unchanged to {@code strTranscode3}
 * @param arg1 second argument, forwarded unchanged to {@code strTranscode3}
 * @param arg2 third argument, forwarded unchanged to {@code strTranscode3}
 * @param str the string being transcoded
 * @param result callback that receives the transcoding outcome
 * @return the value produced by {@code strTranscode3}
 */
public static IRubyObject strTranscode(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2, RubyString str, TranscodeResult result) {
    final int noFlags = 0;
    final IRubyObject noOptions = context.nil;
    return strTranscode3(context, arg0, arg1, arg2, str, noFlags, noOptions, result);
}

/**
 * Transcode step for the case where no encoding argument was given.
 *
 * <p>The destination is the runtime's default internal encoding. When none is set and no
 * conversion flags were requested, nothing needs converting and the callback is invoked with
 * a {@code null} encoding and the original string. When none is set but flags were requested,
 * the string's own encoding is used as the destination.
 *
 * @param context the current thread context
 * @param str the string being transcoded
 * @param ecflags conversion flags gathered so far
 * @param ecopts conversion options, or nil
 * @param result callback that receives the transcoding outcome
 * @return the value produced by {@code result}; never a bare {@code null} from this method itself
 */
private static IRubyObject strTranscode0(ThreadContext context, RubyString str, int ecflags, IRubyObject ecopts, TranscodeResult result) {
    IRubyObject arg1 = context.runtime.getEncodingService().getDefaultInternal();
    if (arg1 == null || arg1.isNil()) {
        // Report "no change" through the callback rather than returning a Java null, which
        // would otherwise escape to callers of encode/encode! as their return value.
        if (ecflags == 0) return result.apply(context, str, null, str);
        arg1 = objEncoding(context, str);
    }

    // Remember whether the caller asked for invalid-byte handling before the defaults are OR-ed in.
    boolean explicitlyInvalidReplace = (ecflags & EConvFlags.INVALID_MASK) != 0;

    ecflags |= EConvFlags.INVALID_REPLACE | EConvFlags.UNDEF_REPLACE;

    return strTranscode(context, arg1, context.nil, str, ecflags, ecopts, result, explicitlyInvalidReplace);
}

// str_transcode0
public static Encoding strTranscode0(ThreadContext context, int argc, IRubyObject[] args, IRubyObject[] self_p, int ecflags, IRubyObject ecopts) {
Ruby runtime = context.runtime;
private static IRubyObject strTranscode1(ThreadContext context, IRubyObject arg1, RubyString str, int ecflags, IRubyObject ecopts, TranscodeResult result) {
IRubyObject tmp = TypeConverter.checkHashType(context.runtime, arg1);
if (!tmp.isNil()) {
IRubyObject[] ecopts_p = {context.nil};
ecflags = econvPrepareOpts(context, tmp, ecopts_p);
return strTranscode0(context, str, ecflags, ecopts_p[0], result);
}

IRubyObject str = self_p[0];
IRubyObject arg1, arg2;
Encoding[] senc_p = {null}, denc_p = {null};
byte[][] sname_p = {null}, dname_p = {null};
Encoding dencindex;
boolean explicitlyInvalidReplace = true;
return strTranscode(context, arg1, context.nil, str, ecflags, ecopts, result, true);
}

if (argc > 2) {
throw context.runtime.newArgumentError(args.length, 2);
private static IRubyObject strTranscode2(ThreadContext context, IRubyObject arg1, IRubyObject arg2, RubyString str, int ecflags, IRubyObject ecopts, TranscodeResult result) {
IRubyObject tmp = TypeConverter.checkHashType(context.runtime, arg2);
if (!tmp.isNil()) {
IRubyObject[] ecopts_p = {context.nil};
ecflags = econvPrepareOpts(context, tmp, ecopts_p);
return strTranscode1(context, arg1, str, ecflags, ecopts_p[0], result);
}

if (argc == 0) {
arg1 = runtime.getEncodingService().getDefaultInternal();
if (arg1 == null || arg1.isNil()) {
if (ecflags == 0) return null;
arg1 = objEncoding(context, str);
}
if ((ecflags & EConvFlags.INVALID_MASK) == 0) {
explicitlyInvalidReplace = false;
}
ecflags |= EConvFlags.INVALID_REPLACE | EConvFlags.UNDEF_REPLACE;
} else {
arg1 = args[0];
return strTranscode(context, arg1, arg2, str, ecflags, ecopts, result, true);
}

private static IRubyObject strTranscode3(ThreadContext context, IRubyObject arg1, IRubyObject arg2, IRubyObject arg3, RubyString str, int ecflags, IRubyObject ecopts, TranscodeResult result) {
IRubyObject tmp = TypeConverter.checkHashType(context.runtime, arg3);
if (tmp.isNil()) {
throw context.runtime.newArgumentError(3, 0, 2);
}

arg2 = argc <= 1 ? context.nil : args[1];
dencindex = strTranscodeEncArgs(context, str, arg1, arg2, sname_p, senc_p, dname_p, denc_p);
IRubyObject[] ecopts_p = {context.nil};
ecflags = econvPrepareOpts(context, tmp, ecopts_p);
return strTranscode2(context, arg1, arg2, str, ecflags, ecopts_p[0], result);
}

private static RubyString strTranscode(ThreadContext context, IRubyObject arg1, IRubyObject arg2, RubyString str, int ecflags, IRubyObject ecopts, TranscodeResult result, boolean explicitlyInvalidReplace) {
Ruby runtime = context.runtime;

Encoding[] senc_p = {null}, denc_p = {null};
byte[][] sname_p = {null}, dname_p = {null};
Encoding dencindex = strTranscodeEncArgs(context, str, arg1, arg2, sname_p, senc_p, dname_p, denc_p);

IRubyObject dest;
RubyString dest;

if (noDecorators(ecflags)) {
dest = str;
if (senc_p[0] != null && senc_p[0] == denc_p[0]) {
if ((ecflags & EConvFlags.INVALID_MASK) != 0 && explicitlyInvalidReplace) {
IRubyObject rep = context.nil;
if (!ecopts.isNil()) {
rep = ((RubyHash)ecopts).op_aref(context, runtime.newSymbol("replace"));
rep = ((RubyHash) ecopts).op_aref(context, runtime.newSymbol("replace"));
}
dest = ((RubyString)str).encStrScrub(context, senc_p[0], rep, Block.NULL_BLOCK);
dest = (RubyString) str.encStrScrub(context, senc_p[0], rep, Block.NULL_BLOCK);
if (dest.isNil()) dest = str;
self_p[0] = dest;
return dencindex;
}
return arg2.isNil() ? null : dencindex;
} else if (senc_p[0] != null && denc_p[0] != null && senc_p[0].isAsciiCompatible() && denc_p[0].isAsciiCompatible()) {
if (((RubyString)str).scanForCodeRange() == StringSupport.CR_7BIT) {
return dencindex;
} else if (arg2.isNil()){
dencindex = null;
}
}
if (encodingEqual(sname_p[0], dname_p[0])) {
return arg2.isNil() ? null : dencindex;
return result.apply(context, str, dencindex, dest);
} else if (senc_p[0] != null && denc_p[0] != null
&& senc_p[0].isAsciiCompatible() && denc_p[0].isAsciiCompatible()
&& str.scanForCodeRange() == StringSupport.CR_7BIT) {
return result.apply(context, str, dencindex, str);
} else if (encodingEqual(sname_p[0], dname_p[0])) {
if (arg2.isNil()) dencindex = null;
return result.apply(context, str, dencindex, str);
}
} else {
if (encodingEqual(sname_p[0], dname_p[0])) {
Expand All @@ -1051,12 +1095,12 @@ public static Encoding strTranscode0(ThreadContext context, int argc, IRubyObjec
}
}

ByteList sp = ((RubyString)str).getByteList();
ByteList sp = str.getByteList();
ByteList fromp = sp;
int slen = ((RubyString)str).size();
int slen = str.size();
int blen = slen + 30;
dest = RubyString.newStringLight(runtime, blen);
ByteList destp = ((RubyString)dest).getByteList();
ByteList destp = dest.getByteList();

byte[] frompBytes = fromp.unsafeBytes();
byte[] destpBytes = destp.unsafeBytes();
Expand All @@ -1074,9 +1118,7 @@ public static Encoding strTranscode0(ThreadContext context, int argc, IRubyObjec
dencindex = defineDummyEncoding(context, dname_p[0]);
}

self_p[0] = dest;

return dencindex;
return result.apply(context, str, dencindex, dest);
}

// rb_obj_encoding
Expand Down Expand Up @@ -2341,4 +2383,48 @@ public static Encoding ioStripBOM(RubyIO io) {
return ioStripBOM(io.getRuntime().getCurrentContext(), io);
}

/**
 * Compatibility shim for the old array-carrier form of {@code strTranscode0}.
 *
 * <p>Runs the arity-specific transcode selected by {@code argc} with a callback that stores
 * the resulting string into {@code self_p[0]} and captures the resulting encoding, which is
 * then returned. If the callback is never invoked, {@code null} is returned and
 * {@code self_p} is left untouched.
 *
 * @param context the current thread context
 * @param argc number of leading entries of {@code args} to use (0, 1 or 2)
 * @param args the transcode arguments
 * @param self_p one-element carrier: the input string on entry, the result string on exit
 * @param ecflags conversion flags
 * @param ecopts conversion options, or nil
 * @return the captured encoding, possibly {@code null}
 * @deprecated use the callback-based {@code strTranscode} overloads instead
 */
@Deprecated
public static Encoding strTranscode0(ThreadContext context, int argc, IRubyObject[] args, IRubyObject[] self_p, int ecflags, IRubyObject ecopts) {
    final Encoding[] encodingOut = new Encoding[1];
    final TranscodeResult capture = (ctx, source, enc, transcoded) -> {
        encodingOut[0] = enc;
        self_p[0] = transcoded;
        return transcoded;
    };

    if (argc == 0) {
        strTranscode0(context, (RubyString) self_p[0], ecflags, ecopts, capture);
    } else if (argc == 1) {
        strTranscode1(context, args[0], (RubyString) self_p[0], ecflags, ecopts, capture);
    } else if (argc == 2) {
        strTranscode2(context, args[0], args[1], (RubyString) self_p[0], ecflags, ecopts, capture);
    } else {
        throw context.runtime.newArgumentError(args.length, 2);
    }

    return encodingOut[0];
}

/**
 * Compatibility shim for the old array-carrier form of {@code strTranscode}.
 *
 * <p>Delegates to the callback-based array dispatcher with a callback that stores the
 * resulting string into {@code self_p[0]} and captures the resulting encoding.
 *
 * @param context the current thread context
 * @param args the transcode arguments
 * @param self_p one-element carrier: the input string on entry, the result string on exit
 * @return the captured encoding, or {@code null} if the callback was not invoked or received none
 * @deprecated use the callback-based {@code strTranscode} overloads instead
 */
@Deprecated
public static Encoding strTranscode(ThreadContext context, IRubyObject[] args, IRubyObject[] self_p) {
    final Encoding[] encodingOut = new Encoding[1];

    strTranscode(context, args, (RubyString) self_p[0], (ctx, source, enc, transcoded) -> {
        encodingOut[0] = enc;
        self_p[0] = transcoded;
        return transcoded;
    });

    return encodingOut[0];
}

/**
 * Varargs compatibility form of {@code strEncode}: casts {@code str} to {@code RubyString}
 * and routes through the array dispatcher with {@code encodedDup} as the callback.
 *
 * @param context the current thread context
 * @param str the string to encode; must be a {@code RubyString}
 * @param args the encode arguments
 * @return the value produced by {@code strTranscode}
 * @deprecated use the fixed-arity {@code strEncode} overloads instead
 */
@Deprecated
public static IRubyObject strEncode(ThreadContext context, IRubyObject str, IRubyObject... args) {
    final RubyString source = (RubyString) str;
    return strTranscode(context, args, source, EncodingUtils::encodedDup);
}

/**
 * Compatibility form of {@code encodedDup} using the old parameter order
 * ({@code newstr}, {@code str}, {@code encindex}).
 *
 * <p>The current overload takes {@code (context, str, encindex, newstr)}, so the two string
 * arguments must be reordered when delegating. Passing them through in the old order would
 * make the original string be treated as the transcoded result and vice versa.
 *
 * @param context the current thread context
 * @param newstr the string holding the transcoded content; must be a {@code RubyString}
 * @param str the original string; must be a {@code RubyString}
 * @param encindex the resulting encoding, or {@code null}
 * @return the value produced by the current {@code encodedDup} overload
 * @deprecated use {@code encodedDup(ThreadContext, RubyString, Encoding, RubyString)} instead
 */
@Deprecated
public static IRubyObject encodedDup(ThreadContext context, IRubyObject newstr, IRubyObject str, Encoding encindex) {
    return encodedDup(context, (RubyString) str, encindex, (RubyString) newstr);
}

/**
 * Compatibility form of {@code strEncodeAssociate} that still accepts a thread context and an
 * {@code IRubyObject}; the context is not used.
 *
 * @param context unused
 * @param str the string to associate with {@code encidx}; must be a {@code RubyString}
 * @param encidx the encoding to associate
 * @return the value produced by {@code strEncodeAssociate(RubyString, Encoding)}
 * @deprecated use {@code strEncodeAssociate(RubyString, Encoding)} instead
 */
@Deprecated
public static IRubyObject strEncodeAssociate(ThreadContext context, IRubyObject str, Encoding encidx) {
    final RubyString target = (RubyString) str;
    return strEncodeAssociate(target, encidx);
}

}

0 comments on commit 02a557a

Please sign in to comment.