Skip to content

Commit

Permalink
Formally re-port this logic from MRI str_casecmp
Browse files Browse the repository at this point in the history
A number of things here seemed odd and did not match CRuby, so I
completed the re-porting based on 3.3 HEAD (master).
  • Loading branch information
headius committed Oct 9, 2023
1 parent ab72a2d commit 35cb07b
Showing 1 changed file with 44 additions and 37 deletions.
81 changes: 44 additions & 37 deletions core/src/main/java/org/jruby/util/StringSupport.java
Original file line number Diff line number Diff line change
Expand Up @@ -2484,50 +2484,57 @@ private static int trCode(int c, int[]trans, IntHash<Integer> hash, boolean cfla
}
}

// NOTE(review): this span is a rendered diff, not compilable Java. Two versions of
// multiByteCasecmp are interleaved without +/- markers:
//   - statements using value/otherValue, p/op, end/oend, obytes, c/oc and cl/ocl
//     refer to the declarations that follow the first signature below;
//   - statements using value1/value2, p1/p2, end1/end2, bytes2, c1/c2 and l1/l2
//     refer to the declarations that follow the second signature.
// Presumably the first is the pre-commit version and the second the re-ported one
// (the "MRI:" comment sits directly above the second signature) -- confirm against
// the repository. Closing braces may belong to either version or be shared context.
public static int multiByteCasecmp(Encoding enc, ByteList value, ByteList otherValue) {
byte[]bytes = value.getUnsafeBytes();
int p = value.getBegin();
int end = p + value.getRealSize();

byte[]obytes = otherValue.getUnsafeBytes();
int op = otherValue.getBegin();
int oend = op + otherValue.getRealSize();

// First version: walk both byte ranges in lockstep until either is exhausted.
while (p < end && op < oend) {
final int c, oc;
// First version: for an ASCII-compatible encoding a single byte (masked to 0..255)
// is read from each side; otherwise preciseCodePoint is used (see further down).
if (enc.isAsciiCompatible()) {
c = bytes[p] & 0xff;
oc = obytes[op] & 0xff;
// MRI: multibyte portion of str_casecmp
public static int multiByteCasecmp(Encoding enc, ByteList value1, ByteList value2) {
byte[] bytes = value1.getUnsafeBytes();
int p1 = value1.getBegin();
int size1 = value1.getRealSize();
int end1 = p1 + size1;

byte[] bytes2 = value2.getUnsafeBytes();
int p2 = value2.getBegin();
int size2 = value2.getRealSize();
int end2 = p2 + size2;

// One-element array handed to EncodingUtils.encAscget; element 0 is read back as
// l1/l2 after each call (presumably encAscget stores the character length there).
int[] lenAry = {0};

// Second version: walk both byte ranges in lockstep until either is exhausted.
while (p1 < end1 && p2 < end2) {
final int c1, c2;
int l1, l2;

c1 = EncodingUtils.encAscget(bytes, p1, end1, lenAry, enc);
l1 = lenAry[0];
c2 = EncodingUtils.encAscget(bytes2, p2, end2, lenAry, enc);
l2 = lenAry[0];

// Both results non-negative: compare the two values after mapping each through
// AsciiTables.ToLowerCaseTable, returning -1/1 on the first difference.
if (0 <= c1 && 0 <= c2) {
int dc = AsciiTables.ToLowerCaseTable[c1];
int odc = AsciiTables.ToLowerCaseTable[c2];

if (dc != odc) {
return dc < odc ? -1 : 1;
}
} else {
c = preciseCodePoint(enc, bytes, p, end);
oc = preciseCodePoint(enc, obytes, op, oend);
}
// Second version, other branch: recompute both lengths with length(...) and compare
// the raw bytes of the two characters.
l1 = length(enc, bytes, p1, end1);
l2 = length(enc, bytes2, p2, end2);

final int cl, ocl;
// First version: when both values are ASCII, compare them through the lower-case table.
if (Encoding.isAscii(c) && Encoding.isAscii(oc)) {
int dc = AsciiTables.ToLowerCaseTable[c];
int odc = AsciiTables.ToLowerCaseTable[oc];
if (dc != odc) return dc < odc ? -1 : 1;
// Second version: byte-wise comparison over the shorter of the two lengths.
int len = Math.min(l1, l2);
int ret = ByteList.memcmp(bytes, p1, bytes2, p2, len);

if (enc.isAsciiCompatible()) {
cl = ocl = 1;
} else {
cl = preciseLength(enc, bytes, p, end);
ocl = preciseLength(enc, obytes, op, oend);
if (ret != 0) {
return ret < 0 ? -1 : 1;
}

// Second version: equal common prefix, so the side with the shorter length sorts first.
if (l1 != l2) {
return l1 < l2 ? -1 : 1;
}
} else {
cl = length(enc, bytes, p, end);
ocl = length(enc, obytes, op, oend);
int ret = caseCmp(bytes, p, obytes, op, cl < ocl ? cl : ocl);
if (ret != 0) return ret < 0 ? -1 : 1;
if (cl != ocl) return cl < ocl ? -1 : 1;
}

// Advance each cursor by the length determined for its side (first version: cl/ocl,
// second version: l1/l2).
p += cl;
op += ocl;
p1 += l1;
p2 += l2;
}
// Final result after the loop. The first version compares the remaining byte counts
// (end - p versus oend - op); the second compares the total sizes (size1 versus size2).
if (end - p == oend - op) return 0;
return end - p > oend - op ? 1 : -1;
if (size1 == size2) return 0;
return size1 > size2 ? 1 : -1;
}

public static boolean singleByteSqueeze(ByteList value, boolean squeeze[]) {
Expand Down

0 comments on commit 35cb07b

Please sign in to comment.