Formally re-port this logic from MRI str_casecmp

Several details of the existing implementation seemed odd and did not match CRuby, so I redid the port against the CRuby 3.3 HEAD (master) source.
headius · Oct 9, 2023 · 35cb07b
1 parent ab72a2d
commit 35cb07b
Showing 1 changed file with 44 additions and 37 deletions.
diff --git a/core/src/main/java/org/jruby/util/StringSupport.java b/core/src/main/java/org/jruby/util/StringSupport.java
@@ -2484,50 +2484,57 @@ private static int trCode(int c, int[]trans, IntHash<Integer> hash, boolean cfla
         }
     }
 
-    public static int multiByteCasecmp(Encoding enc, ByteList value, ByteList otherValue) {
-        byte[]bytes = value.getUnsafeBytes();
-        int p = value.getBegin();
-        int end = p + value.getRealSize();
-
-        byte[]obytes = otherValue.getUnsafeBytes();
-        int op = otherValue.getBegin();
-        int oend = op + otherValue.getRealSize();
-
-        while (p < end && op < oend) {
-            final int c, oc;
-            if (enc.isAsciiCompatible()) {
-                c = bytes[p] & 0xff;
-                oc = obytes[op] & 0xff;
+    // MRI: multibyte portion of str_casecmp
+    public static int multiByteCasecmp(Encoding enc, ByteList value1, ByteList value2) {
+        byte[] bytes = value1.getUnsafeBytes();
+        int p1 = value1.getBegin();
+        int size1 = value1.getRealSize();
+        int end1 = p1 + size1;
+
+        byte[] bytes2 = value2.getUnsafeBytes();
+        int p2 = value2.getBegin();
+        int size2 = value2.getRealSize();
+        int end2 = p2 + size2;
+
+        int[] lenAry = {0};
+
+        while (p1 < end1 && p2 < end2) {
+            final int c1, c2;
+            int l1, l2;
+
+            c1 = EncodingUtils.encAscget(bytes, p1, end1, lenAry, enc);
+            l1 = lenAry[0];
+            c2 = EncodingUtils.encAscget(bytes2, p2, end2, lenAry, enc);
+            l2 = lenAry[0];
+
+            if (0 <= c1 && 0 <= c2) {
+                int dc = AsciiTables.ToLowerCaseTable[c1];
+                int odc = AsciiTables.ToLowerCaseTable[c2];
+
+                if (dc != odc) {
+                    return dc < odc ? -1 : 1;
+                }
             } else {
-                c = preciseCodePoint(enc, bytes, p, end);
-                oc = preciseCodePoint(enc, obytes, op, oend);
-            }
+                l1 = length(enc, bytes, p1, end1);
+                l2 = length(enc, bytes2, p2, end2);
 
-            final int cl, ocl;
-            if (Encoding.isAscii(c) && Encoding.isAscii(oc)) {
-                int dc = AsciiTables.ToLowerCaseTable[c];
-                int odc = AsciiTables.ToLowerCaseTable[oc];
-                if (dc != odc) return dc < odc ? -1 : 1;
+                int len = Math.min(l1, l2);
+                int ret = ByteList.memcmp(bytes, p1, bytes2, p2, len);
 
-                if (enc.isAsciiCompatible()) {
-                    cl = ocl = 1;
-                } else {
-                    cl = preciseLength(enc, bytes, p, end);
-                    ocl = preciseLength(enc, obytes, op, oend);
+                if (ret != 0) {
+                    return ret < 0 ? -1 : 1;
+                }
+
+                if (l1 != l2) {
+                    return l1 < l2 ? -1 : 1;
                 }
-            } else {
-                cl = length(enc, bytes, p, end);
-                ocl = length(enc, obytes, op, oend);
-                int ret = caseCmp(bytes, p, obytes, op, cl < ocl ? cl : ocl);
-                if (ret != 0) return ret < 0 ? -1 : 1;
-                if (cl != ocl) return cl < ocl ? -1 : 1;
             }
 
-            p += cl;
-            op += ocl;
+            p1 += l1;
+            p2 += l2;
         }
-        if (end - p == oend - op) return 0;
-        return end - p > oend - op ? 1 : -1;
+        if (size1 == size2) return 0;
+        return size1 > size2 ? 1 : -1;
     }
 
     public static boolean singleByteSqueeze(ByteList value, boolean squeeze[]) {